/*
 *  linux/arch/x86_64/nmi.c
 *
 *  NMI watchdog support on APIC systems
 *
 *  Started by Ingo Molnar <mingo@redhat.com>
 *
 *  Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
 *  Mikael Pettersson : Power Management for local APIC NMI watchdog.
 *  Mikael Pettersson : PM converted to driver model. Disable/enable API.
 */
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/sysdev.h>
#include <linux/nmi.h>
#include <linux/sysctl.h>
#include <linux/kprobes.h>

#include <asm/smp.h>
#include <asm/nmi.h>
#include <asm/proto.h>
#include <asm/kdebug.h>
#include <asm/mce.h>
#include <asm/intel_arch_perfmon.h>
int unknown_nmi_panic;
int nmi_watchdog_enabled;
int panic_on_unrecovered_nmi;
/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
 * evntsel_nmi_owner tracks the ownership of the event selection registers
 *   - different performance counters / event selection registers may be
 *     reserved for different subsystems; this reservation system just tries
 *     to coordinate things a little.
 */
static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner);
static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]);
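/*
 * Typical use of the reservation API above (illustrative sketch, not part of
 * this file): a profiling subsystem such as oprofile first checks
 * avail_to_resrv_perfctr_nmi(msr), then calls reserve_perfctr_nmi(msr) and
 * reserve_evntsel_nmi(msr) before programming the counter, and releases both
 * with release_perfctr_nmi()/release_evntsel_nmi() when done, so the NMI
 * watchdog and profilers never program the same counter at the same time.
 */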
/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
 */
#define NMI_MAX_COUNTER_BITS 66

/* nmi_active:
 * >0: the lapic NMI watchdog is active, but can be disabled
 * <0: the lapic NMI watchdog has not been set up, and cannot
 *     be enabled
 *  0: the lapic NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
unsigned int nmi_watchdog = NMI_DEFAULT;
static unsigned int nmi_hz = HZ;
static int panic_on_timeout;	/* set by the "panic" boot option below */
struct nmi_watchdog_ctlblk {
	int enabled;
	u64 check_bit;
	unsigned int cccr_msr;
	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
};
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
/* local prototypes */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the performance counter register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_PERFCTR0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_PERFCTR0);
		else
			return (msr - MSR_P4_BPU_PERFCTR0);
	}
	return 0;
}

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the event selection register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_EVNTSEL0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
		else
			return (msr - MSR_P4_BSU_ESCR0);
	}
	return 0;
}
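/*
 * Example of the mapping above (assuming consecutively numbered MSRs, as on
 * these CPU families): on AMD K7/K8, MSR_K7_PERFCTR0 maps to bit 0 and
 * MSR_K7_PERFCTR3 to bit 3 of perfctr_nmi_owner; on a P4, an event selection
 * register such as MSR_P4_CRU_ESCR0 maps to its offset from MSR_P4_BSU_ESCR0.
 */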
/* checks for a bit availability (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
}

/* checks an msr for availability */
int avail_to_resrv_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
}
int reserve_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
		return 1;
	return 0;
}

void release_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
}

int reserve_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)))
		return 1;
	return 0;
}

void release_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner));
}
static __cpuinit inline int nmi_known_cpu(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return boot_cpu_data.x86 == 15;
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return 1;
		else
			return (boot_cpu_data.x86 == 15);
	}
	return 0;
}

/* Run after command line and cpu_init init, but before all other checks */
void nmi_watchdog_default(void)
{
	if (nmi_watchdog != NMI_DEFAULT)
		return;

	if (nmi_known_cpu())
		nmi_watchdog = NMI_LOCAL_APIC;
	else
		nmi_watchdog = NMI_IO_APIC;
}
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
 * the CPU is idle. To make sure the NMI watchdog really ticks on all
 * CPUs during the test make them busy.
 */
static __init void nmi_cpu_busy(void *data)
{
	volatile int *endflag = data;
	local_irq_enable_in_hardirq();
	/* Intentionally don't use cpu_relax here. This is
	   to make sure that the performance counter really ticks,
	   even if there is a simulator or similar that catches the
	   pause instruction. On a real HT machine this is fine because
	   all other CPUs are busy with "useless" delay loops and don't
	   care if they get somewhat less cycles. */
	while (*endflag == 0)
		mb();
}
int __init check_nmi_watchdog (void)
{
	volatile int endflag = 0;
	int *counts;
	int cpu;

	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
		return 0;

	if (!atomic_read(&nmi_active))
		return 0;

	counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
	if (!counts)
		return -1;

	printk(KERN_INFO "testing NMI watchdog ... ");

	if (nmi_watchdog == NMI_LOCAL_APIC)
		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		counts[cpu] = cpu_pda(cpu)->__nmi_count;
	mdelay((10*1000)/nmi_hz); /* wait 10 ticks */

	for_each_online_cpu(cpu) {
		if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
			continue;
		if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) {
			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
			       cpu,
			       counts[cpu],
			       cpu_pda(cpu)->__nmi_count);
			per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
			atomic_dec(&nmi_active);
		}
	}
	endflag = 1;
	if (!atomic_read(&nmi_active)) {
		kfree(counts);
		atomic_set(&nmi_active, -1);
		return -1;
	}
	printk("OK.\n");
	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC) {
		struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

		nmi_hz = 1;
		/*
		 * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
		 * are writable, with higher bits sign extending from bit 31.
		 * So we can only program the counter with 31 bit values, and
		 * bit 31 must be set so that bits 32..63 sign-extend to 1.
		 * Find the appropriate nmi_hz.
		 */
		if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 &&
		    ((u64)cpu_khz * 1000) > 0x7fffffffULL) {
			nmi_hz = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1;
		}
	}

	kfree(counts);
	return 0;
}
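/*
 * Worked example for the nmi_hz rescaling above (illustrative CPU speed, not
 * from this file): on a 3.0 GHz part, cpu_khz * 1000 = 3,000,000,000, which
 * exceeds 0x7fffffff (2,147,483,647), so
 * nmi_hz = 3000000000 / 2147483647 + 1 = 2 and the counter period becomes
 * -(3000000000 / 2) = -1,500,000,000 cycles, i.e. roughly two NMIs a second.
 */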
int __init setup_nmi_watchdog(char *str)
{
	int nmi;

	if (!strncmp(str, "panic", 5)) {
		panic_on_timeout = 1;
		str = strchr(str, ',');
		if (!str)
			return 1;
		++str;
	}

	get_option(&str, &nmi);

	if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
		return 0;

	if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0))
		return 0;  /* no lapic support */

	nmi_watchdog = nmi;
	return 1;
}

__setup("nmi_watchdog=", setup_nmi_watchdog);
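/*
 * Boot-line usage of the option parsed above (illustrative; the numeric
 * values come from the NMI_* constants in <asm/nmi.h>): "nmi_watchdog=2"
 * selects the local APIC watchdog, "nmi_watchdog=1" the IO-APIC timer
 * watchdog, and a leading "panic" as in "nmi_watchdog=panic,2" also sets
 * panic_on_timeout so a detected lockup panics instead of just oopsing.
 */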
static void disable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}

static void enable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	/* are we already enabled */
	if (atomic_read(&nmi_active) != 0)
		return;

	/* are we lapic aware */
	if (nmi_known_cpu() <= 0)
		return;

	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
	touch_nmi_watchdog();
}

void disable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}

void enable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) == 0) {
		touch_nmi_watchdog();
		on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
	}
}
#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */

static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	/* only CPU0 goes here, other CPUs should be offline */
	nmi_pm_active = atomic_read(&nmi_active);
	stop_apic_nmi_watchdog(NULL);
	BUG_ON(atomic_read(&nmi_active) != 0);
	return 0;
}

static int lapic_nmi_resume(struct sys_device *dev)
{
	/* only CPU0 goes here, other CPUs should be offline */
	if (nmi_pm_active > 0) {
		setup_apic_nmi_watchdog(NULL);
		touch_nmi_watchdog();
	}
	return 0;
}

static struct sysdev_class nmi_sysclass = {
	set_kset_name("lapic_nmi"),
	.resume		= lapic_nmi_resume,
	.suspend	= lapic_nmi_suspend,
};

static struct sys_device device_lapic_nmi = {
	.cls	= &nmi_sysclass,
};

static int __init init_lapic_nmi_sysfs(void)
{
	int error;

	/* should really be a BUG_ON but b/c this is an
	 * init call, it just doesn't work.  -dcz
	 */
	if (nmi_watchdog != NMI_LOCAL_APIC)
		return 0;

	if (atomic_read(&nmi_active) < 0)
		return 0;

	error = sysdev_class_register(&nmi_sysclass);
	if (!error)
		error = sysdev_register(&device_lapic_nmi);
	return error;
}
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);

#endif	/* CONFIG_PM */
/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */

/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
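/*
 * How the period below is derived (a sketch, using an assumed clock speed):
 * the watchdog programs the selected counter to the negative of one period,
 * -((u64)cpu_khz * 1000 / nmi_hz), so counting "cycles the processor is
 * running" makes it overflow, and raise the NMI routed through APIC_LVTPC,
 * about nmi_hz times per second. On a 2 GHz CPU with nmi_hz = 1 the counter
 * starts at -2,000,000,000 and wraps after roughly one second of busy time.
 */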
static int setup_k7_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = MSR_K7_PERFCTR0;
	evntsel_msr = MSR_K7_EVNTSEL0;
	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	/* Simulator may not support it */
	if (checking_wrmsrl(evntsel_msr, 0UL))
		goto fail2;
	wrmsrl(perfctr_msr, 0UL);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;	/* unused */
	wd->check_bit = 1ULL << 63;
	return 1;
fail2:
	release_evntsel_nmi(evntsel_msr);
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}

static void stop_k7_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)
#define P4_ESCR_USR		(1<<2)
#define P4_CCCR_OVF_PMI0	(1<<26)
#define P4_CCCR_OVF_PMI1	(1<<27)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
#define P4_CCCR_OVF		(1<<31)

/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */
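/*
 * Why that makes the counter tick every cycle: with P4_CCCR_COMPARE and
 * P4_CCCR_COMPLEMENT set, the CCCR only counts cycles in which the event
 * count is less than or equal to the threshold; a complemented maximum
 * threshold of 15 is satisfied on every cycle, so IQ_COUNTER0 simply
 * advances once per clock and overflows after the programmed period, which
 * is exactly the clock-like behaviour the watchdog needs.
 */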
static int setup_p4_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
	unsigned int evntsel, cccr_val;
	unsigned int misc_enable, dummy;
	unsigned int ht_num;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

	/* detect which hyperthread we are on */
	if (smp_num_siblings == 2) {
		unsigned int ebx, apicid;

		ebx = cpuid_ebx(1);
		apicid = (ebx >> 24) & 0xff;
		ht_num = apicid & 1;
	} else
		ht_num = 0;

	/* performance counters are shared resources
	 * assign each hyperthread its own set
	 * (re-use the ESCR0 register, seems safe
	 * and keeps the cccr_val the same)
	 */
	if (!ht_num) {
		/* logical cpu 0 */
		perfctr_msr = MSR_P4_IQ_PERFCTR0;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR0;
		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
	} else {
		/* logical cpu 1 */
		perfctr_msr = MSR_P4_IQ_PERFCTR1;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR1;
		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
	}

	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
		| P4_ESCR_OS
		| P4_ESCR_USR;

	cccr_val |= P4_CCCR_THRESHOLD(15)
		 | P4_CCCR_COMPLEMENT
		 | P4_CCCR_COMPARE
		 | P4_CCCR_REQUIRED;

	wrmsr(evntsel_msr, evntsel, 0);
	wrmsr(cccr_msr, cccr_val, 0);
	wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	cccr_val |= P4_CCCR_ENABLE;
	wrmsr(cccr_msr, cccr_val, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = cccr_msr;
	wd->check_bit = 1ULL << 39;
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}

static void stop_p4_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->cccr_msr, 0, 0);
	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
static int setup_intel_arch_watchdog(void)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		goto fail;

	perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
	evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;

	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = ARCH_PERFMON_EVENTSEL_INT
		| ARCH_PERFMON_EVENTSEL_OS
		| ARCH_PERFMON_EVENTSEL_USR
		| ARCH_PERFMON_NMI_EVENT_SEL
		| ARCH_PERFMON_NMI_EVENT_UMASK;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;	/* unused */
	wd->check_bit = 1ULL << (eax.split.bit_width - 1);
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
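/*
 * Note on check_bit above: CPUID leaf 0xA reports the counter width in
 * eax.split.bit_width, so the overflow test in nmi_watchdog_tick() checks
 * the counter's top bit. For example, a part reporting a 40-bit counter
 * gets check_bit = 1ULL << 39, matching the fixed value used for the P4.
 */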
static void stop_intel_arch_watchdog(void)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		return;

	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
void setup_apic_nmi_watchdog(void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 1)
		return;

	/* cheap hack to support suspend/resume */
	/* if cpu0 is not active neither should the other cpus */
	if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
		return;

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
				return;
			if (!setup_k7_watchdog())
				return;
			break;
		case X86_VENDOR_INTEL:
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				if (!setup_intel_arch_watchdog())
					return;
				break;
			}
			if (!setup_p4_watchdog())
				return;
			break;
		default:
			return;
		}
	}
	wd->enabled = 1;
	atomic_inc(&nmi_active);
}

void stop_apic_nmi_watchdog(void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 0)
		return;

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
				return;
			stop_k7_watchdog();
			break;
		case X86_VENDOR_INTEL:
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				stop_intel_arch_watchdog();
				break;
			}
			stop_p4_watchdog();
			break;
		default:
			return;
		}
	}
	wd->enabled = 0;
	atomic_dec(&nmi_active);
}
/*
 * the best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * as these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 */

static DEFINE_PER_CPU(unsigned, last_irq_sum);
static DEFINE_PER_CPU(local_t, alert_counter);
static DEFINE_PER_CPU(int, nmi_touch);
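/*
 * Concretely (the numbers follow from the code below, not extra tuning):
 * every watchdog NMI compares this CPU's apic_timer_irqs count, read from
 * the PDA, against last_irq_sum. If it has not moved and nothing touched
 * the watchdog, alert_counter is bumped; once it reaches 5*nmi_hz
 * consecutive ticks, i.e. about five seconds of a stuck timer interrupt,
 * die_nmi() is called. Any progress or a touch_nmi_watchdog() resets the
 * counter to 0.
 */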
void touch_nmi_watchdog (void)
{
	if (nmi_watchdog > 0) {
		unsigned cpu;

		/*
		 * Tell other CPUs to reset their alert counters. We cannot
		 * do it ourselves because the alert count increase is not
		 * atomic.
		 */
		for_each_present_cpu (cpu)
			per_cpu(nmi_touch, cpu) = 1;
	}

	touch_softlockup_watchdog();
}
int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
{
	int sum;
	int touched = 0;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 dummy;
	int rc = 0;

	/* check for other users first */
	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
			== NOTIFY_STOP) {
		rc = 1;
		touched = 1;
	}

	sum = read_pda(apic_timer_irqs);
	if (__get_cpu_var(nmi_touch)) {
		__get_cpu_var(nmi_touch) = 0;
		touched = 1;
	}

#ifdef CONFIG_X86_MCE
	/* Could check oops_in_progress here too, but it's safer
	   not to. */
	if (atomic_read(&mce_entry) > 0)
		touched = 1;
#endif
	/* if the apic timer isn't firing, this cpu isn't doing much */
	if (!touched && __get_cpu_var(last_irq_sum) == sum) {
		/*
		 * Ayiee, looks like this CPU is stuck ...
		 * wait a few IRQs (5 seconds) before doing the oops ...
		 */
		local_inc(&__get_cpu_var(alert_counter));
		if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz)
			die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs,
				panic_on_timeout);
	} else {
		__get_cpu_var(last_irq_sum) = sum;
		local_set(&__get_cpu_var(alert_counter), 0);
	}

	/* see if the nmi watchdog went off */
	if (nmi_watchdog == NMI_LOCAL_APIC) {
		rdmsrl(wd->perfctr_msr, dummy);
		if (dummy & wd->check_bit) {
			/* this wasn't a watchdog timer interrupt */
			goto done;
		}

		/* only Intel uses the cccr msr */
		if (wd->cccr_msr != 0) {
			/*
			 * P4 quirks:
			 * - An overflown perfctr will assert its interrupt
			 *   until the OVF flag in its CCCR is cleared.
			 * - LVTPC is masked on interrupt and must be
			 *   unmasked by the LVTPC handler.
			 */
			rdmsrl(wd->cccr_msr, dummy);
			dummy &= ~P4_CCCR_OVF;
			wrmsrl(wd->cccr_msr, dummy);
			apic_write(APIC_LVTPC, APIC_DM_NMI);
		} else if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
			/*
			 * ArchPerfom/Core Duo needs to re-unmask
			 * the apic vector
			 */
			apic_write(APIC_LVTPC, APIC_DM_NMI);
		}
		/* start the cycle over again */
		wrmsrl(wd->perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
		rc = 1;
	} else if (nmi_watchdog == NMI_IO_APIC) {
		/* don't know how to accurately check for this.
		 * just assume it was a watchdog timer interrupt
		 * This matches the old behaviour.
		 */
		rc = 1;
	} else
		printk(KERN_WARNING "Unknown enabled NMI hardware?!\n");

done:
	return rc;
}
asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code)
{
	nmi_enter();
	add_pda(__nmi_count, 1);
	default_do_nmi(regs);
	nmi_exit();
}

int do_nmi_callback(struct pt_regs * regs, int cpu)
{
	if (unknown_nmi_panic)
		return unknown_nmi_panic_callback(regs, cpu);
	return 0;
}
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
	char buf[64];

	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
	die_nmi(buf, regs, 1); /* Always panic here */
	return 0;
}
/*
 * proc handler for /proc/sys/kernel/nmi
 */
int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	int old_state;

	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
	old_state = nmi_watchdog_enabled;
	proc_dointvec(table, write, file, buffer, length, ppos);
	if (!!old_state == !!nmi_watchdog_enabled)
		return 0;

	if (atomic_read(&nmi_active) < 0) {
		printk(KERN_WARNING "NMI watchdog is permanently disabled\n");
		return -EIO;
	}

	/* if nmi_watchdog is not set yet, then set it */
	nmi_watchdog_default();

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		if (nmi_watchdog_enabled)
			enable_lapic_nmi_watchdog();
		else
			disable_lapic_nmi_watchdog();
	} else {
		printk(KERN_WARNING
			"NMI watchdog doesn't know what hardware to touch\n");
		return -EIO;
	}
	return 0;
}
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);
EXPORT_SYMBOL(touch_nmi_watchdog);