1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Derived from arch/i386/kernel/irq.c
4 * Copyright (C) 1992 Linus Torvalds
5 * Adapted from arch/i386 by Gary Thomas
6 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
7 * Updated and modified by Cort Dougan <cort@fsmlabs.com>
8 * Copyright (C) 1996-2001 Cort Dougan
9 * Adapted for Power Macintosh by Paul Mackerras
10 * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
12 * This file contains the code used by various IRQ handling routines:
13 * asking for different IRQ's should be done through these routines
14 * instead of just grabbing them. Thus setups with different IRQ numbers
15 * shouldn't result in any weird surprises, and installing new handlers
18 * The MPC8xx has an interrupt mask in the SIU. If a bit is set, the
19 * interrupt is _enabled_. As expected, IRQ0 is bit 0 in the 32-bit
20 * mask register (of which only 16 are defined), hence the weird shifting
21 * and complement of the cached_irq_mask. I want to be able to stuff
22 * this right into the SIU SMASK register.
23 * Many of the prep/chrp functions are conditional compiled on CONFIG_PPC_8xx
24 * to reduce code space and undefined function references.
29 #include <linux/export.h>
30 #include <linux/threads.h>
31 #include <linux/kernel_stat.h>
32 #include <linux/signal.h>
33 #include <linux/sched.h>
34 #include <linux/ptrace.h>
35 #include <linux/ioport.h>
36 #include <linux/interrupt.h>
37 #include <linux/timex.h>
38 #include <linux/init.h>
39 #include <linux/slab.h>
40 #include <linux/delay.h>
41 #include <linux/irq.h>
42 #include <linux/seq_file.h>
43 #include <linux/cpumask.h>
44 #include <linux/profile.h>
45 #include <linux/bitops.h>
46 #include <linux/list.h>
47 #include <linux/radix-tree.h>
48 #include <linux/mutex.h>
49 #include <linux/pci.h>
50 #include <linux/debugfs.h>
52 #include <linux/of_irq.h>
53 #include <linux/vmalloc.h>
55 #include <linux/uaccess.h>
57 #include <asm/pgtable.h>
59 #include <asm/cache.h>
61 #include <asm/ptrace.h>
62 #include <asm/machdep.h>
65 #include <asm/livepatch.h>
66 #include <asm/asm-prototypes.h>
67 #include <asm/hw_irq.h>
71 #include <asm/firmware.h>
72 #include <asm/lv1call.h>
74 #define CREATE_TRACE_POINTS
75 #include <asm/trace.h>
76 #include <asm/cpu_has_feature.h>
/*
 * File-scope data.
 *
 * irq_stat is a per-CPU record of type irq_cpustat_t. Its counters are
 * printed by arch_show_interrupts() and summed by arch_irq_stat_cpu() in
 * this file, and the symbol is exported.
 *
 * NOTE(review): the numeric prefixes skip values here, and an
 * "#endif CONFIG_PPC32" appears with no visible opener, so the
 * preprocessor conditionals guarding the declarations below were lost
 * from this listing. Restore them from the pristine file.
 */
78 DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t
, irq_stat
);
79 EXPORT_PER_CPU_SYMBOL(irq_stat
);
82 atomic_t ppc_n_lost_interrupts
;
/*
 * Thermal-assist hooks; arch_show_interrupts() reads both under
 * CONFIG_PPC32 && CONFIG_TAU_INT.
 */
85 extern int tau_initialized
;
86 u32
tau_interrupts(unsigned long cpu
);
88 #endif /* CONFIG_PPC32 */
/*
 * Initialised to 1. Presumably cleared by the "noirqdistrib" boot option;
 * the body of setup_noirqdistrib() is not visible here -- TODO confirm.
 */
92 int distribute_irqs
= 1;
94 static inline notrace
unsigned long get_irq_happened(void)
96 unsigned long happened
;
98 __asm__
__volatile__("lbz %0,%1(13)"
99 : "=r" (happened
) : "i" (offsetof(struct paca_struct
, irq_happened
)));
104 static inline notrace
int decrementer_check_overflow(void)
106 u64 now
= get_tb_or_rtc();
107 u64
*next_tb
= this_cpu_ptr(&decrementers_next_tb
);
109 return now
>= *next_tb
;
/*
 * This is called whenever we are re-enabling interrupts. It returns
 * either 0 (nothing to do) or 500/900/280/a00/e80 if there's an EE,
 * DEC or DBELL to generate.
 *
 * This is called in two contexts: from arch_local_irq_restore()
 * before soft-enabling interrupts, and from the exception exit
 * path when returning from an interrupt from a soft-disabled to
 * a soft-enabled context. In both cases we have interrupts hard
 * disabled.
 *
 * We take care of only clearing the bits we handled in the
 * PACA irq_happened field since we can only re-emit one at a
 * time and we don't want to "lose" one.
 */
/*
 * Examine local_paca->irq_happened and clear the bit of a pending
 * soft-masked interrupt so that it can be replayed.
 *
 * Visible steps, in order:
 *  - take a snapshot of irq_happened;
 *  - if PACA_IRQ_HARD_DIS is set in the snapshot, clear it in the PACA
 *    and, when PACA_IRQ_DEC is not already pending, mark it pending if
 *    decrementer_check_overflow() reports the deadline has passed;
 *  - on PS3 LV1 firmware, make an lv1_get_version_info() call;
 *  - test HMI, DEC, PMI and EE in that order, then the EE_EDGE/DBELL
 *    tests, clearing the matching bit in the PACA;
 *  - BUG if any bit is still set in irq_happened.
 *
 * NOTE(review): the numeric prefixes skip values, so rows were lost from
 * this listing: the braces, the comment delimiters, the declaration of
 * tmp/tmp2, whatever follows each bit-clear (presumably a return of the
 * vector to replay -- TODO confirm), the directive that separates the two
 * PACA_IRQ_DBELL tests, and the final return. Restore them from the
 * pristine file before relying on this block.
 */
126 notrace
unsigned int __check_irq_replay(void)
129 * We use local_paca rather than get_paca() to avoid all
130 * the debug_smp_processor_id() business in this low level
/* Snapshot of the pending bits; every test below uses this copy. */
133 unsigned char happened
= local_paca
->irq_happened
;
136 * We are responding to the next interrupt, so interrupt-off
137 * latencies should be reset here.
140 trace_hardirqs_off();
143 * We are always hard disabled here, but PACA_IRQ_HARD_DIS may
144 * not be set, which means interrupts have only just been hard
145 * disabled as part of the local_irq_restore or interrupt return
146 * code. In that case, skip the decrementr check becaus it's
147 * expensive to read the TB.
149 * HARD_DIS then gets cleared here, but it's reconciled later.
150 * Either local_irq_disable will replay the interrupt and that
151 * will reconcile state like other hard interrupts. Or interrupt
152 * retur will replay the interrupt and in that case it sets
153 * PACA_IRQ_HARD_DIS by hand (see comments in entry_64.S).
155 if (happened
& PACA_IRQ_HARD_DIS
) {
156 local_paca
->irq_happened
&= ~PACA_IRQ_HARD_DIS
;
159 * We may have missed a decrementer interrupt if hard disabled.
160 * Check the decrementer register in case we had a rollover
161 * while hard disabled.
163 if (!(happened
& PACA_IRQ_DEC
)) {
164 if (decrementer_check_overflow()) {
/* Record the missed tick in both the PACA and the local snapshot. */
165 local_paca
->irq_happened
|= PACA_IRQ_DEC
;
166 happened
|= PACA_IRQ_DEC
;
172 * Force the delivery of pending soft-disabled interrupts on PS3.
173 * Any HV call will have this side effect.
175 if (firmware_has_feature(FW_FEATURE_PS3_LV1
)) {
/* The values written to tmp/tmp2 are not used by any visible statement. */
177 lv1_get_version_info(&tmp
, &tmp2
);
181 * Check if an hypervisor Maintenance interrupt happened.
182 * This is a higher priority interrupt than the others, so
185 if (happened
& PACA_IRQ_HMI
) {
186 local_paca
->irq_happened
&= ~PACA_IRQ_HMI
;
190 if (happened
& PACA_IRQ_DEC
) {
191 local_paca
->irq_happened
&= ~PACA_IRQ_DEC
;
195 if (happened
& PACA_IRQ_PMI
) {
196 local_paca
->irq_happened
&= ~PACA_IRQ_PMI
;
200 if (happened
& PACA_IRQ_EE
) {
201 local_paca
->irq_happened
&= ~PACA_IRQ_EE
;
205 #ifdef CONFIG_PPC_BOOK3E
207 * Check if an EPR external interrupt happened this bit is typically
208 * set if we need to handle another "edge" interrupt from within the
209 * MPIC "EPR" handler.
211 if (happened
& PACA_IRQ_EE_EDGE
) {
212 local_paca
->irq_happened
&= ~PACA_IRQ_EE_EDGE
;
216 if (happened
& PACA_IRQ_DBELL
) {
217 local_paca
->irq_happened
&= ~PACA_IRQ_DBELL
;
/*
 * NOTE(review): a second, identical DBELL test follows; the row between
 * the two (presumably the alternative branch of the BOOK3E conditional)
 * is missing from the listing.
 */
221 if (happened
& PACA_IRQ_DBELL
) {
222 local_paca
->irq_happened
&= ~PACA_IRQ_DBELL
;
225 #endif /* CONFIG_PPC_BOOK3E */
227 /* There should be nothing left ! */
228 BUG_ON(local_paca
->irq_happened
!= 0);
/*
 * Set the soft interrupt mask to @mask and replay whatever became pending
 * while interrupts were soft-disabled.
 *
 * @mask: new soft-mask value, written with irq_soft_mask_set().
 *
 * Visible steps: write @mask; read the pending bits with
 * get_irq_happened(); hard-disable when PACA_IRQ_HARD_DIS is not already
 * recorded; soft-disable everything again and tell the tracer; ask
 * __check_irq_replay() what to replay; soft-enable; hand the result to
 * __replay_interrupt().
 *
 * NOTE(review): the numeric prefixes skip values, so rows were lost from
 * this listing: the braces and comment delimiters, the declaration of
 * replay, the conditions guarding the early exits, the "else" that joins
 * the two hard-disable paths, several #endif directives and the final
 * hard-enable. Restore them from the pristine file before relying on
 * this block.
 */
233 notrace
void arch_local_irq_restore(unsigned long mask
)
235 unsigned char irq_happened
;
238 /* Write the new soft-enabled value */
239 irq_soft_mask_set(mask
);
244 * From this point onward, we can take interrupts, preempt,
245 * etc... unless we got hard-disabled. We check if an event
246 * happened. If none happened, we know we can just return.
248 * We may have preempted before the check below, in which case
249 * we are checking the "new" CPU instead of the old one. This
250 * is only a problem if an event happened on the "old" CPU.
252 * External interrupt events will have caused interrupts to
253 * be hard-disabled, so there is no problem, we
254 * cannot have preempted.
256 irq_happened
= get_irq_happened();
/* Debug-only sanity check that MSR[EE] is set at this point. */
258 #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
259 WARN_ON_ONCE(!(mfmsr() & MSR_EE
));
265 * We need to hard disable to get a trusted value from
266 * __check_irq_replay(). We also need to soft-disable
267 * again to avoid warnings in there due to the use of
270 if (!(irq_happened
& PACA_IRQ_HARD_DIS
)) {
271 #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
272 WARN_ON_ONCE(!(mfmsr() & MSR_EE
));
274 __hard_irq_disable();
275 #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
278 * We should already be hard disabled here. We had bugs
279 * where that wasn't the case so let's dbl check it and
280 * warn if we are wrong. Only do that when IRQ tracing
281 * is enabled as mfmsr() can be costly.
283 if (WARN_ON_ONCE(mfmsr() & MSR_EE
))
284 __hard_irq_disable();
/* Soft-disable again before calling __check_irq_replay(). */
288 irq_soft_mask_set(IRQS_ALL_DISABLED
);
289 trace_hardirqs_off();
292 * Check if anything needs to be re-emitted. We haven't
293 * soft-enabled yet to avoid warnings in decrementer_check_overflow
294 * accessing per-cpu variables
296 replay
= __check_irq_replay();
298 /* We can soft-enable now */
300 irq_soft_mask_set(IRQS_ENABLED
);
303 * And replay if we have to. This will return with interrupts
307 __replay_interrupt(replay
);
311 /* Finally, let's ensure we are hard enabled */
314 EXPORT_SYMBOL(arch_local_irq_restore
);
/*
 * This is specifically called by assembly code to re-enable interrupts
 * if they are currently disabled. This is typically called before
 * schedule() or do_signal() when returning to userspace. We do it
 * in C to avoid the burden of dealing with lockdep etc...
 *
 * NOTE: This is called with interrupts hard disabled but not marked
 * as such in paca->irq_happened, so we need to resync this.
 */
/*
 * Re-enable interrupts on behalf of the caller.
 *
 * Visible behaviour: when irqs_disabled() is true, PACA_IRQ_HARD_DIS is
 * recorded in local_paca->irq_happened.
 *
 * NOTE(review): the numeric prefixes skip values after the bit-set, so the
 * rest of the body (what runs after the flag is recorded, and the path
 * taken when interrupts are not disabled) is missing from this listing.
 * Restore it from the pristine file.
 */
325 void notrace
restore_interrupts(void)
327 if (irqs_disabled()) {
328 local_paca
->irq_happened
|= PACA_IRQ_HARD_DIS
;
/*
 * This is a helper to use when about to go into idle low-power
 * when the latter has the side effect of re-enabling interrupts
 * (such as calling H_CEDE under pHyp).
 *
 * You call this function with interrupts soft-disabled (this is
 * already the case when ppc_md.power_save is called). The function
 * will return whether to enter power save or just return.
 *
 * In the former case, it will have notified lockdep of interrupts
 * being re-enabled and generally sanitized the lazy irq state,
 * and in the latter case it will leave with interrupts hard
 * disabled and marked as such, so the local_irq_enable() call
 * in arch_cpu_idle() will properly re-enable everything.
 */
/*
 * Prepare the lazy-IRQ state for entering an idle state that re-enables
 * interrupts as a side effect.
 *
 * Visible steps: hard-disable and record PACA_IRQ_HARD_DIS; test
 * lazy_irq_pending(); later clear PACA_IRQ_HARD_DIS and set the soft mask
 * to IRQS_ENABLED.
 *
 * Returns a bool telling the caller whether to enter the low power state.
 *
 * NOTE(review): the numeric prefixes skip values, so rows were lost from
 * this listing: the braces and comment delimiters, the statement guarded
 * by lazy_irq_pending(), the lockdep notification and the final return.
 * Restore them from the pristine file.
 */
349 bool prep_irq_for_idle(void)
352 * First we need to hard disable to ensure no interrupt
353 * occurs before we effectively enter the low power state
355 __hard_irq_disable();
356 local_paca
->irq_happened
|= PACA_IRQ_HARD_DIS
;
359 * If anything happened while we were soft-disabled,
360 * we return now and do not enter the low power state.
362 if (lazy_irq_pending())
365 /* Tell lockdep we are about to re-enable */
369 * Mark interrupts as soft-enabled and clear the
370 * PACA_IRQ_HARD_DIS from the pending mask since we
371 * are about to hard enable as well as a side effect
372 * of entering the low power state.
374 local_paca
->irq_happened
&= ~PACA_IRQ_HARD_DIS
;
375 irq_soft_mask_set(IRQS_ENABLED
);
377 /* Tell the caller to enter the low power state */
381 #ifdef CONFIG_PPC_BOOK3S
/*
 * This is for idle sequences that return with IRQs off, but the
 * idle state itself wakes on interrupt. Tell the irq tracer that
 * IRQs are enabled for the duration of idle so it does not get long
 * off times. Must be paired with fini_irq_for_idle_irqsoff.
 */
/*
 * Variant of the idle preparation for idle sequences that return with
 * IRQs off.
 *
 * Visible steps: warn if interrupts are not disabled on entry;
 * hard-disable and record PACA_IRQ_HARD_DIS; test lazy_irq_pending().
 *
 * Returns a bool.
 *
 * NOTE(review): the numeric prefixes skip values, so rows were lost from
 * this listing: the braces and comment delimiters, the statement guarded
 * by lazy_irq_pending(), the lockdep notification and the final return.
 * Restore them from the pristine file.
 */
388 bool prep_irq_for_idle_irqsoff(void)
390 WARN_ON(!irqs_disabled());
393 * First we need to hard disable to ensure no interrupt
394 * occurs before we effectively enter the low power state
396 __hard_irq_disable();
397 local_paca
->irq_happened
|= PACA_IRQ_HARD_DIS
;
400 * If anything happened while we were soft-disabled,
401 * we return now and do not enter the low power state.
403 if (lazy_irq_pending())
406 /* Tell lockdep we are about to re-enable */
/*
 * Take the SRR1 wakeup reason, index into this table to find the
 * appropriate irq_happened bit.
 *
 * System reset exceptions taken in idle state also come through here,
 * but they are NMI interrupts so do not need to wait for IRQs to be
 * restored, and should be taken as early as practical. These are marked
 * with 0xff in the table. The Power ISA specifies 0100b as the system
 * reset interrupt reason.
 */
/*
 * IRQ_SYSTEM_RESET is the sentinel that irq_set_pending_from_srr1()
 * compares a table entry against before calling replay_system_reset().
 */
422 #define IRQ_SYSTEM_RESET 0xff
/*
 * 16-entry table indexed by the SRR1 wake reason, i.e.
 * (srr1 & SRR1_WAKEMASK_P8) >> 18 as computed in
 * irq_set_pending_from_srr1().
 *
 * NOTE(review): the initialisers and the closing brace are missing from
 * this listing (the numeric prefixes skip over them). Restore them from
 * the pristine file.
 */
424 static const u8 srr1_to_lazyirq
[0x10] = {
436 void replay_system_reset(void)
440 ppc_save_regs(®s
);
442 get_paca()->in_nmi
= 1;
443 system_reset_exception(®s
);
444 get_paca()->in_nmi
= 0;
446 EXPORT_SYMBOL_GPL(replay_system_reset
);
/*
 * Translate the SRR1 wake reason into a pending bit in
 * local_paca->irq_happened.
 *
 * @srr1: SRR1 value; the wake reason is extracted with SRR1_WAKEMASK_P8
 *        and a right shift by 18, then used to index srr1_to_lazyirq[].
 *
 * A table value equal to IRQ_SYSTEM_RESET triggers replay_system_reset();
 * otherwise the table value is OR-ed into irq_happened.
 *
 * NOTE(review): the numeric prefixes skip values, so rows were lost from
 * this listing: the braces, the comment delimiters and whatever follows
 * the replay_system_reset() call inside the branch (presumably an early
 * return, since 0xff is described as a marker rather than a bit mask --
 * TODO confirm). Restore them from the pristine file.
 */
448 void irq_set_pending_from_srr1(unsigned long srr1
)
450 unsigned int idx
= (srr1
& SRR1_WAKEMASK_P8
) >> 18;
451 u8 reason
= srr1_to_lazyirq
[idx
];
454 * Take the system reset now, which is immediately after registers
455 * are restored from idle. It's an NMI, so interrupts need not be
456 * re-enabled before it is taken.
458 if (unlikely(reason
== IRQ_SYSTEM_RESET
)) {
459 replay_system_reset();
464 * The 0 index (SRR1[42:45]=b0000) must always evaluate to 0,
465 * so this can be called unconditionally with the SRR1 wake
466 * reason as returned by the idle code, which uses 0 to mean no
469 * If a future CPU was to designate this as an interrupt reason,
470 * then a new index for no interrupt must be assigned.
472 local_paca
->irq_happened
|= reason
;
474 #endif /* CONFIG_PPC_BOOK3S */
/*
 * Force a replay of the external interrupt handler on this CPU.
 */
479 void force_external_irq_replay(void)
482 * This must only be called with interrupts soft-disabled,
483 * the replay will happen when re-enabling.
485 WARN_ON(!arch_irqs_disabled());
488 * Interrupts must always be hard disabled before irq_happened is
489 * modified (to prevent lost update in case of interrupt between
492 __hard_irq_disable();
493 local_paca
->irq_happened
|= PACA_IRQ_HARD_DIS
;
495 /* Indicate in the PACA that we have an interrupt to replay */
496 local_paca
->irq_happened
|= PACA_IRQ_EE
;
499 #endif /* CONFIG_PPC64 */
501 int arch_show_interrupts(struct seq_file
*p
, int prec
)
505 #if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT)
506 if (tau_initialized
) {
507 seq_printf(p
, "%*s: ", prec
, "TAU");
508 for_each_online_cpu(j
)
509 seq_printf(p
, "%10u ", tau_interrupts(j
));
510 seq_puts(p
, " PowerPC Thermal Assist (cpu temp)\n");
512 #endif /* CONFIG_PPC32 && CONFIG_TAU_INT */
514 seq_printf(p
, "%*s: ", prec
, "LOC");
515 for_each_online_cpu(j
)
516 seq_printf(p
, "%10u ", per_cpu(irq_stat
, j
).timer_irqs_event
);
517 seq_printf(p
, " Local timer interrupts for timer event device\n");
519 seq_printf(p
, "%*s: ", prec
, "BCT");
520 for_each_online_cpu(j
)
521 seq_printf(p
, "%10u ", per_cpu(irq_stat
, j
).broadcast_irqs_event
);
522 seq_printf(p
, " Broadcast timer interrupts for timer event device\n");
524 seq_printf(p
, "%*s: ", prec
, "LOC");
525 for_each_online_cpu(j
)
526 seq_printf(p
, "%10u ", per_cpu(irq_stat
, j
).timer_irqs_others
);
527 seq_printf(p
, " Local timer interrupts for others\n");
529 seq_printf(p
, "%*s: ", prec
, "SPU");
530 for_each_online_cpu(j
)
531 seq_printf(p
, "%10u ", per_cpu(irq_stat
, j
).spurious_irqs
);
532 seq_printf(p
, " Spurious interrupts\n");
534 seq_printf(p
, "%*s: ", prec
, "PMI");
535 for_each_online_cpu(j
)
536 seq_printf(p
, "%10u ", per_cpu(irq_stat
, j
).pmu_irqs
);
537 seq_printf(p
, " Performance monitoring interrupts\n");
539 seq_printf(p
, "%*s: ", prec
, "MCE");
540 for_each_online_cpu(j
)
541 seq_printf(p
, "%10u ", per_cpu(irq_stat
, j
).mce_exceptions
);
542 seq_printf(p
, " Machine check exceptions\n");
544 if (cpu_has_feature(CPU_FTR_HVMODE
)) {
545 seq_printf(p
, "%*s: ", prec
, "HMI");
546 for_each_online_cpu(j
)
547 seq_printf(p
, "%10u ",
548 per_cpu(irq_stat
, j
).hmi_exceptions
);
549 seq_printf(p
, " Hypervisor Maintenance Interrupts\n");
552 seq_printf(p
, "%*s: ", prec
, "NMI");
553 for_each_online_cpu(j
)
554 seq_printf(p
, "%10u ", per_cpu(irq_stat
, j
).sreset_irqs
);
555 seq_printf(p
, " System Reset interrupts\n");
557 #ifdef CONFIG_PPC_WATCHDOG
558 seq_printf(p
, "%*s: ", prec
, "WDG");
559 for_each_online_cpu(j
)
560 seq_printf(p
, "%10u ", per_cpu(irq_stat
, j
).soft_nmi_irqs
);
561 seq_printf(p
, " Watchdog soft-NMI interrupts\n");
564 #ifdef CONFIG_PPC_DOORBELL
565 if (cpu_has_feature(CPU_FTR_DBELL
)) {
566 seq_printf(p
, "%*s: ", prec
, "DBL");
567 for_each_online_cpu(j
)
568 seq_printf(p
, "%10u ", per_cpu(irq_stat
, j
).doorbell_irqs
);
569 seq_printf(p
, " Doorbell interrupts\n");
579 u64
arch_irq_stat_cpu(unsigned int cpu
)
581 u64 sum
= per_cpu(irq_stat
, cpu
).timer_irqs_event
;
583 sum
+= per_cpu(irq_stat
, cpu
).broadcast_irqs_event
;
584 sum
+= per_cpu(irq_stat
, cpu
).pmu_irqs
;
585 sum
+= per_cpu(irq_stat
, cpu
).mce_exceptions
;
586 sum
+= per_cpu(irq_stat
, cpu
).spurious_irqs
;
587 sum
+= per_cpu(irq_stat
, cpu
).timer_irqs_others
;
588 sum
+= per_cpu(irq_stat
, cpu
).hmi_exceptions
;
589 sum
+= per_cpu(irq_stat
, cpu
).sreset_irqs
;
590 #ifdef CONFIG_PPC_WATCHDOG
591 sum
+= per_cpu(irq_stat
, cpu
).soft_nmi_irqs
;
593 #ifdef CONFIG_PPC_DOORBELL
594 sum
+= per_cpu(irq_stat
, cpu
).doorbell_irqs
;
/*
 * Debug aid (CONFIG_DEBUG_STACKOVERFLOW): report when the current stack
 * is nearly exhausted.
 *
 * sp is the stack pointer masked with (THREAD_SIZE - 1), i.e. its offset
 * within the THREAD_SIZE-sized stack area; an error is logged when that
 * offset is below 2048 bytes.
 *
 * NOTE(review): the numeric prefixes skip values, so rows were lost from
 * this listing: the braces, the declaration of sp, anything following the
 * pr_err() call and the closing #endif. Restore them from the pristine
 * file.
 */
600 static inline void check_stack_overflow(void)
602 #ifdef CONFIG_DEBUG_STACKOVERFLOW
605 sp
= current_stack_pointer() & (THREAD_SIZE
-1);
607 /* check for stack overflow: is there less than 2KB free? */
608 if (unlikely(sp
< 2048)) {
609 pr_err("do_IRQ: stack overflow: %ld\n", sp
);
/*
 * Core of external interrupt handling.
 *
 * @regs: register frame of the interrupted context; passed to the
 *        irq entry/exit tracepoints.
 *
 * Visible steps: trace entry; obtain the interrupt number from
 * ppc_md.get_irq(); call may_hard_irq_enable(); bump the per-CPU
 * spurious_irqs counter and/or pass the number to generic_handle_irq();
 * trace exit.
 *
 * NOTE(review): the numeric prefixes skip values, so rows were lost from
 * this listing: the braces, the declaration of irq, any entry/exit
 * bookkeeping around the tracepoints, and the condition that chooses
 * between counting a spurious interrupt and calling generic_handle_irq()
 * (presumably a test for "no interrupt" -- TODO confirm). Restore them
 * from the pristine file.
 */
615 void __do_irq(struct pt_regs
*regs
)
621 trace_irq_entry(regs
);
624 * Query the platform PIC for the interrupt & ack it.
626 * This will typically lower the interrupt line to the CPU
628 irq
= ppc_md
.get_irq();
630 /* We can hard enable interrupts now to allow perf interrupts */
631 may_hard_irq_enable();
633 /* And finally process it */
635 __this_cpu_inc(irq_stat
.spurious_irqs
);
637 generic_handle_irq(irq
);
639 trace_irq_exit(regs
);
/*
 * External interrupt entry point: run the handler on the per-CPU IRQ
 * stack.
 *
 * @regs: register frame of the interrupted context; installed with
 *        set_irq_regs() for the duration of the call and restored after.
 *
 * cursp is the current stack pointer rounded down to a THREAD_SIZE
 * boundary. It is compared against this CPU's hardirq_ctx and softirq_ctx
 * entries to detect that we are already running on one of those stacks;
 * otherwise call_do_irq() is given the hard IRQ stack.
 *
 * NOTE(review): the numeric prefixes skip values, so rows were lost from
 * this listing: the braces and the statements of the "already there"
 * branch other than the set_irq_regs() restore (presumably a direct
 * handler call followed by a return -- TODO confirm). Restore them from
 * the pristine file.
 */
644 void do_IRQ(struct pt_regs
*regs
)
646 struct pt_regs
*old_regs
= set_irq_regs(regs
);
647 void *cursp
, *irqsp
, *sirqsp
;
649 /* Switch to the irq stack to handle this */
650 cursp
= (void *)(current_stack_pointer() & ~(THREAD_SIZE
- 1));
651 irqsp
= hardirq_ctx
[raw_smp_processor_id()];
652 sirqsp
= softirq_ctx
[raw_smp_processor_id()];
654 check_stack_overflow();
656 /* Already there ? */
657 if (unlikely(cursp
== irqsp
|| cursp
== sirqsp
)) {
659 set_irq_regs(old_regs
);
662 /* Switch stack and call */
663 call_do_irq(regs
, irqsp
);
665 set_irq_regs(old_regs
);
668 static void *__init
alloc_vm_stack(void)
670 return __vmalloc_node_range(THREAD_SIZE
, THREAD_ALIGN
, VMALLOC_START
,
671 VMALLOC_END
, THREADINFO_GFP
, PAGE_KERNEL
,
672 0, NUMA_NO_NODE
, (void*)_RET_IP_
);
675 static void __init
vmap_irqstack_init(void)
679 for_each_possible_cpu(i
) {
680 softirq_ctx
[i
] = alloc_vm_stack();
681 hardirq_ctx
[i
] = alloc_vm_stack();
/*
 * Boot-time IRQ initialisation.
 *
 * Visible behaviour: when CONFIG_VMAP_STACK is enabled, allocate the
 * per-CPU IRQ stacks through vmap_irqstack_init().
 *
 * NOTE(review): the numeric prefixes skip values after this call, so the
 * braces and any further initialisation are missing from this listing.
 * Restore them from the pristine file.
 */
686 void __init
init_IRQ(void)
688 if (IS_ENABLED(CONFIG_VMAP_STACK
))
689 vmap_irqstack_init();
/*
 * Per-CPU stack pointers, one slot per CPU up to NR_CPUS.
 *
 * softirq_ctx[] and hardirq_ctx[] are filled by vmap_irqstack_init() and
 * read by do_IRQ() and do_softirq_own_stack() in this file. The three
 * arrays inside the BOOKE/40x conditional are not referenced by any code
 * visible here.
 *
 * NOTE(review): the #endif closing the conditional below is missing from
 * this listing (the numeric prefixes skip over it); presumably it sits
 * between mcheckirq_ctx and softirq_ctx -- TODO confirm.
 */
695 #if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
696 void *critirq_ctx
[NR_CPUS
] __read_mostly
;
697 void *dbgirq_ctx
[NR_CPUS
] __read_mostly
;
698 void *mcheckirq_ctx
[NR_CPUS
] __read_mostly
;
701 void *softirq_ctx
[NR_CPUS
] __read_mostly
;
702 void *hardirq_ctx
[NR_CPUS
] __read_mostly
;
704 void do_softirq_own_stack(void)
706 call_do_softirq(softirq_ctx
[smp_processor_id()]);
709 irq_hw_number_t
virq_to_hw(unsigned int virq
)
711 struct irq_data
*irq_data
= irq_get_irq_data(virq
);
712 return WARN_ON(!irq_data
) ? 0 : irq_data
->hwirq
;
714 EXPORT_SYMBOL_GPL(virq_to_hw
);
/*
 * Choose a CPU to target an interrupt at.
 *
 * @mask: candidate CPUs.
 *
 * Returns the hard SMP processor id (get_hard_smp_processor_id()) of the
 * chosen CPU.
 *
 * When @mask equals cpu_online_mask, a static rover, protected by a raw
 * spinlock taken with interrupts saved and disabled, is advanced to the
 * next online CPU and wraps to the first one. Otherwise the first CPU
 * that is both in @mask and online is looked up.
 *
 * NOTE(review): the numeric prefixes skip values, so rows were lost from
 * this listing: the braces, the declarations of cpuid and flags, the
 * assignment that takes the rover as the result, the "else" joining the
 * two paths, and the fallback taken when cpumask_first_and() finds no
 * CPU. Restore them from the pristine file.
 */
717 int irq_choose_cpu(const struct cpumask
*mask
)
721 if (cpumask_equal(mask
, cpu_online_mask
)) {
722 static int irq_rover
;
723 static DEFINE_RAW_SPINLOCK(irq_rover_lock
);
726 /* Round-robin distribution... */
728 raw_spin_lock_irqsave(&irq_rover_lock
, flags
);
730 irq_rover
= cpumask_next(irq_rover
, cpu_online_mask
);
731 if (irq_rover
>= nr_cpu_ids
)
732 irq_rover
= cpumask_first(cpu_online_mask
);
736 raw_spin_unlock_irqrestore(&irq_rover_lock
, flags
);
738 cpuid
= cpumask_first_and(mask
, cpu_online_mask
);
739 if (cpuid
>= nr_cpu_ids
)
743 return get_hard_smp_processor_id(cpuid
);
/*
 * Variant of irq_choose_cpu() that ignores @mask and always returns the
 * hard SMP processor id of the CPU it runs on.
 *
 * NOTE(review): a second definition of the same name exists earlier in
 * this file, so the two are presumably alternative branches of a
 * preprocessor conditional whose directives are missing from the listing
 * -- TODO confirm.
 */
int irq_choose_cpu(const struct cpumask *mask)
{
	return hard_smp_processor_id();
}
/*
 * Handler registered with __setup() for the "noirqdistrib" kernel
 * command-line option.
 *
 * @str: the text following the option name; no visible statement uses it.
 *
 * NOTE(review): the whole function body is missing from this listing
 * (the numeric prefixes skip over it). Presumably it clears
 * distribute_irqs -- TODO confirm against the pristine file.
 */
753 static int __init
setup_noirqdistrib(char *str
)
759 __setup("noirqdistrib", setup_noirqdistrib
);
760 #endif /* CONFIG_PPC64 */