1 // SPDX-License-Identifier: GPL-2.0-only
3 * Common interrupt code for 32 and 64 bit
6 #include <linux/interrupt.h>
7 #include <linux/kernel_stat.h>
9 #include <linux/seq_file.h>
10 #include <linux/smp.h>
11 #include <linux/ftrace.h>
12 #include <linux/delay.h>
13 #include <linux/export.h>
14 #include <linux/irq.h>
16 #include <asm/irq_stack.h>
18 #include <asm/io_apic.h>
21 #include <asm/hw_irq.h>
23 #include <asm/traps.h>
24 #include <asm/thermal.h>
25 #include <asm/posted_intr.h>
26 #include <asm/irq_remapping.h>
28 #define CREATE_TRACE_POINTS
29 #include <asm/trace/irq_vectors.h>
31 DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t
, irq_stat
);
32 EXPORT_PER_CPU_SYMBOL(irq_stat
);
34 atomic_t irq_err_count
;
/*
 * 'what should we do if we get a hw irq event on an illegal vector'.
 * each architecture has to answer this themselves.
 *
 * @irq: number reported in the (rate limited) error message, printed in hex.
 */
void ack_bad_irq(unsigned int irq)
{
	if (printk_ratelimit())
		pr_err("unexpected IRQ trap at vector %02x\n", irq);

	/*
	 * Currently unexpected vectors happen only on SMP and APIC.
	 * We _must_ ack these because every local APIC has only N
	 * irq slots per priority level, and a 'hanging, unacked' IRQ
	 * holds up an irq slot - in excessive cases (when multiple
	 * unexpected vectors occur) that might lock up the APIC
	 * completely.
	 * But only ack when the APIC is enabled -AK
	 */
	/* NOTE(review): ack call restored as apic_eoi() - confirm helper name for this tree. */
	apic_eoi();
}
57 #define irq_stats(x) (&per_cpu(irq_stat, x))
59 * /proc/interrupts printing for arch specific interrupts
61 int arch_show_interrupts(struct seq_file
*p
, int prec
)
65 seq_printf(p
, "%*s: ", prec
, "NMI");
66 for_each_online_cpu(j
)
67 seq_printf(p
, "%10u ", irq_stats(j
)->__nmi_count
);
68 seq_puts(p
, " Non-maskable interrupts\n");
69 #ifdef CONFIG_X86_LOCAL_APIC
70 seq_printf(p
, "%*s: ", prec
, "LOC");
71 for_each_online_cpu(j
)
72 seq_printf(p
, "%10u ", irq_stats(j
)->apic_timer_irqs
);
73 seq_puts(p
, " Local timer interrupts\n");
75 seq_printf(p
, "%*s: ", prec
, "SPU");
76 for_each_online_cpu(j
)
77 seq_printf(p
, "%10u ", irq_stats(j
)->irq_spurious_count
);
78 seq_puts(p
, " Spurious interrupts\n");
79 seq_printf(p
, "%*s: ", prec
, "PMI");
80 for_each_online_cpu(j
)
81 seq_printf(p
, "%10u ", irq_stats(j
)->apic_perf_irqs
);
82 seq_puts(p
, " Performance monitoring interrupts\n");
83 seq_printf(p
, "%*s: ", prec
, "IWI");
84 for_each_online_cpu(j
)
85 seq_printf(p
, "%10u ", irq_stats(j
)->apic_irq_work_irqs
);
86 seq_puts(p
, " IRQ work interrupts\n");
87 seq_printf(p
, "%*s: ", prec
, "RTR");
88 for_each_online_cpu(j
)
89 seq_printf(p
, "%10u ", irq_stats(j
)->icr_read_retry_count
);
90 seq_puts(p
, " APIC ICR read retries\n");
91 if (x86_platform_ipi_callback
) {
92 seq_printf(p
, "%*s: ", prec
, "PLT");
93 for_each_online_cpu(j
)
94 seq_printf(p
, "%10u ", irq_stats(j
)->x86_platform_ipis
);
95 seq_puts(p
, " Platform interrupts\n");
99 seq_printf(p
, "%*s: ", prec
, "RES");
100 for_each_online_cpu(j
)
101 seq_printf(p
, "%10u ", irq_stats(j
)->irq_resched_count
);
102 seq_puts(p
, " Rescheduling interrupts\n");
103 seq_printf(p
, "%*s: ", prec
, "CAL");
104 for_each_online_cpu(j
)
105 seq_printf(p
, "%10u ", irq_stats(j
)->irq_call_count
);
106 seq_puts(p
, " Function call interrupts\n");
107 seq_printf(p
, "%*s: ", prec
, "TLB");
108 for_each_online_cpu(j
)
109 seq_printf(p
, "%10u ", irq_stats(j
)->irq_tlb_count
);
110 seq_puts(p
, " TLB shootdowns\n");
112 #ifdef CONFIG_X86_THERMAL_VECTOR
113 seq_printf(p
, "%*s: ", prec
, "TRM");
114 for_each_online_cpu(j
)
115 seq_printf(p
, "%10u ", irq_stats(j
)->irq_thermal_count
);
116 seq_puts(p
, " Thermal event interrupts\n");
118 #ifdef CONFIG_X86_MCE_THRESHOLD
119 seq_printf(p
, "%*s: ", prec
, "THR");
120 for_each_online_cpu(j
)
121 seq_printf(p
, "%10u ", irq_stats(j
)->irq_threshold_count
);
122 seq_puts(p
, " Threshold APIC interrupts\n");
124 #ifdef CONFIG_X86_MCE_AMD
125 seq_printf(p
, "%*s: ", prec
, "DFR");
126 for_each_online_cpu(j
)
127 seq_printf(p
, "%10u ", irq_stats(j
)->irq_deferred_error_count
);
128 seq_puts(p
, " Deferred Error APIC interrupts\n");
130 #ifdef CONFIG_X86_MCE
131 seq_printf(p
, "%*s: ", prec
, "MCE");
132 for_each_online_cpu(j
)
133 seq_printf(p
, "%10u ", per_cpu(mce_exception_count
, j
));
134 seq_puts(p
, " Machine check exceptions\n");
135 seq_printf(p
, "%*s: ", prec
, "MCP");
136 for_each_online_cpu(j
)
137 seq_printf(p
, "%10u ", per_cpu(mce_poll_count
, j
));
138 seq_puts(p
, " Machine check polls\n");
140 #ifdef CONFIG_X86_HV_CALLBACK_VECTOR
141 if (test_bit(HYPERVISOR_CALLBACK_VECTOR
, system_vectors
)) {
142 seq_printf(p
, "%*s: ", prec
, "HYP");
143 for_each_online_cpu(j
)
144 seq_printf(p
, "%10u ",
145 irq_stats(j
)->irq_hv_callback_count
);
146 seq_puts(p
, " Hypervisor callback interrupts\n");
149 #if IS_ENABLED(CONFIG_HYPERV)
150 if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR
, system_vectors
)) {
151 seq_printf(p
, "%*s: ", prec
, "HRE");
152 for_each_online_cpu(j
)
153 seq_printf(p
, "%10u ",
154 irq_stats(j
)->irq_hv_reenlightenment_count
);
155 seq_puts(p
, " Hyper-V reenlightenment interrupts\n");
157 if (test_bit(HYPERV_STIMER0_VECTOR
, system_vectors
)) {
158 seq_printf(p
, "%*s: ", prec
, "HVS");
159 for_each_online_cpu(j
)
160 seq_printf(p
, "%10u ",
161 irq_stats(j
)->hyperv_stimer0_count
);
162 seq_puts(p
, " Hyper-V stimer0 interrupts\n");
165 seq_printf(p
, "%*s: %10u\n", prec
, "ERR", atomic_read(&irq_err_count
));
166 #if defined(CONFIG_X86_IO_APIC)
167 seq_printf(p
, "%*s: %10u\n", prec
, "MIS", atomic_read(&irq_mis_count
));
169 #if IS_ENABLED(CONFIG_KVM)
170 seq_printf(p
, "%*s: ", prec
, "PIN");
171 for_each_online_cpu(j
)
172 seq_printf(p
, "%10u ", irq_stats(j
)->kvm_posted_intr_ipis
);
173 seq_puts(p
, " Posted-interrupt notification event\n");
175 seq_printf(p
, "%*s: ", prec
, "NPI");
176 for_each_online_cpu(j
)
177 seq_printf(p
, "%10u ",
178 irq_stats(j
)->kvm_posted_intr_nested_ipis
);
179 seq_puts(p
, " Nested posted-interrupt event\n");
181 seq_printf(p
, "%*s: ", prec
, "PIW");
182 for_each_online_cpu(j
)
183 seq_printf(p
, "%10u ",
184 irq_stats(j
)->kvm_posted_intr_wakeup_ipis
);
185 seq_puts(p
, " Posted-interrupt wakeup event\n");
187 #ifdef CONFIG_X86_POSTED_MSI
188 seq_printf(p
, "%*s: ", prec
, "PMN");
189 for_each_online_cpu(j
)
190 seq_printf(p
, "%10u ",
191 irq_stats(j
)->posted_msi_notification_count
);
192 seq_puts(p
, " Posted MSI notification event\n");
200 u64
arch_irq_stat_cpu(unsigned int cpu
)
202 u64 sum
= irq_stats(cpu
)->__nmi_count
;
204 #ifdef CONFIG_X86_LOCAL_APIC
205 sum
+= irq_stats(cpu
)->apic_timer_irqs
;
206 sum
+= irq_stats(cpu
)->irq_spurious_count
;
207 sum
+= irq_stats(cpu
)->apic_perf_irqs
;
208 sum
+= irq_stats(cpu
)->apic_irq_work_irqs
;
209 sum
+= irq_stats(cpu
)->icr_read_retry_count
;
210 if (x86_platform_ipi_callback
)
211 sum
+= irq_stats(cpu
)->x86_platform_ipis
;
214 sum
+= irq_stats(cpu
)->irq_resched_count
;
215 sum
+= irq_stats(cpu
)->irq_call_count
;
217 #ifdef CONFIG_X86_THERMAL_VECTOR
218 sum
+= irq_stats(cpu
)->irq_thermal_count
;
220 #ifdef CONFIG_X86_MCE_THRESHOLD
221 sum
+= irq_stats(cpu
)->irq_threshold_count
;
223 #ifdef CONFIG_X86_HV_CALLBACK_VECTOR
224 sum
+= irq_stats(cpu
)->irq_hv_callback_count
;
226 #if IS_ENABLED(CONFIG_HYPERV)
227 sum
+= irq_stats(cpu
)->irq_hv_reenlightenment_count
;
228 sum
+= irq_stats(cpu
)->hyperv_stimer0_count
;
230 #ifdef CONFIG_X86_MCE
231 sum
+= per_cpu(mce_exception_count
, cpu
);
232 sum
+= per_cpu(mce_poll_count
, cpu
);
237 u64
arch_irq_stat(void)
239 u64 sum
= atomic_read(&irq_err_count
);
243 static __always_inline
void handle_irq(struct irq_desc
*desc
,
244 struct pt_regs
*regs
)
246 if (IS_ENABLED(CONFIG_X86_64
))
247 generic_handle_irq_desc(desc
);
249 __handle_irq(desc
, regs
);
252 static __always_inline
int call_irq_handler(int vector
, struct pt_regs
*regs
)
254 struct irq_desc
*desc
;
257 desc
= __this_cpu_read(vector_irq
[vector
]);
258 if (likely(!IS_ERR_OR_NULL(desc
))) {
259 handle_irq(desc
, regs
);
262 if (desc
== VECTOR_UNUSED
) {
263 pr_emerg_ratelimited("%s: %d.%u No irq handler for vector\n",
264 __func__
, smp_processor_id(),
267 __this_cpu_write(vector_irq
[vector
], VECTOR_UNUSED
);
275 * common_interrupt() handles all normal device IRQ's (the special SMP
276 * cross-CPU interrupts have their own entry points).
278 DEFINE_IDTENTRY_IRQ(common_interrupt
)
280 struct pt_regs
*old_regs
= set_irq_regs(regs
);
282 /* entry code tells RCU that we're not quiescent. Check it. */
283 RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU");
285 if (unlikely(call_irq_handler(vector
, regs
)))
288 set_irq_regs(old_regs
);
#ifdef CONFIG_X86_LOCAL_APIC
/* Function pointer for generic interrupt vector handling */
void (*x86_platform_ipi_callback)(void) = NULL;

/*
 * Handler for X86_PLATFORM_IPI_VECTOR.
 *
 * Counts the IPI and, if one is installed, runs x86_platform_ipi_callback
 * between the entry and exit tracepoints.
 */
DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi)
{
	struct pt_regs *old_regs = set_irq_regs(regs);

	/* NOTE(review): EOI restored as apic_eoi() - confirm helper name. */
	apic_eoi();
	trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
	inc_irq_stat(x86_platform_ipis);
	if (x86_platform_ipi_callback)
		x86_platform_ipi_callback();
	trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
	set_irq_regs(old_regs);
}
#endif
311 #if IS_ENABLED(CONFIG_KVM)
312 static void dummy_handler(void) {}
313 static void (*kvm_posted_intr_wakeup_handler
)(void) = dummy_handler
;
315 void kvm_set_posted_intr_wakeup_handler(void (*handler
)(void))
318 kvm_posted_intr_wakeup_handler
= handler
;
320 kvm_posted_intr_wakeup_handler
= dummy_handler
;
324 EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler
);
327 * Handler for POSTED_INTERRUPT_VECTOR.
329 DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_ipi
)
332 inc_irq_stat(kvm_posted_intr_ipis
);
336 * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
338 DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_wakeup_ipi
)
341 inc_irq_stat(kvm_posted_intr_wakeup_ipis
);
342 kvm_posted_intr_wakeup_handler();
346 * Handler for POSTED_INTERRUPT_NESTED_VECTOR.
348 DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi
)
351 inc_irq_stat(kvm_posted_intr_nested_ipis
);
#ifdef CONFIG_X86_POSTED_MSI

/* Posted Interrupt Descriptors for coalesced MSIs to be posted */
DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc);

/*
 * Initialise this CPU's posted MSI descriptor: set the notification
 * vector and the notification destination derived from the APIC ID.
 */
void intel_posted_msi_init(void)
{
	u32 destination;
	u32 apic_id;

	this_cpu_write(posted_msi_pi_desc.nv, POSTED_MSI_NOTIFICATION_VECTOR);

	/*
	 * APIC destination ID is stored in bit 8:15 while in XAPIC mode.
	 */
	apic_id = this_cpu_read(x86_cpu_to_apicid);
	destination = x2apic_enabled() ? apic_id : apic_id << 8;
	this_cpu_write(posted_msi_pi_desc.ndst, destination);
}

/*
 * De-multiplexing posted interrupts is on the performance path, the code
 * below is written to optimize the cache performance based on the following
 * considerations:
 * 1.Posted interrupt descriptor (PID) fits in a cache line that is frequently
 *   accessed by both CPU and IOMMU.
 * 2.During posted MSI processing, the CPU needs to do 64-bit read and xchg
 *   for checking and clearing posted interrupt request (PIR), a 256 bit field
 *   within the PID.
 * 3.On the other side, the IOMMU does atomic swaps of the entire PID cache
 *   line when posting interrupts and setting control bits.
 * 4.The CPU can access the cache line a magnitude faster than the IOMMU.
 * 5.Each time the IOMMU does interrupt posting to the PIR will evict the PID
 *   cache line. The cache line states after each operation are as follows:
 *   CPU		IOMMU			PID Cache line state
 *   ---------------------------------------------------------------
 *...read64					exclusive
 *...lock xchg64				modified
 *...			post/atomic swap	invalid
 *...-------------------------------------------------------------
 *
 * To reduce L1 data cache miss, it is important to avoid contention with
 * IOMMU's interrupt posting/atomic swap. Therefore, a copy of PIR is used
 * to dispatch interrupt handlers.
 *
 * In addition, the code is trying to keep the cache line state consistent
 * as much as possible. e.g. when making a copy and clearing the PIR
 * (assuming non-zero PIR bits are present in the entire PIR), it does:
 *		read, read, read, read, xchg, xchg, xchg, xchg
 * instead of:
 *		read, xchg, read, xchg, read, xchg, read, xchg
 *
 * @pir:  the four 64-bit PIR words of a posted interrupt descriptor
 * @regs: register state passed on to the dispatched handlers
 *
 * Returns true if at least one PIR word was non-zero and has been
 * harvested, false if the PIR was empty.
 */
static __always_inline bool handle_pending_pir(u64 *pir, struct pt_regs *regs)
{
	int i, vec = FIRST_EXTERNAL_VECTOR;
	unsigned long pir_copy[4];
	bool handled = false;

	/*
	 * Snapshot with READ_ONCE() so that each word is loaded exactly
	 * once here. A plain load would allow the compiler to re-read the
	 * live PIR in the check below, which defeats the purpose of
	 * working on a copy.
	 */
	for (i = 0; i < 4; i++)
		pir_copy[i] = READ_ONCE(pir[i]);

	for (i = 0; i < 4; i++) {
		if (!pir_copy[i])
			continue;

		/* Atomically take and clear the pending bits of this word. */
		pir_copy[i] = arch_xchg(&pir[i], 0);
		handled = true;
	}

	if (handled) {
		for_each_set_bit_from(vec, pir_copy, FIRST_SYSTEM_VECTOR)
			call_irq_handler(vec, regs);
	}

	return handled;
}

/*
 * Performance data shows that 3 is good enough to harvest 90+% of the benefit
 * on high IRQ rate workload.
 */
#define MAX_POSTED_MSI_COALESCING_LOOP 3

/*
 * For MSIs that are delivered as posted interrupts, the CPU notifications
 * can be coalesced if the MSIs arrive in high frequency bursts.
 */
DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
{
	struct pt_regs *old_regs = set_irq_regs(regs);
	struct pi_desc *pid;
	int i = 0;

	pid = this_cpu_ptr(&posted_msi_pi_desc);

	inc_irq_stat(posted_msi_notification_count);
	/* NOTE(review): irq_enter()/irq_exit() pair restored - confirm. */
	irq_enter();

	/*
	 * Max coalescing count includes the extra round of handle_pending_pir
	 * after clearing the outstanding notification bit. Hence, at most
	 * MAX_POSTED_MSI_COALESCING_LOOP - 1 loops are executed here.
	 */
	while (++i < MAX_POSTED_MSI_COALESCING_LOOP) {
		if (!handle_pending_pir(pid->pir64, regs))
			break;
	}

	/*
	 * Clear outstanding notification bit to allow new IRQ notifications,
	 * do this last to maximize the window of interrupt coalescing.
	 */
	/* NOTE(review): clearing restored as pi_clear_on() - confirm helper name. */
	pi_clear_on(pid);

	/*
	 * There could be a race of PI notification and the clearing of ON bit,
	 * process PIR bits one last time such that handling the new interrupts
	 * are not delayed until the next IRQ.
	 */
	handle_pending_pir(pid->pir64, regs);

	/* NOTE(review): EOI restored as apic_eoi() - confirm helper name. */
	apic_eoi();
	irq_exit();
	set_irq_regs(old_regs);
}
#endif /* X86_POSTED_MSI */
#ifdef CONFIG_HOTPLUG_CPU
/*
 * A cpu has been removed from cpu_online_mask. Reset irq affinities.
 *
 * After migrating all interrupts away from this CPU, every external
 * vector that still has a descriptor installed is checked: a vector that
 * is pending is retriggered through its irq chip (when the chip
 * provides irq_retrigger) and marked VECTOR_RETRIGGERED; all others are
 * reset to VECTOR_UNUSED.
 */
void fixup_irqs(void)
{
	unsigned int vector;
	struct irq_desc *desc;
	struct irq_data *data;
	struct irq_chip *chip;

	irq_migrate_all_off_this_cpu();

	/*
	 * We can remove mdelay() and then send spurious interrupts to
	 * new cpu targets for all the irqs that were handled previously by
	 * this cpu. While it works, I have seen spurious interrupt messages
	 * (nothing wrong but still...).
	 *
	 * So for now, retain mdelay(1) and check the IRR and then send those
	 * interrupts to new targets as this cpu is already offlined...
	 */
	mdelay(1);

	/*
	 * We can walk the vector array of this cpu without holding
	 * vector_lock because the cpu is already marked !online, so
	 * nothing else will touch it.
	 */
	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
		desc = __this_cpu_read(vector_irq[vector]);
		if (IS_ERR_OR_NULL(desc))
			continue;

		if (is_vector_pending(vector)) {
			raw_spin_lock(&desc->lock);
			data = irq_desc_get_irq_data(desc);
			chip = irq_data_get_irq_chip(data);
			if (chip->irq_retrigger) {
				chip->irq_retrigger(data);
				__this_cpu_write(vector_irq[vector],
						 VECTOR_RETRIGGERED);
			}
			raw_spin_unlock(&desc->lock);
		}

		/* Anything that was not retriggered is released. */
		if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED)
			__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
	}
}
#endif
532 #ifdef CONFIG_X86_THERMAL_VECTOR
533 static void smp_thermal_vector(void)
535 if (x86_thermal_enabled())
536 intel_thermal_interrupt();
538 pr_err("CPU%d: Unexpected LVT thermal interrupt!\n",
542 DEFINE_IDTENTRY_SYSVEC(sysvec_thermal
)
544 trace_thermal_apic_entry(THERMAL_APIC_VECTOR
);
545 inc_irq_stat(irq_thermal_count
);
546 smp_thermal_vector();
547 trace_thermal_apic_exit(THERMAL_APIC_VECTOR
);