// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>
#include <asm/plpar_wrappers.h>

#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2
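/*
 * RIC (Radix Invalidation Control) values for the tlbie/tlbiel RIC field:
 * 0 invalidates TLB entries only, 1 invalidates the Page Walk Cache only,
 * and 2 invalidates everything: TLB, PWC and the cached process/partition
 * table entries.
 */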
/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
                                unsigned int pid,
                                unsigned int ric, unsigned int prs)
{
        unsigned long rb;
        unsigned long rs;

        rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
        rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

        asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
                     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
                     : "memory");
}

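/*
 * Flush the whole TLB on the local CPU with tlbiel, iterating over every
 * congruence-class set. "is" selects the invalidation scope encoded into
 * the rb field above.
 */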
static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
        unsigned int set;

        asm volatile("ptesync": : :"memory");

        /*
         * Flush the first set of the TLB, and the entire Page Walk Cache
         * and partition table entries. Then flush the remaining sets of the
         * TLB.
         */

        if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
                /* MSR[HV] should flush partition scope translations first. */
                tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);

                if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
                        for (set = 1; set < num_sets; set++)
                                tlbiel_radix_set_isa300(set, is, 0,
                                                        RIC_FLUSH_TLB, 0);
                }
        }

        /* Flush process scoped entries. */
        tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);

        if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
                for (set = 1; set < num_sets; set++)
                        tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
        }

        ppc_after_tlbiel_barrier();
}

void radix__tlbiel_all(unsigned int action)
{
        unsigned int is;

        switch (action) {
        case TLB_INVAL_SCOPE_GLOBAL:
                is = 3;
                break;
        case TLB_INVAL_SCOPE_LPID:
                is = 2;
                break;
        default:
                BUG();
        }

        if (early_cpu_has_feature(CPU_FTR_ARCH_300))
                tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
        else
                WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

        asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}

static __always_inline void __tlbiel_pid(unsigned long pid, int set,
                                unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = PPC_BIT(53); /* IS = 1 */
        rb |= set << PPC_BITLSHIFT(51);
        rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
        prs = 1; /* process scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

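/*
 * Same flush as __tlbiel_pid() but using the global tlbie instruction, which
 * is broadcast to all processors (and seen by the nest MMU) rather than
 * acting only on the local core.
 */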
static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = PPC_BIT(53); /* IS = 1 */
        rs = pid << PPC_BITLSHIFT(31);
        prs = 1; /* process scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = PPC_BIT(52); /* IS = 2 */
        rs = lpid;
        prs = 0; /* partition scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

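/*
 * Like __tlbie_lpid(), but with prs = 1 so it targets the process scoped
 * translations cached under the given LPID, i.e. a guest's process scoped
 * entries.
 */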
static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = PPC_BIT(52); /* IS = 2 */
        rs = lpid;
        prs = 1; /* process scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

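/*
 * The *_va() helpers below invalidate a single translation: the effective
 * address and the encoded actual page size (ap) are packed into rb, and the
 * PID (or LPID) goes into rs.
 */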
static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
                                unsigned long ap, unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = va & ~(PPC_BITMASK(52, 63));
        rb |= ap << PPC_BITLSHIFT(58);
        rs = pid << PPC_BITLSHIFT(31);
        prs = 1; /* process scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
                                unsigned long ap, unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = va & ~(PPC_BITMASK(52, 63));
        rb |= ap << PPC_BITLSHIFT(58);
        rs = pid << PPC_BITLSHIFT(31);
        prs = 1; /* process scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
                                unsigned long ap, unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = va & ~(PPC_BITMASK(52, 63));
        rb |= ap << PPC_BITLSHIFT(58);
        rs = lpid;
        prs = 0; /* partition scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

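/*
 * The fixup_tlbie_*() helpers work around two POWER9 tlbie errata (the ERAT
 * and store queue bugs named by the feature bits below) by issuing an extra
 * invalidation, ordered by ptesync, after the real one.
 */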
static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
                                  unsigned long ap)
{
        if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
        }

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
        }
}

static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
                                        unsigned long ap)
{
        if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_pid(0, RIC_FLUSH_TLB);
        }

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
        }
}

static inline void fixup_tlbie_pid(unsigned long pid)
{
        /*
         * We can use any address for the invalidation, pick one which is
         * probably unused as an optimisation.
         */
        unsigned long va = ((1UL << 52) - 1);

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_pid(0, RIC_FLUSH_TLB);
        }

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
        }
}

static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
                                       unsigned long ap)
{
        if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
        }

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
        }
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
        /*
         * We can use any address for the invalidation, pick one which is
         * probably unused as an optimisation.
         */
        unsigned long va = ((1UL << 52) - 1);

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_lpid(0, RIC_FLUSH_TLB);
        }

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
        }
}

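/*
 * The single-underscore primitives below wrap the raw instruction helpers
 * with the required synchronisation: ptesync before the invalidation, and
 * either ppc_after_tlbiel_barrier() (local) or eieio; tlbsync; ptesync
 * (global) after it.
 */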
/*
 * We use 128 sets in radix mode and 256 sets in hpt mode.
 */
static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
        int set;

        asm volatile("ptesync": : :"memory");

        /*
         * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
         * also flush the entire Page Walk Cache.
         */
        __tlbiel_pid(pid, 0, ric);

        /* For PWC, only one flush is needed */
        if (ric == RIC_FLUSH_PWC) {
                ppc_after_tlbiel_barrier();
                return;
        }

        if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
                /* For the remaining sets, just flush the TLB */
                for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
                        __tlbiel_pid(pid, set, RIC_FLUSH_TLB);
        }

        ppc_after_tlbiel_barrier();
        asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
        asm volatile("ptesync": : :"memory");

        /*
         * Workaround the fact that the "ric" argument to __tlbie_pid
         * must be a compile-time constant to match the "i" constraint
         * in the asm statement.
         */
        switch (ric) {
        case RIC_FLUSH_TLB:
                __tlbie_pid(pid, RIC_FLUSH_TLB);
                fixup_tlbie_pid(pid);
                break;
        case RIC_FLUSH_PWC:
                __tlbie_pid(pid, RIC_FLUSH_PWC);
                break;
        case RIC_FLUSH_ALL:
        default:
                __tlbie_pid(pid, RIC_FLUSH_ALL);
                fixup_tlbie_pid(pid);
        }
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

struct tlbiel_pid {
        unsigned long pid;
        unsigned long ric;
};

static void do_tlbiel_pid(void *info)
{
        struct tlbiel_pid *t = info;

        if (t->ric == RIC_FLUSH_TLB)
                _tlbiel_pid(t->pid, RIC_FLUSH_TLB);
        else if (t->ric == RIC_FLUSH_PWC)
                _tlbiel_pid(t->pid, RIC_FLUSH_PWC);
        else
                _tlbiel_pid(t->pid, RIC_FLUSH_ALL);
}

static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
                                unsigned long pid, unsigned long ric)
{
        struct cpumask *cpus = mm_cpumask(mm);
        struct tlbiel_pid t = { .pid = pid, .ric = ric };

        on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
        /*
         * Always want the CPU translations to be invalidated with tlbiel in
         * these paths, so while coprocessors must use tlbie, we can not
         * optimise away the tlbiel component.
         */
        if (atomic_read(&mm->context.copros) > 0)
                _tlbie_pid(pid, RIC_FLUSH_ALL);
}

static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
        asm volatile("ptesync": : :"memory");

        /*
         * Workaround the fact that the "ric" argument to __tlbie_lpid
         * must be a compile-time constant to match the "i" constraint
         * in the asm statement.
         */
        switch (ric) {
        case RIC_FLUSH_TLB:
                __tlbie_lpid(lpid, RIC_FLUSH_TLB);
                fixup_tlbie_lpid(lpid);
                break;
        case RIC_FLUSH_PWC:
                __tlbie_lpid(lpid, RIC_FLUSH_PWC);
                break;
        case RIC_FLUSH_ALL:
        default:
                __tlbie_lpid(lpid, RIC_FLUSH_ALL);
                fixup_tlbie_lpid(lpid);
        }
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
        /*
         * Workaround the fact that the "ric" argument to __tlbie_lpid_guest
         * must be a compile-time constant to match the "i" constraint
         * in the asm statement.
         */
        switch (ric) {
        case RIC_FLUSH_TLB:
                __tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
                break;
        case RIC_FLUSH_PWC:
                __tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
                break;
        case RIC_FLUSH_ALL:
        default:
                __tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
        }
        fixup_tlbie_lpid(lpid);
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
                                unsigned long pid, unsigned long page_size,
                                unsigned long psize)
{
        unsigned long addr;
        unsigned long ap = mmu_get_ap(psize);

        for (addr = start; addr < end; addr += page_size)
                __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
                                unsigned long psize, unsigned long ric)
{
        unsigned long ap = mmu_get_ap(psize);

        asm volatile("ptesync": : :"memory");
        __tlbiel_va(va, pid, ap, ric);
        ppc_after_tlbiel_barrier();
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
                                unsigned long pid, unsigned long page_size,
                                unsigned long psize, bool also_pwc)
{
        asm volatile("ptesync": : :"memory");
        if (also_pwc)
                __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
        __tlbiel_va_range(start, end, pid, page_size, psize);
        ppc_after_tlbiel_barrier();
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
                                unsigned long pid, unsigned long page_size,
                                unsigned long psize)
{
        unsigned long addr;
        unsigned long ap = mmu_get_ap(psize);

        for (addr = start; addr < end; addr += page_size)
                __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);

        fixup_tlbie_va_range(addr - page_size, pid, ap);
}

static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
                                unsigned long psize, unsigned long ric)
{
        unsigned long ap = mmu_get_ap(psize);

        asm volatile("ptesync": : :"memory");
        __tlbie_va(va, pid, ap, ric);
        fixup_tlbie_va(va, pid, ap);
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

struct tlbiel_va {
        unsigned long pid;
        unsigned long va;
        unsigned long psize;
        unsigned long ric;
};

static void do_tlbiel_va(void *info)
{
        struct tlbiel_va *t = info;

        if (t->ric == RIC_FLUSH_TLB)
                _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
        else if (t->ric == RIC_FLUSH_PWC)
                _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
        else
                _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
}

static inline void _tlbiel_va_multicast(struct mm_struct *mm,
                                unsigned long va, unsigned long pid,
                                unsigned long psize, unsigned long ric)
{
        struct cpumask *cpus = mm_cpumask(mm);
        struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };

        on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
        if (atomic_read(&mm->context.copros) > 0)
                _tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
}

struct tlbiel_va_range {
        unsigned long pid;
        unsigned long start;
        unsigned long end;
        unsigned long page_size;
        unsigned long psize;
        bool also_pwc;
};

static void do_tlbiel_va_range(void *info)
{
        struct tlbiel_va_range *t = info;

        _tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
                         t->psize, t->also_pwc);
}

static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
                                unsigned long psize, unsigned long ric)
{
        unsigned long ap = mmu_get_ap(psize);

        asm volatile("ptesync": : :"memory");
        __tlbie_lpid_va(va, lpid, ap, ric);
        fixup_tlbie_lpid_va(va, lpid, ap);
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
                                unsigned long pid, unsigned long page_size,
                                unsigned long psize, bool also_pwc)
{
        asm volatile("ptesync": : :"memory");
        if (also_pwc)
                __tlbie_pid(pid, RIC_FLUSH_PWC);
        __tlbie_va_range(start, end, pid, page_size, psize);
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
                                unsigned long start, unsigned long end,
                                unsigned long pid, unsigned long page_size,
                                unsigned long psize, bool also_pwc)
{
        struct cpumask *cpus = mm_cpumask(mm);
        struct tlbiel_va_range t = { .start = start, .end = end,
                                     .pid = pid, .page_size = page_size,
                                     .psize = psize, .also_pwc = also_pwc };

        on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
        if (atomic_read(&mm->context.copros) > 0)
                _tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLBs
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
        unsigned long pid;

        preempt_disable();
        pid = mm->context.id;
        if (pid != MMU_NO_CONTEXT)
                _tlbiel_pid(pid, RIC_FLUSH_TLB);
        preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
        unsigned long pid;

        preempt_disable();
        pid = mm->context.id;
        if (pid != MMU_NO_CONTEXT)
                _tlbiel_pid(pid, RIC_FLUSH_ALL);
        preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
        radix__local_flush_all_mm(mm);
}
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
                                       int psize)
{
        unsigned long pid;

        preempt_disable();
        pid = mm->context.id;
        if (pid != MMU_NO_CONTEXT)
                _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
        preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
        /* need the return fix for nohash.c */
        if (is_vm_hugetlb_page(vma))
                return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
        radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

#ifdef CONFIG_SMP
static bool mm_is_singlethreaded(struct mm_struct *mm)
{
        if (atomic_read(&mm->context.copros) > 0)
                return false;
        if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm)
                return true;
        return false;
}

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
        /*
         * P9 nest MMU has issues with the page walk cache
         * caching PTEs and not flushing them properly when
         * RIC = 0 for a PID/LPID invalidate.
         */
        if (atomic_read(&mm->context.copros) > 0)
                return true;
        return false;
}

static void do_exit_flush_lazy_tlb(void *arg)
{
        struct mm_struct *mm = arg;
        unsigned long pid = mm->context.id;

        /*
         * A kthread could have done a mmget_not_zero() after the flushing CPU
         * checked mm_is_singlethreaded, and be in the process of
         * kthread_use_mm when interrupted here. In that case, current->mm will
         * be set to mm, because kthread_use_mm() setting ->mm and switching to
         * the mm is done with interrupts off.
         */
        if (current->mm == mm)
                goto out_flush;

        if (current->active_mm == mm) {
                WARN_ON_ONCE(current->mm != NULL);
                /* Is a kernel thread and is using mm as the lazy tlb */
                mmgrab(&init_mm);
                current->active_mm = &init_mm;
                switch_mm_irqs_off(mm, &init_mm, current);
                mmdrop(mm);
        }

        atomic_dec(&mm->context.active_cpus);
        cpumask_clear_cpu(smp_processor_id(), mm_cpumask(mm));

out_flush:
        _tlbiel_pid(pid, RIC_FLUSH_ALL);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
        /*
         * Would be nice if this was async so it could be run in
         * parallel with our local flush, but generic code does not
         * give a good API for it. Could extend the generic code or
         * make a special powerpc IPI for flushing TLBs.
         * For now it's not too performance critical.
         */
        smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
                               (void *)mm, 1);
}

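/*
 * Global flushes pick between three mechanisms: H_RPT_INVALIDATE when the
 * guest lacks GTSE and must invalidate via the hypervisor, broadcast tlbie
 * when it may be used, or IPI-driven tlbiel multicast. Single threaded mms
 * fall back to a purely local flush.
 */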
void radix__flush_tlb_mm(struct mm_struct *mm)
{
        unsigned long pid;

        pid = mm->context.id;
        if (unlikely(pid == MMU_NO_CONTEXT))
                return;

        preempt_disable();
        /*
         * Order loads of mm_cpumask vs previous stores to clear ptes before
         * the invalidate. See barrier in switch_mm_irqs_off.
         */
        smp_mb();
        if (!mm_is_thread_local(mm)) {
                if (unlikely(mm_is_singlethreaded(mm))) {
                        exit_flush_lazy_tlbs(mm);
                        goto local;
                }

                if (!mmu_has_feature(MMU_FTR_GTSE)) {
                        unsigned long tgt = H_RPTI_TARGET_CMMU;

                        if (atomic_read(&mm->context.copros) > 0)
                                tgt |= H_RPTI_TARGET_NMMU;
                        pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
                                               H_RPTI_PAGE_ALL, 0, -1UL);
                } else if (cputlb_use_tlbie()) {
                        if (mm_needs_flush_escalation(mm))
                                _tlbie_pid(pid, RIC_FLUSH_ALL);
                        else
                                _tlbie_pid(pid, RIC_FLUSH_TLB);
                } else {
                        _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
                }
        } else {
local:
                _tlbiel_pid(pid, RIC_FLUSH_TLB);
        }
        preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
        unsigned long pid;

        pid = mm->context.id;
        if (unlikely(pid == MMU_NO_CONTEXT))
                return;

        preempt_disable();
        smp_mb(); /* see radix__flush_tlb_mm */
        if (!mm_is_thread_local(mm)) {
                if (unlikely(mm_is_singlethreaded(mm))) {
                        if (!fullmm) {
                                exit_flush_lazy_tlbs(mm);
                                goto local;
                        }
                }
                if (!mmu_has_feature(MMU_FTR_GTSE)) {
                        unsigned long tgt = H_RPTI_TARGET_CMMU;
                        unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
                                             H_RPTI_TYPE_PRT;

                        if (atomic_read(&mm->context.copros) > 0)
                                tgt |= H_RPTI_TARGET_NMMU;
                        pseries_rpt_invalidate(pid, tgt, type,
                                               H_RPTI_PAGE_ALL, 0, -1UL);
                } else if (cputlb_use_tlbie())
                        _tlbie_pid(pid, RIC_FLUSH_ALL);
                else
                        _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
        } else {
local:
                _tlbiel_pid(pid, RIC_FLUSH_ALL);
        }
        preempt_enable();
}

void radix__flush_all_mm(struct mm_struct *mm)
{
        __flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
                                 int psize)
{
        unsigned long pid;

        pid = mm->context.id;
        if (unlikely(pid == MMU_NO_CONTEXT))
                return;

        preempt_disable();
        smp_mb(); /* see radix__flush_tlb_mm */
        if (!mm_is_thread_local(mm)) {
                if (unlikely(mm_is_singlethreaded(mm))) {
                        exit_flush_lazy_tlbs(mm);
                        goto local;
                }
                if (!mmu_has_feature(MMU_FTR_GTSE)) {
                        unsigned long tgt, pg_sizes, size;

                        tgt = H_RPTI_TARGET_CMMU;
                        pg_sizes = psize_to_rpti_pgsize(psize);
                        size = 1UL << mmu_psize_to_shift(psize);

                        if (atomic_read(&mm->context.copros) > 0)
                                tgt |= H_RPTI_TARGET_NMMU;
                        pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
                                               pg_sizes, vmaddr,
                                               vmaddr + size);
                } else if (cputlb_use_tlbie())
                        _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
                else
                        _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
        } else {
local:
                _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
        }
        preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
        if (is_vm_hugetlb_page(vma))
                return radix__flush_hugetlb_page(vma, vmaddr);
#endif
        radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#else /* CONFIG_SMP */
static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
#endif /* CONFIG_SMP */

static void do_tlbiel_kernel(void *info)
{
        _tlbiel_pid(0, RIC_FLUSH_ALL);
}

static inline void _tlbiel_kernel_broadcast(void)
{
        on_each_cpu(do_tlbiel_kernel, NULL, 1);
        if (tlbie_capable) {
                /*
                 * Coherent accelerators don't refcount kernel memory mappings,
                 * so have to always issue a tlbie for them. This is quite a
                 * slow path anyway.
                 */
                _tlbie_pid(0, RIC_FLUSH_ALL);
        }
}

/*
 * If kernel TLBIs ever become local rather than global, then
 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
 * assumes kernel TLBIs are global.
 */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
        if (!mmu_has_feature(MMU_FTR_GTSE)) {
                unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
                unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
                                     H_RPTI_TYPE_PRT;

                pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
                                       start, end);
        } else if (cputlb_use_tlbie())
                _tlbie_pid(0, RIC_FLUSH_ALL);
        else
                _tlbiel_kernel_broadcast();
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;

static inline void __radix__flush_tlb_range(struct mm_struct *mm,
                                unsigned long start, unsigned long end)
{
        unsigned long pid;
        unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
        unsigned long page_size = 1UL << page_shift;
        unsigned long nr_pages = (end - start) >> page_shift;
        bool local, full;

        pid = mm->context.id;
        if (unlikely(pid == MMU_NO_CONTEXT))
                return;

        preempt_disable();
        smp_mb(); /* see radix__flush_tlb_mm */
        if (!mm_is_thread_local(mm)) {
                if (unlikely(mm_is_singlethreaded(mm))) {
                        if (end != TLB_FLUSH_ALL) {
                                exit_flush_lazy_tlbs(mm);
                                goto is_local;
                        }
                }
                local = false;
                full = (end == TLB_FLUSH_ALL ||
                        nr_pages > tlb_single_page_flush_ceiling);
        } else {
is_local:
                local = true;
                full = (end == TLB_FLUSH_ALL ||
                        nr_pages > tlb_local_single_page_flush_ceiling);
        }

        if (!mmu_has_feature(MMU_FTR_GTSE) && !local) {
                unsigned long tgt = H_RPTI_TARGET_CMMU;
                unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);

                if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
                        pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
                if (atomic_read(&mm->context.copros) > 0)
                        tgt |= H_RPTI_TARGET_NMMU;
                pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes,
                                       start, end);
        } else if (full) {
                if (local) {
                        _tlbiel_pid(pid, RIC_FLUSH_TLB);
                } else {
                        if (cputlb_use_tlbie()) {
                                if (mm_needs_flush_escalation(mm))
                                        _tlbie_pid(pid, RIC_FLUSH_ALL);
                                else
                                        _tlbie_pid(pid, RIC_FLUSH_TLB);
                        } else {
                                _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
                        }
                }
        } else {
                bool hflush = false;
                unsigned long hstart, hend;

                if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
                        hstart = (start + PMD_SIZE - 1) & PMD_MASK;
                        hend = end & PMD_MASK;
                        if (hstart < hend)
                                hflush = true;
                }

                if (local) {
                        asm volatile("ptesync": : :"memory");
                        __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
                        if (hflush)
                                __tlbiel_va_range(hstart, hend, pid,
                                                  PMD_SIZE, MMU_PAGE_2M);
                        ppc_after_tlbiel_barrier();
                } else if (cputlb_use_tlbie()) {
                        asm volatile("ptesync": : :"memory");
                        __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
                        if (hflush)
                                __tlbie_va_range(hstart, hend, pid,
                                                 PMD_SIZE, MMU_PAGE_2M);
                        asm volatile("eieio; tlbsync; ptesync": : :"memory");
                } else {
                        _tlbiel_va_range_multicast(mm,
                                        start, end, pid, page_size, mmu_virtual_psize, false);
                        if (hflush)
                                _tlbiel_va_range_multicast(mm,
                                        hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
                }
        }
        preempt_enable();
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
                            unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
        if (is_vm_hugetlb_page(vma))
                return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

        __radix__flush_tlb_range(vma->vm_mm, start, end);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
        int psize;

        if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
                psize = mmu_virtual_psize;
        else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
                psize = MMU_PAGE_2M;
        else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
                psize = MMU_PAGE_1G;
        else
                return -1;
        return psize;
}

/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
                                unsigned long addr,
                                unsigned long page_size)
{
        int psize = radix_get_mmu_psize(page_size);

        _tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
        _tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR).
 */
void radix__flush_all_lpid(unsigned int lpid)
{
        _tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_all_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR).
 */
void radix__flush_all_lpid_guest(unsigned int lpid)
{
        _tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
                                unsigned long end, int psize);

void radix__tlb_flush(struct mmu_gather *tlb)
{
        int psize = 0;
        struct mm_struct *mm = tlb->mm;
        int page_size = tlb->page_size;
        unsigned long start = tlb->start;
        unsigned long end = tlb->end;

        /*
         * If the page size is not something we understand, do a full mm flush.
         *
         * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
         * that flushes the process table entry cache upon process teardown.
         * See the comment for radix in arch_exit_mmap().
         */
        if (tlb->fullmm || tlb->need_flush_all) {
                __flush_all_mm(mm, true);
        } else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
                if (!tlb->freed_tables)
                        radix__flush_tlb_mm(mm);
                else
                        radix__flush_all_mm(mm);
        } else {
                if (!tlb->freed_tables)
                        radix__flush_tlb_range_psize(mm, start, end, psize);
                else
                        radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
        }
}

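/*
 * Range flush by page size: below the flush ceilings this invalidates page by
 * page, above them it escalates to a full PID flush, optionally covering the
 * PWC as well when page tables were freed.
 */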
static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
                                unsigned long start, unsigned long end,
                                int psize, bool also_pwc)
{
        unsigned long pid;
        unsigned int page_shift = mmu_psize_defs[psize].shift;
        unsigned long page_size = 1UL << page_shift;
        unsigned long nr_pages = (end - start) >> page_shift;
        bool local, full;

        pid = mm->context.id;
        if (unlikely(pid == MMU_NO_CONTEXT))
                return;

        preempt_disable();
        smp_mb(); /* see radix__flush_tlb_mm */
        if (!mm_is_thread_local(mm)) {
                if (unlikely(mm_is_singlethreaded(mm))) {
                        if (end != TLB_FLUSH_ALL) {
                                exit_flush_lazy_tlbs(mm);
                                goto is_local;
                        }
                }
                local = false;
                full = (end == TLB_FLUSH_ALL ||
                        nr_pages > tlb_single_page_flush_ceiling);
        } else {
is_local:
                local = true;
                full = (end == TLB_FLUSH_ALL ||
                        nr_pages > tlb_local_single_page_flush_ceiling);
        }

        if (!mmu_has_feature(MMU_FTR_GTSE) && !local) {
                unsigned long tgt = H_RPTI_TARGET_CMMU;
                unsigned long type = H_RPTI_TYPE_TLB;
                unsigned long pg_sizes = psize_to_rpti_pgsize(psize);

                if (also_pwc)
                        type |= H_RPTI_TYPE_PWC;
                if (atomic_read(&mm->context.copros) > 0)
                        tgt |= H_RPTI_TARGET_NMMU;
                pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
        } else if (full) {
                if (local) {
                        _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
                } else {
                        if (cputlb_use_tlbie()) {
                                if (mm_needs_flush_escalation(mm))
                                        also_pwc = true;

                                _tlbie_pid(pid,
                                        also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
                        } else {
                                _tlbiel_pid_multicast(mm, pid,
                                        also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
                        }
                }
        } else {
                if (local)
                        _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
                else if (cputlb_use_tlbie())
                        _tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
                else
                        _tlbiel_va_range_multicast(mm,
                                        start, end, pid, page_size, psize, also_pwc);
        }
        preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
                                  unsigned long end, int psize)
{
        return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
                                             unsigned long end, int psize)
{
        __radix__flush_tlb_range_psize(mm, start, end, psize, true);
}

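/*
 * Flush the stale small-page translations covering a PMD range that has just
 * been collapsed into a huge page, including the PWC since the underlying
 * page table page is going away.
 */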
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
        unsigned long pid, end;

        pid = mm->context.id;
        if (unlikely(pid == MMU_NO_CONTEXT))
                return;

        /* 4k page size, just blow the world */
        if (PAGE_SIZE == 0x1000) {
                radix__flush_all_mm(mm);
                return;
        }

        end = addr + HPAGE_PMD_SIZE;

        /* Otherwise first do the PWC, then iterate the pages. */
        preempt_disable();
        smp_mb(); /* see radix__flush_tlb_mm */
        if (!mm_is_thread_local(mm)) {
                if (unlikely(mm_is_singlethreaded(mm))) {
                        exit_flush_lazy_tlbs(mm);
                        goto local;
                }
                if (!mmu_has_feature(MMU_FTR_GTSE)) {
                        unsigned long tgt, type, pg_sizes;

                        tgt = H_RPTI_TARGET_CMMU;
                        type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
                               H_RPTI_TYPE_PRT;
                        pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);

                        if (atomic_read(&mm->context.copros) > 0)
                                tgt |= H_RPTI_TARGET_NMMU;
                        pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
                                               addr, end);
                } else if (cputlb_use_tlbie())
                        _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
                else
                        _tlbiel_va_range_multicast(mm,
                                        addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
        } else {
local:
                _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
        }

        preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
                                unsigned long start, unsigned long end)
{
        radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

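/*
 * Invalidate all translations in the system with IS=3 tlbie, both guest
 * (PRS=1, LPID != 0) and host (PRS=0, LPID == 0), without targeting a
 * particular PID.
 */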
void radix__flush_tlb_all(void)
{
        unsigned long rb, prs, r, rs;
        unsigned long ric = RIC_FLUSH_ALL;

        rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
        prs = 0; /* partition scoped */
        r = 1;   /* radix format */
        rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

        asm volatile("ptesync": : :"memory");
        /*
         * Flush guest entries by passing PRS = 1 and LPID != 0.
         */
        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
        /*
         * Flush host entries by passing PRS = 0 and LPID == 0.
         */
        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
        unsigned long pid = mm->context.id;

        if (unlikely(pid == MMU_NO_CONTEXT))
                return;

        if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
                return;

        /*
         * If this context hasn't run on that CPU before and KVM is
         * around, there's a slim chance that the guest on another
         * CPU just brought in obsolete translation into the TLB of
         * this CPU due to a bad prefetch using the guest PID on
         * the way into the hypervisor.
         *
         * We work around this here. If KVM is possible, we check if
         * any sibling thread is in KVM. If it is, the window may exist
         * and thus we flush that PID from the core.
         *
         * A potential future improvement would be to mark which PIDs
         * have never been used on the system and avoid it if the PID
         * is new and the process has no other cpumask bit set.
         */
        if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
                int cpu = smp_processor_id();
                int sib = cpu_first_thread_sibling(cpu);
                bool flush = false;

                for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
                        if (sib == cpu)
                                continue;
                        if (!cpu_possible(sib))
                                continue;
                        if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
                                flush = true;
                }
                if (flush)
                        _tlbiel_pid(pid, RIC_FLUSH_ALL);
        }
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */