arch/powerpc/mm/book3s64/radix_tlb.c

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>
#include <asm/plpar_wrappers.h>

#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2
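
/*
 * RIC values for the tlbie/tlbiel invalidation control field:
 *   RIC_FLUSH_TLB - invalidate TLB entries only
 *   RIC_FLUSH_PWC - invalidate the Page Walk Cache only
 *   RIC_FLUSH_ALL - invalidate TLB, PWC and the cached process/partition
 *                   table entry
 */
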
/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */

	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
		/* MSR[HV] should flush partition scope translations first. */
		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);

		if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
			for (set = 1; set < num_sets; set++)
				tlbiel_radix_set_isa300(set, is, 0,
							RIC_FLUSH_TLB, 0);
		}
	}

	/* Flush process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);

	if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
		for (set = 1; set < num_sets; set++)
			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
	}

	ppc_after_tlbiel_barrier();
}

void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}
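
/*
 * Low-level invalidation helpers. The __tlbiel_* variants issue a single
 * tlbiel (local to this CPU), the __tlbie_* variants issue a single tlbie
 * (broadcast to all CPUs). Callers are responsible for the surrounding
 * ptesync/eieio/tlbsync ordering, which the _tlbiel_*/_tlbie_* wrappers
 * below provide.
 */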
static __always_inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
					unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
				       unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
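
/*
 * Workarounds for POWER9 tlbie errata, selected by CPU feature bits.
 * When CPU_FTR_P9_TLBIE_ERAT_BUG is set, an additional tlbie against
 * PID/LPID 0 is issued; when CPU_FTR_P9_TLBIE_STQ_BUG is set, the
 * original tlbie is issued a second time. Each extra tlbie is preceded
 * by a ptesync.
 */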
static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
				  unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
					unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_pid(unsigned long pid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
				       unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

/*
 * We use 128 sets in radix mode and 256 sets in hpt mode.
 */
static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_pid(pid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		ppc_after_tlbiel_barrier();
		return;
	}

	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
		/* For the remaining sets, just flush the TLB */
		for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
			__tlbiel_pid(pid, set, RIC_FLUSH_TLB);
	}

	ppc_after_tlbiel_barrier();
	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		fixup_tlbie_pid(pid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
		fixup_tlbie_pid(pid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
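
/*
 * When broadcast tlbie is not usable (cputlb_use_tlbie() is false), the
 * invalidation is instead pushed to every CPU in the mm's cpumask via IPI,
 * each CPU running the local tlbiel sequence. Nest MMU (coprocessor) users
 * still require a broadcast tlbie, which these helpers issue in addition
 * when the mm has coprocessors attached.
 */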
struct tlbiel_pid {
	unsigned long pid;
	unsigned long ric;
};

static void do_tlbiel_pid(void *info)
{
	struct tlbiel_pid *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
	else
		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
}

static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
				unsigned long pid, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_pid t = { .pid = pid, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
	/*
	 * Always want the CPU translations to be invalidated with tlbiel in
	 * these paths, so while coprocessors must use tlbie, we can not
	 * optimise away the tlbiel component.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_pid(pid, RIC_FLUSH_ALL);
}

static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_lpid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
		fixup_tlbie_lpid(lpid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
		fixup_tlbie_lpid(lpid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	/*
	 * Workaround the fact that the "ric" argument to __tlbie_lpid_guest
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				     unsigned long pid, unsigned long page_size,
				     unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
				       unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	ppc_after_tlbiel_barrier();
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	ppc_after_tlbiel_barrier();
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);

	fixup_tlbie_va_range(addr - page_size, pid, ap);
}

static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
				      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie_va(va, pid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

struct tlbiel_va {
	unsigned long pid;
	unsigned long va;
	unsigned long psize;
	unsigned long ric;
};

static void do_tlbiel_va(void *info)
{
	struct tlbiel_va *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
	else
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
}

static inline void _tlbiel_va_multicast(struct mm_struct *mm,
				unsigned long va, unsigned long pid,
				unsigned long psize, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
}

struct tlbiel_va_range {
	unsigned long pid;
	unsigned long start;
	unsigned long end;
	unsigned long page_size;
	unsigned long psize;
	bool also_pwc;
};

static void do_tlbiel_va_range(void *info)
{
	struct tlbiel_va_range *t = info;

	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
			 t->psize, t->also_pwc);
}

static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
					   unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
	fixup_tlbie_lpid_va(va, lpid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				   unsigned long pid, unsigned long page_size,
				   unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				unsigned long pid, unsigned long page_size,
				unsigned long psize, bool also_pwc)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va_range t = { .start = start, .end = end,
				.pid = pid, .page_size = page_size,
				.psize = psize, .also_pwc = also_pwc };

	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
}
/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	radix__local_flush_all_mm(mm);
}
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);
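
/*
 * Helpers that decide how aggressively a flush needs to be broadcast.
 * An mm with a single user and no coprocessors can be flushed locally
 * once any lazy-tlb users on other CPUs have been evicted, while an mm
 * with nest MMU (coprocessor) users must escalate to a full flush
 * because of the PWC caching issue described below.
 */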
static bool mm_is_singlethreaded(struct mm_struct *mm)
{
	if (atomic_read(&mm->context.copros) > 0)
		return false;
	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm)
		return true;
	return false;
}

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * P9 nest MMU has issues with the page walk cache
	 * caching PTEs and not flushing them properly when
	 * RIC = 0 for a PID/LPID invalidate
	 */
	if (atomic_read(&mm->context.copros) > 0)
		return true;
	return false;
}

#ifdef CONFIG_SMP
static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	unsigned long pid = mm->context.id;

	/*
	 * A kthread could have done a mmget_not_zero() after the flushing CPU
	 * checked mm_is_singlethreaded, and be in the process of
	 * kthread_use_mm when interrupted here. In that case, current->mm will
	 * be set to mm, because kthread_use_mm() setting ->mm and switching to
	 * the mm is done with interrupts off.
	 */
	if (current->mm == mm)
		goto out_flush;

	if (current->active_mm == mm) {
		WARN_ON_ONCE(current->mm != NULL);
		/* Is a kernel thread and is using mm as the lazy tlb */
		mmgrab(&init_mm);
		current->active_mm = &init_mm;
		switch_mm_irqs_off(mm, &init_mm, current);
		mmdrop(mm);
	}

	atomic_dec(&mm->context.active_cpus);
	cpumask_clear_cpu(smp_processor_id(), mm_cpumask(mm));

out_flush:
	_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * Would be nice if this was async so it could be run in
	 * parallel with our local flush, but generic code does not
	 * give a good API for it. Could extend the generic code or
	 * make a special powerpc IPI for flushing TLBs.
	 * For now it's not too performance critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
			       (void *)mm, 1);
}
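
/*
 * Flush the whole user address space of an mm. Thread-local mms are
 * flushed with tlbiel; otherwise, depending on platform support, the
 * flush is done with the H_RPT_INVALIDATE hcall (no GTSE), a broadcast
 * tlbie, or a tlbiel IPI multicast to the mm's CPUs.
 */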
void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	/*
	 * Order loads of mm_cpumask vs previous stores to clear ptes before
	 * the invalidate. See barrier in switch_mm_irqs_off
	 */
	smp_mb();
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}

		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt = H_RPTI_TARGET_CMMU;

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
					       H_RPTI_PAGE_ALL, 0, -1UL);
		} else if (cputlb_use_tlbie()) {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		} else {
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
		}
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	}
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);
static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (!fullmm) {
				exit_flush_lazy_tlbs(mm);
				goto local;
			}
		}
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt = H_RPTI_TARGET_CMMU;
			unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
					     H_RPTI_TYPE_PRT;

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, type,
					       H_RPTI_PAGE_ALL, 0, -1UL);
		} else if (cputlb_use_tlbie())
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
	preempt_enable();
}

void radix__flush_all_mm(struct mm_struct *mm)
{
	__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt, pg_sizes, size;

			tgt = H_RPTI_TARGET_CMMU;
			pg_sizes = psize_to_rpti_pgsize(psize);
			size = 1UL << mmu_psize_to_shift(psize);

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
					       pg_sizes, vmaddr,
					       vmaddr + size);
		} else if (cputlb_use_tlbie())
			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
		else
			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
	} else {
local:
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	}
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#else /* CONFIG_SMP */
static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
#endif /* CONFIG_SMP */
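
/*
 * Kernel mappings are flushed by invalidating the whole of PID 0,
 * either locally on every CPU or with a single broadcast tlbie.
 */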
static void do_tlbiel_kernel(void *info)
{
	_tlbiel_pid(0, RIC_FLUSH_ALL);
}

static inline void _tlbiel_kernel_broadcast(void)
{
	on_each_cpu(do_tlbiel_kernel, NULL, 1);
	if (tlbie_capable) {
		/*
		 * Coherent accelerators don't refcount kernel memory mappings,
		 * so have to always issue a tlbie for them. This is quite a
		 * slow path anyway.
		 */
		_tlbie_pid(0, RIC_FLUSH_ALL);
	}
}

/*
 * If kernel TLBIs ever become local rather than global, then
 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
 * assumes kernel TLBIs are global.
 */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	if (!mmu_has_feature(MMU_FTR_GTSE)) {
		unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
		unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
				     H_RPTI_TYPE_PRT;

		pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
				       start, end);
	} else if (cputlb_use_tlbie())
		_tlbie_pid(0, RIC_FLUSH_ALL);
	else
		_tlbiel_kernel_broadcast();
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
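
/*
 * Flush a range of user addresses. Small ranges are flushed page by page;
 * ranges above the ceilings fall back to a full-PID flush. With THP
 * enabled, any 2M-aligned portion of the range is also flushed with the
 * 2M page size, since it may be mapped by huge PTEs.
 */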
static inline void __radix__flush_tlb_range(struct mm_struct *mm,
					    unsigned long start, unsigned long end)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (!mmu_has_feature(MMU_FTR_GTSE) && !local) {
		unsigned long tgt = H_RPTI_TARGET_CMMU;
		unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
			pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
		if (atomic_read(&mm->context.copros) > 0)
			tgt |= H_RPTI_TARGET_NMMU;
		pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes,
				       start, end);
	} else if (full) {
		if (local) {
			_tlbiel_pid(pid, RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					_tlbie_pid(pid, RIC_FLUSH_ALL);
				else
					_tlbie_pid(pid, RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
			}
		}
	} else {
		bool hflush = false;
		unsigned long hstart, hend;

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
			hend = end & PMD_MASK;
			if (hstart < hend)
				hflush = true;
		}

		if (local) {
			asm volatile("ptesync": : :"memory");
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						  PMD_SIZE, MMU_PAGE_2M);
			ppc_after_tlbiel_barrier();
		} else if (cputlb_use_tlbie()) {
			asm volatile("ptesync": : :"memory");
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						 PMD_SIZE, MMU_PAGE_2M);
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		} else {
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, mmu_virtual_psize, false);
			if (hflush)
				_tlbiel_va_range_multicast(mm,
					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
		}
	}
	preempt_enable();
}
void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

	__radix__flush_tlb_range(vma->vm_mm, start, end);
}
EXPORT_SYMBOL(radix__flush_tlb_range);
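
/*
 * Map a byte page size onto the MMU page size index used by the flush
 * helpers; returns -1 for sizes the radix configuration does not support.
 */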
static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}

/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
				unsigned long addr,
				unsigned long page_size)
{
	int psize = radix_get_mmu_psize(page_size);

	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_all_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid_guest(unsigned int lpid)
{
	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize);
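
/*
 * The mmu_gather tlb_flush hook. A fullmm teardown always uses an
 * all-flush (RIC=2) so the cached process table entry is invalidated;
 * otherwise the flush is sized to the gathered range and page size,
 * escalating to a PWC flush when page tables were freed.
 */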
void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;
	unsigned long start = tlb->start;
	unsigned long end = tlb->end;

	/*
	 * if page size is not something we understand, do a full mm flush
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm || tlb->need_flush_all) {
		__flush_all_mm(mm, true);
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->freed_tables)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		if (!tlb->freed_tables)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
}
static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (!mmu_has_feature(MMU_FTR_GTSE) && !local) {
		unsigned long tgt = H_RPTI_TARGET_CMMU;
		unsigned long type = H_RPTI_TYPE_TLB;
		unsigned long pg_sizes = psize_to_rpti_pgsize(psize);

		if (also_pwc)
			type |= H_RPTI_TYPE_PWC;
		if (atomic_read(&mm->context.copros) > 0)
			tgt |= H_RPTI_TARGET_NMMU;
		pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
	} else if (full) {
		if (local) {
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					also_pwc = true;

				_tlbie_pid(pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			}
		}
	} else {
		if (local)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else if (cputlb_use_tlbie())
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, psize, also_pwc);
	}
	preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/* Otherwise first do the PWC, then iterate the pages. */
	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt, type, pg_sizes;

			tgt = H_RPTI_TARGET_CMMU;
			type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
			       H_RPTI_TYPE_PRT;
			pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
					       addr, end);
		} else if (cputlb_use_tlbie())
			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
		else
			_tlbiel_va_range_multicast(mm,
					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	} else {
local:
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}

	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);
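
/*
 * Invalidate all radix translations in the system: a broadcast tlbie
 * with IS=3 is issued twice, first for guest (process scoped, PRS=1,
 * LPID != 0) entries and then for host (partition scoped, PRS=0,
 * LPID = 0) entries.
 */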
void radix__flush_tlb_all(void)
{
	unsigned long rb, prs, r, rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1;	/* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * now flush guest entries by passing PRS = 1 and LPID != 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * now flush host entries by passing PRS = 0 and LPID == 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
		return;

	/*
	 * If this context hasn't run on that CPU before and KVM is
	 * around, there's a slim chance that the guest on another
	 * CPU just brought in obsolete translation into the TLB of
	 * this CPU due to a bad prefetch using the guest PID on
	 * the way into the hypervisor.
	 *
	 * We work around this here. If KVM is possible, we check if
	 * any sibling thread is in KVM. If it is, the window may exist
	 * and thus we flush that PID from the core.
	 *
	 * A potential future improvement would be to mark which PIDs
	 * have never been used on the system and avoid it if the PID
	 * is new and the process has no other cpumask bit set.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
		int cpu = smp_processor_id();
		int sib = cpu_first_thread_sibling(cpu);
		bool flush = false;

		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
			if (sib == cpu)
				continue;
			if (!cpu_possible(sib))
				continue;
			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
				flush = true;
		}
		if (flush)
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */