/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>

#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2
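
/*
 * Note on the helpers below: RB carries the invalidation scope (the IS field),
 * the TLB set number for the set-by-set local flushes, and, for the *_va
 * variants, the effective address plus the AP (page-size) encoding.  RS
 * carries the target PID shifted into the upper word or, for the lpid
 * variants, the LPID in the low 32 bits.  RIC selects what is invalidated:
 * TLB entries only (RIC_FLUSH_TLB), the page-walk cache only (RIC_FLUSH_PWC),
 * or both (RIC_FLUSH_ALL).  tlbiel only affects the executing thread, while
 * tlbie is broadcast and needs the eieio; tlbsync; ptesync sequence before
 * the invalidation is guaranteed to be visible everywhere.
 */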

static inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
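
/*
 * __tlbiel_pid() invalidates a single TLB set (congruence class) for one PID
 * on the local thread; callers that want the whole PID gone locally loop over
 * every set, as _tlbiel_pid() does below.
 */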

/*
 * We use 128 sets in radix mode and 256 sets in hpt mode.
 */
static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_pid(pid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
		__tlbiel_pid(pid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
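
/*
 * Unlike the tlbiel-based helpers, _tlbie_pid() and _tlbie_va() broadcast the
 * invalidation to all processors, so they are used when the mm may have been
 * active on other threads (see the mm_is_thread_local() callers below).
 */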

static inline void _tlbiel_va(unsigned long va, unsigned long pid,
			      unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	asm volatile("ptesync": : :"memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static inline void _tlbie_va(unsigned long va, unsigned long pid,
			     unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLBs
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */

void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
static void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
}
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;
	unsigned long ap = mmu_get_ap(psize);

	pid = mm ? mm->context.id : 0;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (vma && is_vm_hugetlb_page(vma))
		return __local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr,
					  mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

#ifdef CONFIG_SMP
void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	if (!mm_is_thread_local(mm))
		_tlbie_pid(pid, RIC_FLUSH_TLB);
	else
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void radix__flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	if (!mm_is_thread_local(mm))
		_tlbie_pid(pid, RIC_FLUSH_ALL);
	else
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
{
	tlb->need_flush_all = 1;
}
EXPORT_SYMBOL(radix__flush_tlb_pwc);
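
/*
 * Note that radix__flush_tlb_pwc() does not flush anything immediately: it
 * only marks the mmu_gather, and the deferred page-walk-cache flush happens
 * in radix__tlb_flush(), which sees need_flush_all and calls
 * radix__flush_all_mm() (RIC_FLUSH_ALL covers both the TLB and the PWC).
 */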

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;
	unsigned long ap = mmu_get_ap(psize);

	pid = mm ? mm->context.id : 0;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	if (!mm_is_thread_local(mm))
		_tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
	else
		_tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (vma && is_vm_hugetlb_page(vma))
		return flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr,
				    mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#else /* CONFIG_SMP */
#define radix__flush_all_mm radix__local_flush_all_mm
#endif /* CONFIG_SMP */

void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	_tlbie_pid(0, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
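
/*
 * Kernel translations are installed under PID 0 in radix mode, so flushing
 * all of PID 0 (TLB and page-walk cache) covers any kernel range; the
 * start/end arguments are currently ignored.
 */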

/*
 * Currently, for range flushing, we just do a full mm flush. This is because
 * we use it in code paths where we don't track the page size.
 */
void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;

	radix__flush_tlb_mm(mm);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}

void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;

	psize = radix_get_mmu_psize(page_size);
	/*
	 * if page size is not something we understand, do a full mm flush
	 */
	if (psize != -1 && !tlb->fullmm && !tlb->need_flush_all)
		radix__flush_tlb_range_psize(mm, tlb->start, tlb->end, psize);
	else if (tlb->need_flush_all) {
		tlb->need_flush_all = 0;
		radix__flush_all_mm(mm);
	} else
		radix__flush_tlb_mm(mm);
}

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we will do a bcast tlbie. Just a
 * number at this point copied from x86.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
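
/*
 * For example, with 64K base pages a ceiling of 33 pages means any range
 * larger than roughly 2MB (33 * 64K = 2112K) is handled by a single full-PID
 * flush in radix__flush_tlb_range_psize() rather than page-by-page
 * tlbie/tlbiel; with 4K pages the cut-over is 132K.  The value is a heuristic
 * carried over from x86, not something tuned for this hardware.
 */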

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	unsigned long pid;
	unsigned long addr;
	int local = mm_is_thread_local(mm);
	unsigned long ap = mmu_get_ap(psize);
	unsigned long page_size = 1UL << mmu_psize_defs[psize].shift;

	pid = mm ? mm->context.id : 0;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	if (end == TLB_FLUSH_ALL ||
	    (end - start) > tlb_single_page_flush_ceiling * page_size) {
		if (local)
			_tlbiel_pid(pid, RIC_FLUSH_TLB);
		else
			_tlbie_pid(pid, RIC_FLUSH_TLB);
		return;
	}

	for (addr = start; addr < end; addr += page_size) {
		if (local)
			_tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
		else
			_tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
	}
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	int local = mm_is_thread_local(mm);
	unsigned long ap = mmu_get_ap(mmu_virtual_psize);
	unsigned long pid, end;

	pid = mm ? mm->context.id : 0;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}
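
	/*
	 * (With a 4K base page, the 2M PMD being collapsed spans 512 pages,
	 * so the single radix__flush_all_mm() above, which drops both the TLB
	 * and the page-walk cache for the whole PID, stands in for 512
	 * per-page invalidations.)
	 */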

	/* Otherwise first do the PWC */
	if (local)
		_tlbiel_pid(pid, RIC_FLUSH_PWC);
	else
		_tlbie_pid(pid, RIC_FLUSH_PWC);

	/* Then iterate the pages */
	end = addr + HPAGE_PMD_SIZE;
	for (; addr < end; addr += PAGE_SIZE) {
		if (local)
			_tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
		else
			_tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
	}
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
			      unsigned long page_size)
{
	unsigned long rb, rs, prs, r;
	unsigned long ap;
	unsigned long ric = RIC_FLUSH_TLB;

	ap = mmu_get_ap(radix_get_mmu_psize(page_size));
	rb = gpa & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid & ((1UL << 32) - 1);
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
EXPORT_SYMBOL(radix__flush_tlb_lpid_va);

void radix__flush_tlb_lpid(unsigned long lpid)
{
	unsigned long rb, rs, prs, r;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */
	rs = lpid & ((1UL << 32) - 1);
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
EXPORT_SYMBOL(radix__flush_tlb_lpid);
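
/*
 * The two lpid variants above are partition scoped (prs = 0): they target
 * guest translations identified by LPID (and by guest physical address for
 * the _va form) rather than a host process PID.
 */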

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

void radix__flush_tlb_all(void)
{
	unsigned long rb, prs, r, rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * now flush guest entries by passing PRS = 1 and LPID != 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
	/*
	 * now flush host entries by passing PRS = 0 and LPID == 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
	trace_tlbie(0, 0, rb, 0, ric, prs, r);
}

void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
				 unsigned long address)
{
	/*
	 * We track page size in pte only for DD1, so we can
	 * call this only on DD1.
	 */
	if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) {
		VM_WARN_ON(1);
		return;
	}

	if (old_pte & R_PAGE_LARGE)
		radix__flush_tlb_page_psize(mm, address, MMU_PAGE_2M);
	else
		radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize);
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
	unsigned int pid = mm->context.id;

	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/*
	 * If this context hasn't run on that CPU before and KVM is
	 * around, there's a slim chance that the guest on another
	 * CPU just brought an obsolete translation into the TLB of
	 * this CPU due to a bad prefetch using the guest PID on
	 * the way into the hypervisor.
	 *
	 * We work around this here. If KVM is possible, we check if
	 * any sibling thread is in KVM. If it is, the window may exist
	 * and thus we flush that PID from the core.
	 *
	 * A potential future improvement would be to mark which PIDs
	 * have never been used on the system and avoid it if the PID
	 * is new and the process has no other cpumask bit set.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
		int cpu = smp_processor_id();
		int sib = cpu_first_thread_sibling(cpu);
		bool flush = false;

		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
			if (sib == cpu)
				continue;
			if (paca[sib].kvm_hstate.kvm_vcpu)
				flush = true;
		}
		if (flush)
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */