// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/rmap.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include "internal.h"
static inline bool not_found(struct page_vma_mapped_walk *pvmw)
{
	page_vma_mapped_walk_done(pvmw);
	return false;
}
static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
		    spinlock_t **ptlp)
{
	pte_t ptent;
	if (pvmw->flags & PVMW_SYNC) {
		/* Use the stricter lookup */
		pvmw->pte = pte_offset_map_lock(pvmw->vma->vm_mm, pvmw->pmd,
						pvmw->address, &pvmw->ptl);
		*ptlp = pvmw->ptl;
		return !!pvmw->pte;
	}
	/*
	 * It is important to return the ptl corresponding to pte,
	 * in case *pvmw->pmd changes underneath us; so we need to
	 * return it even when choosing not to lock, in case caller
	 * proceeds to loop over next ptes, and finds a match later.
	 * Though, in most cases, page lock already protects this.
	 */
	pvmw->pte = pte_offset_map_rw_nolock(pvmw->vma->vm_mm, pvmw->pmd,
					     pvmw->address, pmdvalp, ptlp);
	if (!pvmw->pte)
		return false;

	ptent = ptep_get(pvmw->pte);

	if (pvmw->flags & PVMW_MIGRATION) {
		if (!is_swap_pte(ptent))
			return false;
	} else if (is_swap_pte(ptent)) {
		swp_entry_t entry;

		/*
		 * Handle un-addressable ZONE_DEVICE memory.
		 *
		 * We get here when we are trying to unmap a private
		 * device page from the process address space. Such
		 * page is not CPU accessible and thus is mapped as
		 * a special swap entry, nonetheless it still does
		 * count as a valid regular mapping for the page
		 * (and is accounted as such in page maps count).
		 *
		 * So handle this special case as if it was a normal
		 * page mapping ie lock CPU page table and return true.
		 *
		 * For more details on device private memory see HMM
		 * (include/linux/hmm.h or mm/hmm.c).
		 */
		entry = pte_to_swp_entry(ptent);
		if (!is_device_private_entry(entry) &&
		    !is_device_exclusive_entry(entry))
			return false;
	} else if (!pte_present(ptent)) {
		return false;
	}
	spin_lock(*ptlp);
	if (unlikely(!pmd_same(*pmdvalp, pmdp_get_lockless(pvmw->pmd)))) {
		pte_unmap_unlock(pvmw->pte, *ptlp);
		pvmw->pte = NULL;
		return false;
	}
	pvmw->ptl = *ptlp;

	return true;
}
/**
 * check_pte - check if [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages) is
 * mapped at the @pvmw->pte
 * @pvmw: page_vma_mapped_walk struct, includes a pair pte and pfn range
 * for checking
 *
 * page_vma_mapped_walk() found a place where pfn range is *potentially*
 * mapped. check_pte() has to validate this.
 *
 * pvmw->pte may point to empty PTE, swap PTE or PTE pointing to
 * arbitrary page.
 *
 * If PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains migration
 * entry that points to [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages)
 *
 * If PVMW_MIGRATION flag is not set, returns true if pvmw->pte points to
 * [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages)
 *
 * Otherwise, return false.
 */
static bool check_pte(struct page_vma_mapped_walk *pvmw)
{
	unsigned long pfn;
	pte_t ptent = ptep_get(pvmw->pte);

	if (pvmw->flags & PVMW_MIGRATION) {
		swp_entry_t entry;

		if (!is_swap_pte(ptent))
			return false;
		entry = pte_to_swp_entry(ptent);

		if (!is_migration_entry(entry) &&
		    !is_device_exclusive_entry(entry))
			return false;

		pfn = swp_offset_pfn(entry);
	} else if (is_swap_pte(ptent)) {
		swp_entry_t entry;

		/* Handle un-addressable ZONE_DEVICE memory */
		entry = pte_to_swp_entry(ptent);
		if (!is_device_private_entry(entry) &&
		    !is_device_exclusive_entry(entry))
			return false;

		pfn = swp_offset_pfn(entry);
	} else {
		if (!pte_present(ptent))
			return false;

		pfn = pte_pfn(ptent);
	}

	return (pfn - pvmw->pfn) < pvmw->nr_pages;
}
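/*
 * Illustrative note (not upstream code): the single unsigned comparison at
 * the end of check_pte() covers both ends of the pfn range.  For example,
 * with pvmw->pfn = 0x1000 and pvmw->nr_pages = 4:
 *
 *	pfn = 0x1002: 0x1002 - 0x1000 = 2         -> 2 < 4, match
 *	pfn = 0x1004: 0x1004 - 0x1000 = 4         -> not < 4, no match
 *	pfn = 0x0fff: 0x0fff - 0x1000 = ULONG_MAX -> not < 4, no match
 *
 * A pfn below the range wraps to a huge unsigned value, so no separate
 * lower-bound check is needed.
 */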
/* Returns true if the two ranges overlap.  Careful to not overflow. */
static bool check_pmd(unsigned long pfn, struct page_vma_mapped_walk *pvmw)
{
	if ((pfn + HPAGE_PMD_NR - 1) < pvmw->pfn)
		return false;
	if (pfn > pvmw->pfn + pvmw->nr_pages - 1)
		return false;
	return true;
}
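/*
 * Illustrative note (not upstream code): a PMD entry starting at @pfn covers
 * pfns [pfn, pfn + HPAGE_PMD_NR - 1].  It misses the walk's range only if it
 * ends before pvmw->pfn or starts after pvmw->pfn + pvmw->nr_pages - 1, which
 * is what the two comparisons above test; working with the inclusive "- 1"
 * ends keeps the sums from wrapping past ULONG_MAX for ranges that touch the
 * top of the pfn space.
 */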
static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
{
	pvmw->address = (pvmw->address + size) & ~(size - 1);
	if (!pvmw->address)
		pvmw->address = ULONG_MAX;
}
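/*
 * Illustrative note (not upstream code): step_forward() rounds the address up
 * to the next @size boundary, e.g. with a 2MB PMD_SIZE:
 *
 *	pvmw->address = 0x1234000;
 *	step_forward(pvmw, PMD_SIZE);	// (0x1234000 + 0x200000) & ~0x1fffff
 *					//	-> 0x1400000
 *
 * If the addition wraps to 0 at the top of the address space, the address is
 * pinned to ULONG_MAX so the caller's "while (pvmw->address < end)" loop
 * terminates.
 */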
/**
 * page_vma_mapped_walk - check if @pvmw->pfn is mapped in @pvmw->vma at
 * @pvmw->address
 * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags
 * must be set. pmd, pte and ptl must be NULL.
 *
 * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte point
 * to relevant page table entries. @pvmw->ptl is locked. @pvmw->address is
 * adjusted if needed (for PTE-mapped THPs).
 *
 * If @pvmw->pmd is set but @pvmw->pte is not, you have found PMD-mapped page
 * (usually THP). For PTE-mapped THP, you should run page_vma_mapped_walk() in
 * a loop to find all PTEs that map the THP.
 *
 * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
 * regardless of which page table level the page is mapped at. @pvmw->pmd is
 * NULL.
 *
 * Returns false if there are no more page table entries for the page in
 * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped.
 *
 * If you need to stop the walk before page_vma_mapped_walk() returned false,
 * use page_vma_mapped_walk_done(). It will do the housekeeping.
 */
bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
{
	struct vm_area_struct *vma = pvmw->vma;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long end;
	spinlock_t *ptl;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t pmde;

	/* The only possible pmd mapping has been handled on last iteration */
	if (pvmw->pmd && !pvmw->pte)
		return not_found(pvmw);
	if (unlikely(is_vm_hugetlb_page(vma))) {
		struct hstate *hstate = hstate_vma(vma);
		unsigned long size = huge_page_size(hstate);

		/* The only possible mapping was handled on last iteration */
		if (pvmw->pte)
			return not_found(pvmw);
		/*
		 * All callers that get here will already hold the
		 * i_mmap_rwsem.  Therefore, no additional locks need to be
		 * taken before calling hugetlb_walk().
		 */
		pvmw->pte = hugetlb_walk(vma, pvmw->address, size);
		if (!pvmw->pte)
			return false;

		pvmw->ptl = huge_pte_lock(hstate, mm, pvmw->pte);
		if (!check_pte(pvmw))
			return not_found(pvmw);
		return true;
	}
	end = vma_address_end(pvmw);
	if (pvmw->pte)
		goto next_pte;
restart:
	do {
		pgd = pgd_offset(mm, pvmw->address);
		if (!pgd_present(*pgd)) {
			step_forward(pvmw, PGDIR_SIZE);
			continue;
		}
		p4d = p4d_offset(pgd, pvmw->address);
		if (!p4d_present(*p4d)) {
			step_forward(pvmw, P4D_SIZE);
			continue;
		}
		pud = pud_offset(p4d, pvmw->address);
		if (!pud_present(*pud)) {
			step_forward(pvmw, PUD_SIZE);
			continue;
		}

		pvmw->pmd = pmd_offset(pud, pvmw->address);
		/*
		 * Make sure the pmd value isn't cached in a register by the
		 * compiler and used as a stale value after we've observed a
		 * subsequent update.
		 */
		pmde = pmdp_get_lockless(pvmw->pmd);

		if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde) ||
		    (pmd_present(pmde) && pmd_devmap(pmde))) {
			pvmw->ptl = pmd_lock(mm, pvmw->pmd);
			pmde = *pvmw->pmd;
			if (!pmd_present(pmde)) {
				swp_entry_t entry;

				if (!thp_migration_supported() ||
				    !(pvmw->flags & PVMW_MIGRATION))
					return not_found(pvmw);
				entry = pmd_to_swp_entry(pmde);
				if (!is_migration_entry(entry) ||
				    !check_pmd(swp_offset_pfn(entry), pvmw))
					return not_found(pvmw);
				return true;
			}
			if (likely(pmd_trans_huge(pmde) || pmd_devmap(pmde))) {
				if (pvmw->flags & PVMW_MIGRATION)
					return not_found(pvmw);
				if (!check_pmd(pmd_pfn(pmde), pvmw))
					return not_found(pvmw);
				return true;
			}
			/* THP pmd was split under us: handle on pte level */
			spin_unlock(pvmw->ptl);
			pvmw->ptl = NULL;
		} else if (!pmd_present(pmde)) {
			/*
			 * If PVMW_SYNC, take and drop THP pmd lock so that we
			 * cannot return prematurely, while zap_huge_pmd() has
			 * cleared *pmd but not decremented compound_mapcount().
			 */
			if ((pvmw->flags & PVMW_SYNC) &&
			    thp_vma_suitable_order(vma, pvmw->address,
						   PMD_ORDER) &&
			    (pvmw->nr_pages >= HPAGE_PMD_NR)) {
				spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);

				spin_unlock(ptl);
			}
			step_forward(pvmw, PMD_SIZE);
			continue;
		}
		if (!map_pte(pvmw, &pmde, &ptl)) {
			if (!pvmw->pte)
				goto restart;
			goto next_pte;
		}
this_pte:
		if (check_pte(pvmw))
			return true;
next_pte:
		do {
			pvmw->address += PAGE_SIZE;
			if (pvmw->address >= end)
				return not_found(pvmw);
			/* Did we cross page table boundary? */
			if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
				if (pvmw->ptl) {
					spin_unlock(pvmw->ptl);
					pvmw->ptl = NULL;
				}
				pte_unmap(pvmw->pte);
				pvmw->pte = NULL;
				goto restart;
			}
			pvmw->pte++;
		} while (pte_none(ptep_get(pvmw->pte)));

		if (!pvmw->ptl) {
			spin_lock(ptl);
			if (unlikely(!pmd_same(pmde, pmdp_get_lockless(pvmw->pmd)))) {
				pte_unmap_unlock(pvmw->pte, ptl);
				pvmw->pte = NULL;
				pvmw->ptl = NULL;
				goto restart;
			}
			pvmw->ptl = ptl;
		}
		goto this_pte;
	} while (pvmw->address < end);

	return false;
}
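/*
 * Usage sketch (illustrative, not part of this file): rmap walkers such as
 * folio_referenced_one() and try_to_unmap_one() typically drive this API as:
 *
 *	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
 *
 *	while (page_vma_mapped_walk(&pvmw)) {
 *		if (!pvmw.pte) {
 *			// PMD-mapped THP: inspect *pvmw.pmd
 *			continue;
 *		}
 *		// pvmw.pte maps one page of the folio at pvmw.address,
 *		// with pvmw.ptl held
 *	}
 *
 * A caller that breaks out of the loop early must call
 * page_vma_mapped_walk_done(&pvmw) itself to drop the lock and unmap the PTE.
 */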
#ifdef CONFIG_MEMORY_FAILURE
/**
 * page_mapped_in_vma - check whether a page is really mapped in a VMA
 * @page: the page to test
 * @vma: the VMA to test
 *
 * Return: The address the page is mapped at if the page is in the range
 * covered by the VMA and present in the page table.  If the page is
 * outside the VMA or not present, returns -EFAULT.
 * Only valid for normal file or anonymous VMAs.
 */
unsigned long page_mapped_in_vma(const struct page *page,
		struct vm_area_struct *vma)
{
	const struct folio *folio = page_folio(page);
	struct page_vma_mapped_walk pvmw = {
		.pfn = page_to_pfn(page),
		.nr_pages = 1,
		.vma = vma,
		.flags = PVMW_SYNC,
	};

	pvmw.address = vma_address(vma, page_pgoff(folio, page), 1);
	if (pvmw.address == -EFAULT)
		goto out;
	if (!page_vma_mapped_walk(&pvmw))
		return -EFAULT;
	page_vma_mapped_walk_done(&pvmw);
out:
	return pvmw.address;
}
#endif
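/*
 * Usage sketch (illustrative, not part of this file): a memory-failure style
 * caller can resolve the user address at which a poisoned page is mapped:
 *
 *	unsigned long addr = page_mapped_in_vma(page, vma);
 *
 *	if (addr == -EFAULT)
 *		return;		// page not mapped in this VMA
 *	// otherwise addr is the user address to report for this mapping
 */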