1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _LINUX_PAGEWALK_H
3 #define _LINUX_PAGEWALK_H
9 /* Locking requirement during a page walk. */
11 /* mmap_lock should be locked for read to stabilize the vma tree */
13 /* vma will be write-locked during the walk */
15 /* vma is expected to be already write-locked during the walk */
16 PGWALK_WRLOCK_VERIFY
= 2,
20 * struct mm_walk_ops - callbacks for walk_page_range
21 * @pgd_entry: if set, called for each non-empty PGD (top-level) entry
22 * @p4d_entry: if set, called for each non-empty P4D entry
23 * @pud_entry: if set, called for each non-empty PUD entry
24 * @pmd_entry: if set, called for each non-empty PMD entry
 * This handler must be able to handle pmd_trans_huge() pmds.
 * It may simply choose to split_huge_page() instead of
 * handling them explicitly.
28 * @pte_entry: if set, called for each PTE (lowest-level) entry
29 * including empty ones, except if @install_pte is set.
30 * If @install_pte is set, @pte_entry is called only for
32 * @pte_hole: if set, called for each hole at all levels,
33 * depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD.
34 * Any folded depths (where PTRS_PER_P?D is equal to 1)
35 * are skipped. If @install_pte is specified, this will
36 * not trigger for any populated ranges.
37 * @hugetlb_entry: if set, called for each hugetlb entry. This hook
38 * function is called with the vma lock held, in order to
39 * protect against a concurrent freeing of the pte_t* or
40 * the ptl. In some cases, the hook function needs to drop
41 * and retake the vma lock in order to avoid deadlocks
42 * while calling other functions. In such cases the hook
43 * function must either refrain from accessing the pte or
44 * ptl after dropping the vma lock, or else revalidate
45 * those items after re-acquiring the vma lock and before
 * @test_walk: caller-specific callback function to determine whether
 * we walk over the current vma or not. Returning 0 means
 * "do page table walk over the current vma", returning
 * a negative value means "abort current page table walk
 * right now" and returning 1 means "skip the current vma".
 * Note that this callback is not called when the caller
 * passes in a single VMA as for walk_page_vma().
54 * @pre_vma: if set, called before starting walk on a non-null vma.
55 * @post_vma: if set, called after a walk on a non-null vma, provided
56 * that @pre_vma and the vma walk succeeded.
57 * @install_pte: if set, missing page table entries are installed and
58 * thus all levels are always walked in the specified
59 * range. This callback is then invoked at the PTE level
60 * (having split any THP pages prior), providing the PTE to
61 * install. If allocations fail, the walk is aborted. This
62 * operation is only available for userland memory. Not
63 * usable for hugetlb ranges.
65 * p?d_entry callbacks are called even if those levels are folded on a
66 * particular architecture/configuration.
69 int (*pgd_entry
)(pgd_t
*pgd
, unsigned long addr
,
70 unsigned long next
, struct mm_walk
*walk
);
71 int (*p4d_entry
)(p4d_t
*p4d
, unsigned long addr
,
72 unsigned long next
, struct mm_walk
*walk
);
73 int (*pud_entry
)(pud_t
*pud
, unsigned long addr
,
74 unsigned long next
, struct mm_walk
*walk
);
75 int (*pmd_entry
)(pmd_t
*pmd
, unsigned long addr
,
76 unsigned long next
, struct mm_walk
*walk
);
77 int (*pte_entry
)(pte_t
*pte
, unsigned long addr
,
78 unsigned long next
, struct mm_walk
*walk
);
/*
 * Optional: called for each hole at all levels. @depth is -1 if not
 * known, otherwise 0:PGD, 1:P4D, 2:PUD, 3:PMD.
 */
int (*pte_hole)(unsigned long addr, unsigned long next,
		int depth, struct mm_walk *walk);
81 int (*hugetlb_entry
)(pte_t
*pte
, unsigned long hmask
,
82 unsigned long addr
, unsigned long next
,
83 struct mm_walk
*walk
);
/*
 * Caller-specific filter deciding whether the current vma is walked:
 * 0 walks it, 1 skips it, a negative value aborts the walk. Not called
 * when the caller passes in a single VMA, as for walk_page_vma().
 */
int (*test_walk)(unsigned long addr, unsigned long next,
		 struct mm_walk *walk);
/* Optional: called before starting the walk on a non-null vma. */
int (*pre_vma)(unsigned long start, unsigned long end,
	       struct mm_walk *walk);
/*
 * Optional: called after a walk on a non-null vma, provided that
 * pre_vma and the vma walk succeeded.
 */
void (*post_vma)(struct mm_walk *walk);
89 int (*install_pte
)(unsigned long addr
, unsigned long next
,
90 pte_t
*ptep
, struct mm_walk
*walk
);
91 enum page_walk_lock walk_lock
;
95 * Action for pud_entry / pmd_entry callbacks.
96 * ACTION_SUBTREE is the default
98 enum page_walk_action
{
99 /* Descend to next level, splitting huge pages if needed and possible */
101 /* Continue to next entry at this level (ignoring any subtree) */
103 /* Call again for this entry */
108 * struct mm_walk - walk_page_range data
109 * @ops: operation to call during the walk
110 * @mm: mm_struct representing the target process of page table walk
111 * @pgd: pointer to PGD; only valid with no_vma (otherwise set to NULL)
112 * @vma: vma currently walked (NULL if walking outside vmas)
113 * @action: next action to perform (see enum page_walk_action)
114 * @no_vma: walk ignoring vmas (vma will always be NULL)
115 * @private: private data for callbacks' usage
117 * (see the comment on walk_page_range() for more details)
120 const struct mm_walk_ops
*ops
;
121 struct mm_struct
*mm
;
123 struct vm_area_struct
*vma
;
124 enum page_walk_action action
;
129 int walk_page_range(struct mm_struct
*mm
, unsigned long start
,
130 unsigned long end
, const struct mm_walk_ops
*ops
,
132 int walk_page_range_novma(struct mm_struct
*mm
, unsigned long start
,
133 unsigned long end
, const struct mm_walk_ops
*ops
,
136 int walk_page_range_vma(struct vm_area_struct
*vma
, unsigned long start
,
137 unsigned long end
, const struct mm_walk_ops
*ops
,
139 int walk_page_vma(struct vm_area_struct
*vma
, const struct mm_walk_ops
*ops
,
141 int walk_page_mapping(struct address_space
*mapping
, pgoff_t first_index
,
142 pgoff_t nr
, const struct mm_walk_ops
*ops
,
145 typedef int __bitwise folio_walk_flags_t
;
148 * Walk migration entries as well. Careful: a large folio might get split
#define FW_MIGRATION	((__force folio_walk_flags_t)BIT(0))

/* Also walk shared zeropages, both small and huge. */
#define FW_ZEROPAGE	((__force folio_walk_flags_t)BIT(1))
156 enum folio_walk_level
{
163 * struct folio_walk - folio_walk_start() / folio_walk_end() data
164 * @page: exact folio page referenced (if applicable)
165 * @level: page table level identifying the entry type
166 * @pte: pointer to the page table entry (FW_LEVEL_PTE).
167 * @pmd: pointer to the page table entry (FW_LEVEL_PMD).
168 * @pud: pointer to the page table entry (FW_LEVEL_PUD).
169 * @ptl: pointer to the page table lock.
171 * (see folio_walk_start() documentation for more details)
176 enum folio_walk_level level
;
188 struct vm_area_struct
*vma
;
192 struct folio
*folio_walk_start(struct folio_walk
*fw
,
193 struct vm_area_struct
*vma
, unsigned long addr
,
194 folio_walk_flags_t flags
);
196 #define folio_walk_end(__fw, __vma) do { \
197 spin_unlock((__fw)->ptl); \
198 if (likely((__fw)->level == FW_LEVEL_PTE)) \
199 pte_unmap((__fw)->ptep); \
200 vma_pgtable_walk_end(__vma); \
203 #endif /* _LINUX_PAGEWALK_H */