/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * vma.h
 *
 * Core VMA manipulation API implemented in vma.c.
 */
#ifndef __MM_VMA_H
#define __MM_VMA_H

/*
 * VMA lock generalization
 */
struct vma_prepare {
	struct vm_area_struct *vma;
	struct vm_area_struct *adj_next;
	struct file *file;
	struct address_space *mapping;
	struct anon_vma *anon_vma;
	struct vm_area_struct *insert;
	struct vm_area_struct *remove;
	struct vm_area_struct *remove2;
};

struct unlink_vma_file_batch {
	int count;
	struct vm_area_struct *vmas[8];
};
/*
 * vma munmap operation
 */
struct vma_munmap_struct {
	struct vma_iterator *vmi;
	struct vm_area_struct *vma;     /* The first vma to munmap */
	struct vm_area_struct *prev;    /* vma before the munmap area */
	struct vm_area_struct *next;    /* vma after the munmap area */
	struct list_head *uf;           /* Userfaultfd list_head */
	unsigned long start;            /* Aligned start addr (inclusive) */
	unsigned long end;              /* Aligned end addr (exclusive) */
	unsigned long unmap_start;      /* Unmap PTE start */
	unsigned long unmap_end;        /* Unmap PTE end */
	int vma_count;                  /* Number of vmas that will be removed */
	bool unlock;                    /* Unlock after the munmap */
	bool clear_ptes;                /* If there are outstanding PTEs to be cleared */
	/* 1 byte hole */
	unsigned long nr_pages;         /* Number of pages being removed */
	unsigned long locked_vm;        /* Number of locked pages */
	unsigned long nr_accounted;     /* Number of VM_ACCOUNT pages */
	unsigned long exec_vm;
	unsigned long stack_vm;
	unsigned long data_vm;
};
enum vma_merge_state {
	VMA_MERGE_START,
	VMA_MERGE_ERROR_NOMEM,
	VMA_MERGE_NOMERGE,
	VMA_MERGE_SUCCESS,
};
enum vma_merge_flags {
	VMG_FLAG_DEFAULT = 0,
	/*
	 * If we can expand, simply do so. We know there is nothing to merge to
	 * the right. Does not reset state upon failure to merge. The VMA
	 * iterator is assumed to be positioned at the previous VMA, rather than
	 * at the gap.
	 */
	VMG_FLAG_JUST_EXPAND = 1 << 0,
};
/* Represents a VMA merge operation. */
struct vma_merge_struct {
	struct mm_struct *mm;
	struct vma_iterator *vmi;
	pgoff_t pgoff;
	struct vm_area_struct *prev;
	struct vm_area_struct *next; /* Modified by vma_merge(). */
	struct vm_area_struct *vma; /* Either a new VMA or the one being modified. */
	unsigned long start;
	unsigned long end;
	unsigned long flags;
	struct file *file;
	struct anon_vma *anon_vma;
	struct mempolicy *policy;
	struct vm_userfaultfd_ctx uffd_ctx;
	struct anon_vma_name *anon_name;
	enum vma_merge_flags merge_flags;
	enum vma_merge_state state;
};
static inline bool vmg_nomem(struct vma_merge_struct *vmg)
{
	return vmg->state == VMA_MERGE_ERROR_NOMEM;
}

/* Assumes addr >= vma->vm_start. */
static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma,
				       unsigned long addr)
{
	return vma->vm_pgoff + PHYS_PFN(addr - vma->vm_start);
}
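
/*
 * Worked example (illustrative only): with 4KiB pages, a VMA with
 * vm_pgoff == 0x10 and addr == vma->vm_start + 2 * PAGE_SIZE gives
 * PHYS_PFN(addr - vma->vm_start) == 2, so vma_pgoff_offset() returns 0x12,
 * the offset in pages of 'addr' within the backing file.
 */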
#define VMG_STATE(name, mm_, vmi_, start_, end_, flags_, pgoff_)	\
	struct vma_merge_struct name = {				\
		.mm = mm_,						\
		.vmi = vmi_,						\
		.start = start_,					\
		.end = end_,						\
		.flags = flags_,					\
		.pgoff = pgoff_,					\
		.state = VMA_MERGE_START,				\
		.merge_flags = VMG_FLAG_DEFAULT,			\
	}

#define VMG_VMA_STATE(name, vmi_, prev_, vma_, start_, end_)	\
	struct vma_merge_struct name = {			\
		.mm = vma_->vm_mm,				\
		.vmi = vmi_,					\
		.prev = prev_,					\
		.next = NULL,					\
		.vma = vma_,					\
		.start = start_,				\
		.end = end_,					\
		.flags = vma_->vm_flags,			\
		.pgoff = vma_pgoff_offset(vma_, start_),	\
		.file = vma_->vm_file,				\
		.anon_vma = vma_->anon_vma,			\
		.policy = vma_policy(vma_),			\
		.uffd_ctx = vma_->vm_userfaultfd_ctx,		\
		.anon_name = anon_vma_name(vma_),		\
		.state = VMA_MERGE_START,			\
		.merge_flags = VMG_FLAG_DEFAULT,		\
	}
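
/*
 * Illustrative sketch of how these initializers are typically used (a
 * hypothetical caller; assumes the mmap write lock is held):
 *
 *	VMG_STATE(vmg, mm, &vmi, addr, addr + len, vm_flags, pgoff);
 *	vma = vma_merge_new_range(&vmg);
 *	if (!vma && vmg_nomem(&vmg))
 *		return -ENOMEM;
 */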
#ifdef CONFIG_DEBUG_VM_MAPLE_TREE
void validate_mm(struct mm_struct *mm);
#else
#define validate_mm(mm) do { } while (0)
#endif

/* Required for expand_downwards(). */
void anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma);

/* Required for expand_downwards(). */
void anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma);

int vma_expand(struct vma_merge_struct *vmg);
int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
	       unsigned long start, unsigned long end, pgoff_t pgoff);
static inline int vma_iter_store_gfp(struct vma_iterator *vmi,
				     struct vm_area_struct *vma, gfp_t gfp)
{
	if (vmi->mas.status != ma_start &&
	    ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start)))
		vma_iter_invalidate(vmi);

	__mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1);
	mas_store_gfp(&vmi->mas, vma, gfp);
	if (unlikely(mas_is_err(&vmi->mas)))
		return -ENOMEM;

	return 0;
}
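
/*
 * Illustrative caller sketch (hypothetical): on allocation failure the maple
 * tree state is left in error and -ENOMEM is returned, so callers simply
 * propagate it:
 *
 *	if (vma_iter_store_gfp(vmi, vma, GFP_KERNEL))
 *		return -ENOMEM;
 */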
int
do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
		    struct mm_struct *mm, unsigned long start,
		    unsigned long end, struct list_head *uf, bool unlock);

int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
		  unsigned long start, size_t len, struct list_head *uf,
		  bool unlock);

void remove_vma(struct vm_area_struct *vma, bool unreachable);

void unmap_region(struct ma_state *mas, struct vm_area_struct *vma,
		struct vm_area_struct *prev, struct vm_area_struct *next);
/* We are about to modify the VMA's flags. */
struct vm_area_struct *vma_modify_flags(struct vma_iterator *vmi,
		struct vm_area_struct *prev, struct vm_area_struct *vma,
		unsigned long start, unsigned long end,
		unsigned long new_flags);

/* We are about to modify the VMA's flags and/or anon_name. */
struct vm_area_struct
*vma_modify_flags_name(struct vma_iterator *vmi,
		       struct vm_area_struct *prev,
		       struct vm_area_struct *vma,
		       unsigned long start,
		       unsigned long end,
		       unsigned long new_flags,
		       struct anon_vma_name *new_name);

/* We are about to modify the VMA's memory policy. */
struct vm_area_struct
*vma_modify_policy(struct vma_iterator *vmi,
		   struct vm_area_struct *prev,
		   struct vm_area_struct *vma,
		   unsigned long start, unsigned long end,
		   struct mempolicy *new_pol);

/* We are about to modify the VMA's flags and/or uffd context. */
struct vm_area_struct
*vma_modify_flags_uffd(struct vma_iterator *vmi,
		       struct vm_area_struct *prev,
		       struct vm_area_struct *vma,
		       unsigned long start, unsigned long end,
		       unsigned long new_flags,
		       struct vm_userfaultfd_ctx new_ctx);
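
/*
 * Illustrative sketch of the vma_modify_*() calling convention (a
 * hypothetical mprotect-style caller; assumes these helpers return an
 * ERR_PTR() value on failure):
 *
 *	vma = vma_modify_flags(&vmi, prev, vma, start, end, new_flags);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 */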
struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg);

struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi,
					struct vm_area_struct *vma,
					unsigned long delta);

void unlink_file_vma_batch_init(struct unlink_vma_file_batch *vb);

void unlink_file_vma_batch_final(struct unlink_vma_file_batch *vb);

void unlink_file_vma_batch_add(struct unlink_vma_file_batch *vb,
			       struct vm_area_struct *vma);
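
/*
 * Illustrative batching sketch (hypothetical caller): gather file-backed
 * VMAs so their removal from the file's rmap can be batched rather than
 * unlinked one at a time:
 *
 *	struct unlink_vma_file_batch vb;
 *
 *	unlink_file_vma_batch_init(&vb);
 *	for_each_vma(vmi, vma)
 *		unlink_file_vma_batch_add(&vb, vma);
 *	unlink_file_vma_batch_final(&vb);
 */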
void unlink_file_vma(struct vm_area_struct *vma);

void vma_link_file(struct vm_area_struct *vma);

int vma_link(struct mm_struct *mm, struct vm_area_struct *vma);

struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
	unsigned long addr, unsigned long len, pgoff_t pgoff,
	bool *need_rmap_locks);

struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma);

bool vma_needs_dirty_tracking(struct vm_area_struct *vma);
bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);

int mm_take_all_locks(struct mm_struct *mm);
void mm_drop_all_locks(struct mm_struct *mm);

unsigned long __mmap_region(struct file *file, unsigned long addr,
		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
		struct list_head *uf);
static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma)
{
	/*
	 * We want to check manually if we can change individual PTEs writable
	 * if we can't do that automatically for all PTEs in a mapping. For
	 * private mappings, that's always the case when we have write
	 * permissions as we properly have to handle COW.
	 */
	if (vma->vm_flags & VM_SHARED)
		return vma_wants_writenotify(vma, vma->vm_page_prot);
	return !!(vma->vm_flags & VM_WRITE);
}
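
/*
 * Illustrative example: a MAP_PRIVATE PROT_READ|PROT_WRITE mapping returns
 * true here, since COW means individual PTEs may still be read-only and
 * need a manual write upgrade; a MAP_SHARED mapping instead defers to
 * vma_wants_writenotify() to check whether writenotify applies.
 */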
static inline pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
	return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
}

static inline struct vm_area_struct *vma_prev_limit(struct vma_iterator *vmi,
						    unsigned long min)
{
	return mas_prev(&vmi->mas, min);
}
/*
 * These three helpers classify VMAs for virtual memory accounting.
 */

/*
 * Executable code area - executable, not writable, not stack
 */
static inline bool is_exec_mapping(vm_flags_t flags)
{
	return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC;
}

/*
 * Stack area (including shadow stacks)
 *
 * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous:
 * do_mmap() forbids all other combinations.
 */
static inline bool is_stack_mapping(vm_flags_t flags)
{
	return ((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK);
}

/*
 * Data area - private, writable, not stack
 */
static inline bool is_data_mapping(vm_flags_t flags)
{
	return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
}
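
/*
 * Illustrative example of the classification: a MAP_PRIVATE
 * PROT_READ|PROT_EXEC mapping sets VM_EXEC with VM_WRITE and VM_STACK
 * clear, so only is_exec_mapping() is true; a MAP_PRIVATE
 * PROT_READ|PROT_WRITE mapping satisfies only is_data_mapping(); a
 * MAP_SHARED writable mapping matches none of the three.
 */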
static inline void vma_iter_config(struct vma_iterator *vmi,
		unsigned long index, unsigned long last)
{
	__mas_set_range(&vmi->mas, index, last - 1);
}

static inline void vma_iter_reset(struct vma_iterator *vmi)
{
	mas_reset(&vmi->mas);
}

static inline
struct vm_area_struct *vma_iter_prev_range_limit(struct vma_iterator *vmi, unsigned long min)
{
	return mas_prev_range(&vmi->mas, min);
}

static inline
struct vm_area_struct *vma_iter_next_range_limit(struct vma_iterator *vmi, unsigned long max)
{
	return mas_next_range(&vmi->mas, max);
}
static inline int vma_iter_area_lowest(struct vma_iterator *vmi, unsigned long min,
				       unsigned long max, unsigned long size)
{
	return mas_empty_area(&vmi->mas, min, max - 1, size);
}

static inline int vma_iter_area_highest(struct vma_iterator *vmi, unsigned long min,
					unsigned long max, unsigned long size)
{
	return mas_empty_area_rev(&vmi->mas, min, max - 1, size);
}
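
/*
 * Illustrative sketch (hypothetical caller): search for the lowest free gap
 * of 'size' bytes in [low, high); on success the iterator's index is the
 * start of the gap:
 *
 *	if (vma_iter_area_lowest(&vmi, low, high, size))
 *		return -ENOMEM;
 *	addr = vma_iter_addr(&vmi);
 */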
/*
 * VMA Iterator functions shared between nommu and mmap
 */
static inline int vma_iter_prealloc(struct vma_iterator *vmi,
				    struct vm_area_struct *vma)
{
	return mas_preallocate(&vmi->mas, vma, GFP_KERNEL);
}

static inline void vma_iter_clear(struct vma_iterator *vmi)
{
	mas_store_prealloc(&vmi->mas, NULL);
}

static inline struct vm_area_struct *vma_iter_load(struct vma_iterator *vmi)
{
	return mas_walk(&vmi->mas);
}
/* Store a VMA with preallocated memory */
static inline void vma_iter_store(struct vma_iterator *vmi,
				  struct vm_area_struct *vma)
{
#if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
	if (MAS_WARN_ON(&vmi->mas, vmi->mas.status != ma_start &&
			vmi->mas.index > vma->vm_start)) {
		pr_warn("%lx > %lx\n store vma %lx-%lx\n into slot %lx-%lx\n",
			vmi->mas.index, vma->vm_start, vma->vm_start,
			vma->vm_end, vmi->mas.index, vmi->mas.last);
	}
	if (MAS_WARN_ON(&vmi->mas, vmi->mas.status != ma_start &&
			vmi->mas.last < vma->vm_start)) {
		pr_warn("%lx < %lx\nstore vma %lx-%lx\ninto slot %lx-%lx\n",
			vmi->mas.last, vma->vm_start, vma->vm_start, vma->vm_end,
			vmi->mas.index, vmi->mas.last);
	}
#endif

	if (vmi->mas.status != ma_start &&
	    ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start)))
		vma_iter_invalidate(vmi);

	__mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1);
	mas_store_prealloc(&vmi->mas, vma);
}
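
/*
 * Illustrative sketch of the preallocation pattern these helpers support
 * (hypothetical caller): reserve maple tree nodes up front so the store
 * itself cannot fail at a point where the VMA change is hard to unwind:
 *
 *	vma_iter_config(&vmi, vma->vm_start, vma->vm_end);
 *	if (vma_iter_prealloc(&vmi, vma))
 *		return -ENOMEM;
 *	... make the VMA modifications that must not fail ...
 *	vma_iter_store(&vmi, vma);
 */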
static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
{
	return vmi->mas.index;
}

static inline unsigned long vma_iter_end(struct vma_iterator *vmi)
{
	return vmi->mas.last + 1;
}

static inline int vma_iter_bulk_alloc(struct vma_iterator *vmi,
				      unsigned long count)
{
	return mas_expected_entries(&vmi->mas, count);
}

static inline
struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi)
{
	return mas_prev_range(&vmi->mas, 0);
}
/*
 * Retrieve the next VMA and rewind the iterator to the end of the previous
 * VMA, or, if there is no previous VMA, to index 0.
 */
static inline
struct vm_area_struct *vma_iter_next_rewind(struct vma_iterator *vmi,
		struct vm_area_struct **pprev)
{
	struct vm_area_struct *next = vma_next(vmi);
	struct vm_area_struct *prev = vma_prev(vmi);

	/*
	 * Consider the case where no previous VMA exists. We advance to the
	 * next VMA, skipping any gap, then rewind to the start of the range.
	 *
	 * If we were to unconditionally advance to the next range we'd wind up
	 * at the next VMA again, so we check to ensure there is a previous VMA
	 * to skip over.
	 */
	if (prev)
		vma_iter_next_range(vmi);

	if (pprev)
		*pprev = prev;

	return next;
}
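
/*
 * Illustrative behavior sketch: with VMAs A [0x1000, 0x2000) and
 * B [0x3000, 0x4000) and the iterator positioned in the gap between them,
 * vma_iter_next_rewind(&vmi, &prev) returns B, sets prev to A, and leaves
 * the iterator over the range immediately following A, ready to walk
 * forward again.
 */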
#ifdef CONFIG_64BIT
static inline bool vma_is_sealed(struct vm_area_struct *vma)
{
	return (vma->vm_flags & VM_SEALED);
}

/*
 * Check if a VMA is sealed against modification.
 * Returns true if modification is allowed.
 */
static inline bool can_modify_vma(struct vm_area_struct *vma)
{
	if (unlikely(vma_is_sealed(vma)))
		return false;

	return true;
}

bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior);
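
/*
 * Illustrative sketch (hypothetical caller): VMA-modifying paths are
 * expected to bail out early on sealed VMAs, e.g.:
 *
 *	if (!can_modify_vma(vma))
 *		return -EPERM;
 */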
#else

static inline bool can_modify_vma(struct vm_area_struct *vma)
{
	return true;
}

static inline bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior)
{
	return true;
}

#endif

#endif /* __MM_VMA_H */