/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * vma.h
 *
 * Core VMA manipulation API implemented in vma.c.
 */
#ifndef __MM_VMA_H
#define __MM_VMA_H

/*
 * VMA lock generalization
 */
struct vma_prepare {
	struct vm_area_struct *vma;
	struct vm_area_struct *adj_next;
	struct file *file;
	struct address_space *mapping;
	struct anon_vma *anon_vma;
	struct vm_area_struct *insert;
	struct vm_area_struct *remove;
	struct vm_area_struct *remove2;
};

struct unlink_vma_file_batch {
	int count;
	struct vm_area_struct *vmas[8];
};
/*
 * vma munmap operation
 */
struct vma_munmap_struct {
	struct vma_iterator *vmi;
	struct vm_area_struct *vma;	/* The first vma to munmap */
	struct vm_area_struct *prev;	/* vma before the munmap area */
	struct vm_area_struct *next;	/* vma after the munmap area */
	struct list_head *uf;		/* Userfaultfd list_head */
	unsigned long start;		/* Aligned start addr (inclusive) */
	unsigned long end;		/* Aligned end addr (exclusive) */
	unsigned long unmap_start;	/* Unmap PTE start */
	unsigned long unmap_end;	/* Unmap PTE end */
	int vma_count;			/* Number of vmas that will be removed */
	bool unlock;			/* Unlock after the munmap */
	bool clear_ptes;		/* If there are outstanding PTEs to be cleared */
	/* Accounting totals for the unmapped range: */
	unsigned long nr_pages;		/* Number of pages being removed */
	unsigned long locked_vm;	/* Number of locked pages */
	unsigned long nr_accounted;	/* Number of VM_ACCOUNT pages */
	unsigned long exec_vm;
	unsigned long stack_vm;
	unsigned long data_vm;
};
enum vma_merge_state {
	VMA_MERGE_START,
	VMA_MERGE_ERROR_NOMEM,
	VMA_MERGE_NOMERGE,
	VMA_MERGE_SUCCESS,
};

enum vma_merge_flags {
	VMG_FLAG_DEFAULT = 0,
	/*
	 * If we can expand, simply do so. We know there is nothing to merge to
	 * the right. Does not reset state upon failure to merge. The VMA
	 * iterator is assumed to be positioned at the previous VMA, rather
	 * than at the gap.
	 */
	VMG_FLAG_JUST_EXPAND = 1 << 0,
};
/* Represents a VMA merge operation. */
struct vma_merge_struct {
	struct mm_struct *mm;
	struct vma_iterator *vmi;
	pgoff_t pgoff;
	struct vm_area_struct *prev;
	struct vm_area_struct *next;	/* Modified by vma_merge(). */
	struct vm_area_struct *vma;	/* Either a new VMA or the one being modified. */
	unsigned long start;
	unsigned long end;
	unsigned long flags;
	struct file *file;
	struct anon_vma *anon_vma;
	struct mempolicy *policy;
	struct vm_userfaultfd_ctx uffd_ctx;
	struct anon_vma_name *anon_name;
	enum vma_merge_flags merge_flags;
	enum vma_merge_state state;
};

static inline bool vmg_nomem(struct vma_merge_struct *vmg)
{
	return vmg->state == VMA_MERGE_ERROR_NOMEM;
}
/* Assumes addr >= vma->vm_start. */
static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma,
				       unsigned long addr)
{
	return vma->vm_pgoff + PHYS_PFN(addr - vma->vm_start);
}
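/*
 * Worked example (illustrative, not part of the original header): for a VMA
 * with vm_pgoff = 16, an address two pages past vm_start yields
 * 16 + PHYS_PFN(2 * PAGE_SIZE) = 18, i.e. the page offset within the backing
 * object that corresponds to addr.
 */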
#define VMG_STATE(name, mm_, vmi_, start_, end_, flags_, pgoff_)	\
	struct vma_merge_struct name = {				\
		.mm = mm_,						\
		.vmi = vmi_,						\
		.start = start_,					\
		.end = end_,						\
		.flags = flags_,					\
		.pgoff = pgoff_,					\
		.state = VMA_MERGE_START,				\
		.merge_flags = VMG_FLAG_DEFAULT,			\
	}

#define VMG_VMA_STATE(name, vmi_, prev_, vma_, start_, end_)	\
	struct vma_merge_struct name = {			\
		.mm = vma_->vm_mm,				\
		.vmi = vmi_,					\
		.prev = prev_,					\
		.vma = vma_,					\
		.start = start_,				\
		.end = end_,					\
		.flags = vma_->vm_flags,			\
		.pgoff = vma_pgoff_offset(vma_, start_),	\
		.file = vma_->vm_file,				\
		.anon_vma = vma_->anon_vma,			\
		.policy = vma_policy(vma_),			\
		.uffd_ctx = vma_->vm_userfaultfd_ctx,		\
		.anon_name = anon_vma_name(vma_),		\
		.state = VMA_MERGE_START,			\
		.merge_flags = VMG_FLAG_DEFAULT,		\
	}
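/*
 * Illustrative sketch (not part of the original header): a caller merging a
 * new range would typically initialise the merge state with VMG_STATE() and
 * then attempt the merge, e.g.
 *
 *	VMG_STATE(vmg, mm, &vmi, addr, addr + len, vm_flags, pgoff);
 *	vma = vma_merge_new_range(&vmg);
 *	if (!vma && vmg_nomem(&vmg))
 *		return -ENOMEM;
 *
 * The local variable names above are hypothetical; vma_merge_new_range() and
 * vmg_nomem() are declared/defined in this header.
 */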
#ifdef CONFIG_DEBUG_VM_MAPLE_TREE
void validate_mm(struct mm_struct *mm);
#else
#define validate_mm(mm) do { } while (0)
#endif
__must_check int vma_expand(struct vma_merge_struct *vmg);
__must_check int vma_shrink(struct vma_iterator *vmi,
		struct vm_area_struct *vma,
		unsigned long start, unsigned long end, pgoff_t pgoff);
/* Store the VMA in the maple tree, allocating node memory with @gfp on demand. */
static inline int vma_iter_store_gfp(struct vma_iterator *vmi,
			struct vm_area_struct *vma, gfp_t gfp)
{
	if (vmi->mas.status != ma_start &&
	    ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start)))
		vma_iter_invalidate(vmi);

	__mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1);
	mas_store_gfp(&vmi->mas, vma, gfp);
	if (unlikely(mas_is_err(&vmi->mas)))
		return -ENOMEM;

	return 0;
}
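/*
 * Illustrative usage (hypothetical caller, not part of the original header):
 *
 *	if (vma_iter_store_gfp(&vmi, vma, GFP_KERNEL))
 *		return -ENOMEM;
 *
 * i.e. a failed store surfaces to the caller as -ENOMEM.
 */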
int
do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
		    struct mm_struct *mm, unsigned long start,
		    unsigned long end, struct list_head *uf, bool unlock);

int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
		  unsigned long start, size_t len, struct list_head *uf,
		  bool unlock);

void remove_vma(struct vm_area_struct *vma, bool unreachable);

void unmap_region(struct ma_state *mas, struct vm_area_struct *vma,
		struct vm_area_struct *prev, struct vm_area_struct *next);
/* We are about to modify the VMA's flags. */
__must_check struct vm_area_struct
*vma_modify_flags(struct vma_iterator *vmi,
		struct vm_area_struct *prev, struct vm_area_struct *vma,
		unsigned long start, unsigned long end,
		unsigned long new_flags);

/* We are about to modify the VMA's flags and/or anon_name. */
__must_check struct vm_area_struct
*vma_modify_flags_name(struct vma_iterator *vmi,
		       struct vm_area_struct *prev,
		       struct vm_area_struct *vma,
		       unsigned long start,
		       unsigned long end,
		       unsigned long new_flags,
		       struct anon_vma_name *new_name);

/* We are about to modify the VMA's memory policy. */
__must_check struct vm_area_struct
*vma_modify_policy(struct vma_iterator *vmi,
		   struct vm_area_struct *prev,
		   struct vm_area_struct *vma,
		   unsigned long start, unsigned long end,
		   struct mempolicy *new_pol);

/* We are about to modify the VMA's flags and/or uffd context. */
__must_check struct vm_area_struct
*vma_modify_flags_uffd(struct vma_iterator *vmi,
		       struct vm_area_struct *prev,
		       struct vm_area_struct *vma,
		       unsigned long start, unsigned long end,
		       unsigned long new_flags,
		       struct vm_userfaultfd_ctx new_ctx);

__must_check struct vm_area_struct
*vma_merge_new_range(struct vma_merge_struct *vmg);

__must_check struct vm_area_struct
*vma_merge_extend(struct vma_iterator *vmi,
		  struct vm_area_struct *vma,
		  unsigned long delta);
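/*
 * Illustrative sketch (assumptions, not part of the original header): the
 * vma_modify_*() helpers follow the mprotect()-style split/merge pattern, so
 * that the returned VMA covers exactly [start, end):
 *
 *	vma = vma_modify_flags(&vmi, prev, vma, start, end, new_flags);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *
 * The error-pointer convention shown here is an assumption for illustration.
 */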
void unlink_file_vma_batch_init(struct unlink_vma_file_batch *vb);
void unlink_file_vma_batch_final(struct unlink_vma_file_batch *vb);
void unlink_file_vma_batch_add(struct unlink_vma_file_batch *vb,
			       struct vm_area_struct *vma);
void unlink_file_vma(struct vm_area_struct *vma);

void vma_link_file(struct vm_area_struct *vma);
int vma_link(struct mm_struct *mm, struct vm_area_struct *vma);
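/*
 * Illustrative sketch of the batch API above (hypothetical caller, not part
 * of the original header): unlinking several file-backed VMAs in one batch
 * avoids re-taking the per-mapping lock for every VMA:
 *
 *	struct unlink_vma_file_batch vb;
 *
 *	unlink_file_vma_batch_init(&vb);
 *	for_each_vma(vmi, vma)
 *		unlink_file_vma_batch_add(&vb, vma);
 *	unlink_file_vma_batch_final(&vb);
 */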
struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
	unsigned long addr, unsigned long len, pgoff_t pgoff,
	bool *need_rmap_locks);

struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma);

bool vma_needs_dirty_tracking(struct vm_area_struct *vma);
bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);

int mm_take_all_locks(struct mm_struct *mm);
void mm_drop_all_locks(struct mm_struct *mm);

unsigned long mmap_region(struct file *file, unsigned long addr,
		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
		struct list_head *uf);

int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *brkvma,
		 unsigned long addr, unsigned long request, unsigned long flags);

unsigned long unmapped_area(struct vm_unmapped_area_info *info);
unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info);
static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma)
{
	/*
	 * We want to check manually whether we can mark individual PTEs
	 * writable when we can't do so automatically for all PTEs in a
	 * mapping. For private mappings, that's always the case when we have
	 * write permissions, as we properly have to handle COW.
	 */
	if (vma->vm_flags & VM_SHARED)
		return vma_wants_writenotify(vma, vma->vm_page_prot);
	return !!(vma->vm_flags & VM_WRITE);
}
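/*
 * Illustrative sketch (assumed usage, not taken from this header):
 * change-protection style callers use this helper to decide whether to try
 * upgrading individual PTEs to writable, e.g.
 *
 *	if (vma_wants_manual_pte_write_upgrade(vma))
 *		cp_flags |= MM_CP_TRY_CHANGE_WRITABLE;
 *
 * MM_CP_TRY_CHANGE_WRITABLE is the kernel's existing change-protection flag;
 * cp_flags is a hypothetical local used only for illustration.
 */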
static inline pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
	return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
}

static inline struct vm_area_struct *vma_prev_limit(struct vma_iterator *vmi,
						    unsigned long min)
{
	return mas_prev(&vmi->mas, min);
}
/*
 * These three helpers classify VMAs for virtual memory accounting.
 */

/*
 * Executable code area - executable, not writable, not stack
 */
static inline bool is_exec_mapping(vm_flags_t flags)
{
	return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC;
}

/*
 * Stack area (including shadow stacks)
 *
 * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous:
 * do_mmap() forbids all other combinations.
 */
static inline bool is_stack_mapping(vm_flags_t flags)
{
	return ((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK);
}

/*
 * Data area - private, writable, not stack
 */
static inline bool is_data_mapping(vm_flags_t flags)
{
	return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
}
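/*
 * Worked examples for the helpers above (illustrative, not part of the
 * original header):
 *
 *   - a read-only executable file mapping (VM_READ | VM_EXEC) is an exec
 *     mapping: masking with VM_EXEC | VM_WRITE | VM_STACK leaves VM_EXEC;
 *   - a private read-write anonymous mapping (VM_READ | VM_WRITE) is a data
 *     mapping: masking with VM_WRITE | VM_SHARED | VM_STACK leaves VM_WRITE;
 *   - a shared writable mapping (VM_WRITE | VM_SHARED) matches none of the
 *     three, so it is not charged to exec_vm, stack_vm or data_vm.
 */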
static inline void vma_iter_config(struct vma_iterator *vmi,
		unsigned long index, unsigned long last)
{
	__mas_set_range(&vmi->mas, index, last - 1);
}

static inline void vma_iter_reset(struct vma_iterator *vmi)
{
	mas_reset(&vmi->mas);
}

static inline
struct vm_area_struct *vma_iter_prev_range_limit(struct vma_iterator *vmi, unsigned long min)
{
	return mas_prev_range(&vmi->mas, min);
}

static inline
struct vm_area_struct *vma_iter_next_range_limit(struct vma_iterator *vmi, unsigned long max)
{
	return mas_next_range(&vmi->mas, max);
}

static inline int vma_iter_area_lowest(struct vma_iterator *vmi, unsigned long min,
				       unsigned long max, unsigned long size)
{
	return mas_empty_area(&vmi->mas, min, max - 1, size);
}

static inline int vma_iter_area_highest(struct vma_iterator *vmi, unsigned long min,
					unsigned long max, unsigned long size)
{
	return mas_empty_area_rev(&vmi->mas, min, max - 1, size);
}
/*
 * VMA Iterator functions shared between nommu and mmap
 */
static inline int vma_iter_prealloc(struct vma_iterator *vmi,
				    struct vm_area_struct *vma)
{
	return mas_preallocate(&vmi->mas, vma, GFP_KERNEL);
}

static inline void vma_iter_clear(struct vma_iterator *vmi)
{
	mas_store_prealloc(&vmi->mas, NULL);
}

static inline struct vm_area_struct *vma_iter_load(struct vma_iterator *vmi)
{
	return mas_walk(&vmi->mas);
}
/* Store a VMA with preallocated memory */
static inline void vma_iter_store(struct vma_iterator *vmi,
				  struct vm_area_struct *vma)
{
#if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
	if (MAS_WARN_ON(&vmi->mas, vmi->mas.status != ma_start &&
			vmi->mas.index > vma->vm_start)) {
		pr_warn("%lx > %lx\nstore vma %lx-%lx\ninto slot %lx-%lx\n",
			vmi->mas.index, vma->vm_start, vma->vm_start,
			vma->vm_end, vmi->mas.index, vmi->mas.last);
	}
	if (MAS_WARN_ON(&vmi->mas, vmi->mas.status != ma_start &&
			vmi->mas.last < vma->vm_start)) {
		pr_warn("%lx < %lx\nstore vma %lx-%lx\ninto slot %lx-%lx\n",
			vmi->mas.last, vma->vm_start, vma->vm_start, vma->vm_end,
			vmi->mas.index, vmi->mas.last);
	}
#endif

	if (vmi->mas.status != ma_start &&
	    ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start)))
		vma_iter_invalidate(vmi);

	__mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1);
	mas_store_prealloc(&vmi->mas, vma);
}
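/*
 * Illustrative pairing of the two store paths above (hypothetical caller, not
 * part of the original header): vma_iter_store() relies on node memory having
 * been preallocated, e.g.
 *
 *	if (vma_iter_prealloc(&vmi, vma))
 *		return -ENOMEM;
 *	vma_iter_store(&vmi, vma);
 *
 * whereas vma_iter_store_gfp() allocates on demand and can therefore fail.
 */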
static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
{
	return vmi->mas.index;
}

static inline unsigned long vma_iter_end(struct vma_iterator *vmi)
{
	return vmi->mas.last + 1;
}

static inline int vma_iter_bulk_alloc(struct vma_iterator *vmi,
				      unsigned long count)
{
	return mas_expected_entries(&vmi->mas, count);
}

static inline
struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi)
{
	return mas_prev_range(&vmi->mas, 0);
}
/*
 * Retrieve the next VMA and rewind the iterator to the end of the previous
 * VMA, or, if there is no previous VMA, to index 0.
 */
static inline
struct vm_area_struct *vma_iter_next_rewind(struct vma_iterator *vmi,
		struct vm_area_struct **pprev)
{
	struct vm_area_struct *next = vma_next(vmi);
	struct vm_area_struct *prev = vma_prev(vmi);

	/*
	 * Consider the case where no previous VMA exists. We advance to the
	 * next VMA, skipping any gap, then rewind to the start of the range.
	 *
	 * If we were to unconditionally advance to the next range we'd wind up
	 * at the next VMA again, so we check to ensure there is a previous VMA
	 * to skip over before advancing.
	 */
	if (prev)
		vma_iter_next_range(vmi);

	if (pprev)
		*pprev = prev;

	return next;
}
#ifdef CONFIG_64BIT
static inline bool vma_is_sealed(struct vm_area_struct *vma)
{
	return (vma->vm_flags & VM_SEALED);
}

/*
 * Check whether a VMA can be modified, i.e. is not sealed.
 * Returns true if modification is allowed.
 */
static inline bool can_modify_vma(struct vm_area_struct *vma)
{
	if (unlikely(vma_is_sealed(vma)))
		return false;

	return true;
}

bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior);

#else

static inline bool can_modify_vma(struct vm_area_struct *vma)
{
	return true;
}

static inline bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior)
{
	return true;
}

#endif
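/*
 * Illustrative sketch (hypothetical caller, not part of the original header):
 * VMA-modifying paths are expected to bail out before touching a sealed VMA,
 * e.g.
 *
 *	if (!can_modify_vma(vma))
 *		return -EPERM;
 *
 * -EPERM matching the errno reported for mseal()-protected mappings.
 */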
#if defined(CONFIG_STACK_GROWSUP)
int expand_upwards(struct vm_area_struct *vma, unsigned long address);
#endif

int expand_downwards(struct vm_area_struct *vma, unsigned long address);

int __vm_munmap(unsigned long start, size_t len, bool unlock);

#endif	/* __MM_VMA_H */