/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_HUGE_MM_H
#define _LINUX_HUGE_MM_H

#include <linux/mm_types.h>

#include <linux/fs.h> /* only for vma_is_dax() */
#include <linux/kobject.h>

vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		  pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
		  struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
void huge_pmd_set_accessed(struct vm_fault *vmf);
int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		  pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
		  struct vm_area_struct *vma);
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
#else
static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
{
}
#endif

vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf);
bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
			   pmd_t *pmd, unsigned long addr, unsigned long next);
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd,
		 unsigned long addr);
int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud,
		 unsigned long addr);
bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
		   unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd);
int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
		    pmd_t *pmd, unsigned long addr, pgprot_t newprot,
		    unsigned long cp_flags);

vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
enum transparent_hugepage_flag {
	TRANSPARENT_HUGEPAGE_UNSUPPORTED,
	TRANSPARENT_HUGEPAGE_FLAG,
	TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
	TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
	TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
	TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG,
	TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
	TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
	TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG,
};

struct kobj_attribute;
ssize_t single_hugepage_flag_store(struct kobject *kobj,
				   struct kobj_attribute *attr,
				   const char *buf, size_t count,
				   enum transparent_hugepage_flag flag);
ssize_t single_hugepage_flag_show(struct kobject *kobj,
				  struct kobj_attribute *attr, char *buf,
				  enum transparent_hugepage_flag flag);
extern struct kobj_attribute shmem_enabled_attr;
extern struct kobj_attribute thpsize_shmem_enabled_attr;
/*
 * Mask of all large folio orders supported for anonymous THP; all orders up to
 * and including PMD_ORDER, except order-0 (which is not "huge") and order-1
 * (which is a limitation of the THP implementation).
 */
#define THP_ORDERS_ALL_ANON	((BIT(PMD_ORDER + 1) - 1) & ~(BIT(0) | BIT(1)))

/*
 * Mask of all large folio orders supported for file THP. Folios in a DAX
 * file are never split and the MAX_PAGECACHE_ORDER limit does not apply to
 * them. The same goes for PFNMAPs, where there is neither a page* nor a
 * pagecache.
 */
#define THP_ORDERS_ALL_SPECIAL		\
	(BIT(PMD_ORDER) | BIT(PUD_ORDER))
#define THP_ORDERS_ALL_FILE_DEFAULT	\
	((BIT(MAX_PAGECACHE_ORDER + 1) - 1) & ~BIT(0))

/*
 * Mask of all large folio orders supported for THP.
 */
#define THP_ORDERS_ALL	\
	(THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_SPECIAL | THP_ORDERS_ALL_FILE_DEFAULT)
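
/*
 * Worked example (illustrative, not part of the original header): with 4K
 * base pages PMD_ORDER is typically 9, so BIT(PMD_ORDER + 1) - 1 sets bits
 * 0..9 and clearing BIT(0) | BIT(1) leaves orders 2..9, i.e.
 * THP_ORDERS_ALL_ANON evaluates to 0x3fc.
 */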
#define TVA_SMAPS		(1 << 0)	/* Will be used for procfs */
#define TVA_IN_PF		(1 << 1)	/* Page fault handler */
#define TVA_ENFORCE_SYSFS	(1 << 2)	/* Obey sysfs configuration */

#define thp_vma_allowable_order(vma, vm_flags, tva_flags, order) \
	(!!thp_vma_allowable_orders(vma, vm_flags, tva_flags, BIT(order)))

#define split_folio(f) split_folio_to_list(f, NULL)
#ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES
#define HPAGE_PMD_SHIFT PMD_SHIFT
#define HPAGE_PUD_SHIFT PUD_SHIFT
#else
#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; })
#endif

#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
#define HPAGE_PMD_MASK	(~(HPAGE_PMD_SIZE - 1))
#define HPAGE_PMD_SIZE	((1UL) << HPAGE_PMD_SHIFT)

#define HPAGE_PUD_ORDER (HPAGE_PUD_SHIFT-PAGE_SHIFT)
#define HPAGE_PUD_NR (1<<HPAGE_PUD_ORDER)
#define HPAGE_PUD_MASK	(~(HPAGE_PUD_SIZE - 1))
#define HPAGE_PUD_SIZE	((1UL) << HPAGE_PUD_SHIFT)
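
/*
 * Worked example (illustrative, not part of the original header): on a
 * configuration with 4K pages and PMD_SHIFT == 21, HPAGE_PMD_SIZE is 2MB,
 * HPAGE_PMD_ORDER is 9 and HPAGE_PMD_NR is 512 base pages per PMD leaf.
 */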
enum mthp_stat_item {
	MTHP_STAT_ANON_FAULT_ALLOC,
	MTHP_STAT_ANON_FAULT_FALLBACK,
	MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
	MTHP_STAT_SWPOUT_FALLBACK,
	MTHP_STAT_SHMEM_ALLOC,
	MTHP_STAT_SHMEM_FALLBACK,
	MTHP_STAT_SHMEM_FALLBACK_CHARGE,
	MTHP_STAT_SPLIT_FAILED,
	MTHP_STAT_SPLIT_DEFERRED,
	MTHP_STAT_NR_ANON_PARTIALLY_MAPPED,
	__MTHP_STAT_COUNT
};

#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS)
struct mthp_stat {
	unsigned long stats[ilog2(MAX_PTRS_PER_PTE) + 1][__MTHP_STAT_COUNT];
};

DECLARE_PER_CPU(struct mthp_stat, mthp_stats);
static inline void mod_mthp_stat(int order, enum mthp_stat_item item, int delta)
{
	if (order <= 0 || order > PMD_ORDER)
		return;

	this_cpu_add(mthp_stats.stats[order][item], delta);
}

static inline void count_mthp_stat(int order, enum mthp_stat_item item)
{
	mod_mthp_stat(order, item, 1);
}

#else
static inline void mod_mthp_stat(int order, enum mthp_stat_item item, int delta)
{
}

static inline void count_mthp_stat(int order, enum mthp_stat_item item)
{
}
#endif
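
/*
 * Illustrative call site (sketch, not taken from this header): a fault path
 * that just allocated an anonymous mTHP folio would bump the per-order
 * counter with something like
 *
 *	count_mthp_stat(folio_order(folio), MTHP_STAT_ANON_FAULT_ALLOC);
 */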
#ifdef CONFIG_TRANSPARENT_HUGEPAGE

extern unsigned long transparent_hugepage_flags;
extern unsigned long huge_anon_orders_always;
extern unsigned long huge_anon_orders_madvise;
extern unsigned long huge_anon_orders_inherit;

static inline bool hugepage_global_enabled(void)
{
	return transparent_hugepage_flags &
			((1<<TRANSPARENT_HUGEPAGE_FLAG) |
			(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG));
}

static inline bool hugepage_global_always(void)
{
	return transparent_hugepage_flags &
			(1<<TRANSPARENT_HUGEPAGE_FLAG);
}

static inline int highest_order(unsigned long orders)
{
	return fls_long(orders) - 1;
}

static inline int next_order(unsigned long *orders, int prev)
{
	*orders &= ~BIT(prev);
	return highest_order(*orders);
}
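
/*
 * Illustrative sketch (not part of the original header): highest_order() and
 * next_order() are meant to walk a bitfield of candidate orders from the
 * largest downwards, e.g.
 *
 *	int order = highest_order(orders);
 *
 *	while (orders) {
 *		// try 'order' here, then move on to the next smaller one
 *		order = next_order(&orders, order);
 *	}
 */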
/*
 * Do the below checks:
 * - For file vma, check if the linear page offset of vma is
 *   order-aligned within the file.  The hugepage is
 *   guaranteed to be order-aligned within the file, but we must
 *   check that the order-aligned addresses in the VMA map to
 *   order-aligned offsets within the file, else the hugepage will
 *   not be mappable.
 * - For all vmas, check if the haddr is in an aligned hugepage
 *   area.
 */
static inline bool thp_vma_suitable_order(struct vm_area_struct *vma,
		unsigned long addr, int order)
{
	unsigned long hpage_size = PAGE_SIZE << order;
	unsigned long haddr;

	/* Don't have to check pgoff for anonymous vma */
	if (!vma_is_anonymous(vma)) {
		if (!IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
				hpage_size >> PAGE_SHIFT))
			return false;
	}

	haddr = ALIGN_DOWN(addr, hpage_size);

	if (haddr < vma->vm_start || haddr + hpage_size > vma->vm_end)
		return false;
	return true;
}
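
/*
 * Worked example (illustrative numbers, not from the original header): for a
 * file-backed VMA with vm_start == 0x200000 and vm_pgoff == 0x10, the value
 * (vm_start >> PAGE_SHIFT) - vm_pgoff is 0x1f0, which is aligned to 16 pages
 * but not to 512, so an order-4 (64K with 4K pages) folio is suitable here
 * while an order-9 (2M) one fails the IS_ALIGNED() check.
 */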
/*
 * Filter the bitfield of input orders to the ones suitable for use in the vma.
 * See thp_vma_suitable_order().
 * All orders that pass the checks are returned as a bitfield.
 */
static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma,
		unsigned long addr, unsigned long orders)
{
	int order;

	/*
	 * Iterate over orders, highest to lowest, removing orders that don't
	 * meet alignment requirements from the set. Exit loop at first order
	 * that meets requirements, since all lower orders must also meet
	 * requirements.
	 */
	order = highest_order(orders);

	while (orders) {
		if (thp_vma_suitable_order(vma, addr, order))
			break;
		order = next_order(&orders, order);
	}

	return orders;
}

unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
					 unsigned long vm_flags,
					 unsigned long tva_flags,
					 unsigned long orders);
/**
 * thp_vma_allowable_orders - determine hugepage orders that are allowed for vma
 * @vma:  the vm area to check
 * @vm_flags: use these vm_flags instead of vma->vm_flags
 * @tva_flags: Which TVA flags to honour
 * @orders: bitfield of all orders to consider
 *
 * Calculates the intersection of the requested hugepage orders and the allowed
 * hugepage orders for the provided vma. Permitted orders are encoded as a set
 * bit at the corresponding bit position (bit-2 corresponds to order-2, bit-3
 * corresponds to order-3, etc). Order-0 is never considered a hugepage order.
 *
 * Return: bitfield of orders allowed for hugepage in the vma. 0 if no hugepage
 * orders are allowed.
 */
static inline
unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
				       unsigned long vm_flags,
				       unsigned long tva_flags,
				       unsigned long orders)
{
	/* Optimization to check if required orders are enabled early. */
	if ((tva_flags & TVA_ENFORCE_SYSFS) && vma_is_anonymous(vma)) {
		unsigned long mask = READ_ONCE(huge_anon_orders_always);

		if (vm_flags & VM_HUGEPAGE)
			mask |= READ_ONCE(huge_anon_orders_madvise);
		if (hugepage_global_always() ||
		    ((vm_flags & VM_HUGEPAGE) && hugepage_global_enabled()))
			mask |= READ_ONCE(huge_anon_orders_inherit);

		orders &= mask;
		if (!orders)
			return 0;
	}

	return __thp_vma_allowable_orders(vma, vm_flags, tva_flags, orders);
}
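
/*
 * Illustrative caller (sketch, assumes a fault-path caller with an anonymous
 * @vma): asking whether a PMD-sized mapping may be used while honouring the
 * sysfs configuration:
 *
 *	if (thp_vma_allowable_order(vma, vma->vm_flags,
 *				    TVA_IN_PF | TVA_ENFORCE_SYSFS, PMD_ORDER))
 *		; // a PMD-mapped THP may be attempted for this fault
 */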
struct thpsize {
	struct kobject kobj;
	struct list_head node;
	int order;
};

#define to_thpsize(kobj) container_of(kobj, struct thpsize, kobj)

#define transparent_hugepage_use_zero_page()				\
	(transparent_hugepage_flags &					\
	 (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
static inline bool vma_thp_disabled(struct vm_area_struct *vma,
				    unsigned long vm_flags)
{
	/*
	 * Explicitly disabled through madvise or prctl, or some
	 * architectures may disable THP for some mappings, for
	 * example, s390 kvm.
	 */
	return (vm_flags & VM_NOHUGEPAGE) ||
	       test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags);
}

static inline bool thp_disabled_by_hw(void)
{
	/* If the hardware/firmware marked hugepage support disabled. */
	return transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED);
}
unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags);
unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags,
		vm_flags_t vm_flags);
bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins);
int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
		unsigned int new_order);
int min_order_for_split(struct folio *folio);
int split_folio_to_list(struct folio *folio, struct list_head *list);
static inline int split_huge_page(struct page *page)
{
	struct folio *folio = page_folio(page);
	int ret = min_order_for_split(folio);

	if (ret < 0)
		return ret;

	/*
	 * split_huge_page() locks the page before splitting and
	 * expects the same page that has been split to be locked when
	 * returned. split_folio(page_folio(page)) cannot be used here
	 * because it converts the page to folio and passes the head
	 * page to be split.
	 */
	return split_huge_page_to_list_to_order(page, NULL, ret);
}
void deferred_split_folio(struct folio *folio, bool partially_mapped);
void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long address, bool freeze, struct folio *folio);

#define split_huge_pmd(__vma, __pmd, __address)				\
	do {								\
		pmd_t *____pmd = (__pmd);				\
		if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd)	\
					|| pmd_devmap(*____pmd))	\
			__split_huge_pmd(__vma, __pmd, __address,	\
						false, NULL);		\
	}  while (0)

void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
		bool freeze, struct folio *folio);
void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
		unsigned long address);

#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
int change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
		    pud_t *pudp, unsigned long addr, pgprot_t newprot,
		    unsigned long cp_flags);
#else
static inline int
change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
		pud_t *pudp, unsigned long addr, pgprot_t newprot,
		unsigned long cp_flags) { return 0; }
#endif

#define split_huge_pud(__vma, __pud, __address)				\
	do {								\
		pud_t *____pud = (__pud);				\
		if (pud_trans_huge(*____pud)				\
					|| pud_devmap(*____pud))	\
			__split_huge_pud(__vma, __pud, __address);	\
	}  while (0)
int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags,
		     int advice);
int madvise_collapse(struct vm_area_struct *vma,
		     struct vm_area_struct **prev,
		     unsigned long start, unsigned long end);
void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start,
			   unsigned long end, long adjust_next);
spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma);
spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma);
static inline int is_swap_pmd(pmd_t pmd)
{
	return !pmd_none(pmd) && !pmd_present(pmd);
}

/* mmap_lock must be held on entry */
static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
		struct vm_area_struct *vma)
{
	if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
		return __pmd_trans_huge_lock(pmd, vma);
	else
		return NULL;
}
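
/*
 * Illustrative pattern (sketch, not from the original header): callers take
 * the PTL only when the entry is huge (or a swap PMD) and drop it when done:
 *
 *	ptl = pmd_trans_huge_lock(pmd, vma);
 *	if (ptl) {
 *		// *pmd is stable here; operate on the huge entry
 *		spin_unlock(ptl);
 *	}
 */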
static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
		struct vm_area_struct *vma)
{
	if (pud_trans_huge(*pud) || pud_devmap(*pud))
		return __pud_trans_huge_lock(pud, vma);
	else
		return NULL;
}
/**
 * folio_test_pmd_mappable - Can we map this folio with a PMD?
 * @folio: The folio to test
 */
static inline bool folio_test_pmd_mappable(struct folio *folio)
{
	return folio_order(folio) >= HPAGE_PMD_ORDER;
}

struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
		pmd_t *pmd, int flags, struct dev_pagemap **pgmap);

vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf);
extern struct folio *huge_zero_folio;
extern unsigned long huge_zero_pfn;

static inline bool is_huge_zero_folio(const struct folio *folio)
{
	return READ_ONCE(huge_zero_folio) == folio;
}

static inline bool is_huge_zero_pmd(pmd_t pmd)
{
	return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd);
}

struct folio *mm_get_huge_zero_folio(struct mm_struct *mm);
void mm_put_huge_zero_folio(struct mm_struct *mm);

#define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot))

static inline bool thp_migration_supported(void)
{
	return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
}
void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
			   pmd_t *pmd, bool freeze, struct folio *folio);
bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr,
			   pmd_t *pmdp, struct folio *folio);

#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline bool folio_test_pmd_mappable(struct folio *folio)
{
	return false;
}

static inline bool thp_vma_suitable_order(struct vm_area_struct *vma,
		unsigned long addr, int order)
{
	return false;
}

static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma,
		unsigned long addr, unsigned long orders)
{
	return 0;
}

static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
					unsigned long vm_flags,
					unsigned long tva_flags,
					unsigned long orders)
{
	return 0;
}
#define transparent_hugepage_flags 0UL

#define thp_get_unmapped_area	NULL

static inline unsigned long
thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr,
			      unsigned long len, unsigned long pgoff,
			      unsigned long flags, vm_flags_t vm_flags)
{
	return 0;
}

static inline bool
can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins)
{
	return false;
}

static inline int
split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
		unsigned int new_order)
{
	return 0;
}

static inline int split_huge_page(struct page *page)
{
	return 0;
}

static inline int split_folio_to_list(struct folio *folio, struct list_head *list)
{
	return 0;
}

static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {}
#define split_huge_pmd(__vma, __pmd, __address)	\
	do { } while (0)

static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long address, bool freeze, struct folio *folio) {}
static inline void split_huge_pmd_address(struct vm_area_struct *vma,
		unsigned long address, bool freeze, struct folio *folio) {}
static inline void split_huge_pmd_locked(struct vm_area_struct *vma,
					 unsigned long address, pmd_t *pmd,
					 bool freeze, struct folio *folio) {}

static inline bool unmap_huge_pmd_locked(struct vm_area_struct *vma,
					 unsigned long addr, pmd_t *pmdp,
					 struct folio *folio)
{
	return false;
}
#define split_huge_pud(__vma, __pmd, __address)	\
	do { } while (0)

static inline int hugepage_madvise(struct vm_area_struct *vma,
				   unsigned long *vm_flags, int advice)
{
	return -EINVAL;
}

static inline int madvise_collapse(struct vm_area_struct *vma,
				   struct vm_area_struct **prev,
				   unsigned long start, unsigned long end)
{
	return -EINVAL;
}
static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
					 unsigned long start,
					 unsigned long end,
					 long adjust_next)
{
}

static inline int is_swap_pmd(pmd_t pmd)
{
	return 0;
}

static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
		struct vm_area_struct *vma)
{
	return NULL;
}

static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
		struct vm_area_struct *vma)
{
	return NULL;
}

static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
{
	return 0;
}

static inline bool is_huge_zero_folio(const struct folio *folio)
{
	return false;
}

static inline bool is_huge_zero_pmd(pmd_t pmd)
{
	return false;
}

static inline void mm_put_huge_zero_folio(struct mm_struct *mm)
{
}
static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
	unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap)
{
	return NULL;
}

static inline bool thp_migration_supported(void)
{
	return false;
}

static inline int highest_order(unsigned long orders)
{
	return 0;
}

static inline int next_order(unsigned long *orders, int prev)
{
	return 0;
}

static inline void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
				    unsigned long address)
{
}

static inline int change_huge_pud(struct mmu_gather *tlb,
				  struct vm_area_struct *vma, pud_t *pudp,
				  unsigned long addr, pgprot_t newprot,
				  unsigned long cp_flags)
{
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int split_folio_to_list_to_order(struct folio *folio,
		struct list_head *list, int new_order)
{
	return split_huge_page_to_list_to_order(&folio->page, list, new_order);
}

static inline int split_folio_to_order(struct folio *folio, int new_order)
{
	return split_folio_to_list_to_order(folio, NULL, new_order);
}

#endif /* _LINUX_HUGE_MM_H */