/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_GFP_H
#define __LINUX_GFP_H

#include <linux/gfp_types.h>

#include <linux/mmzone.h>
#include <linux/topology.h>
#include <linux/alloc_tag.h>
#include <linux/sched.h>

struct vm_area_struct;
struct mempolicy;

/* Convert GFP flags to their corresponding migrate type */
#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
#define GFP_MOVABLE_SHIFT 3

static inline int gfp_migratetype(const gfp_t gfp_flags)
{
	VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
	BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE);
	BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE);
	BUILD_BUG_ON((___GFP_RECLAIMABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_RECLAIMABLE);
	BUILD_BUG_ON(((___GFP_MOVABLE | ___GFP_RECLAIMABLE) >>
		      GFP_MOVABLE_SHIFT) != MIGRATE_HIGHATOMIC);

	if (unlikely(page_group_by_mobility_disabled))
		return MIGRATE_UNMOVABLE;

	/* Group based on mobility */
	return (__force unsigned long)(gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
}
#undef GFP_MOVABLE_MASK
#undef GFP_MOVABLE_SHIFT

static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
{
	return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
}

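/*
 * Usage sketch (illustrative): code that is handed a gfp mask can use this
 * helper to decide whether it may sleep, e.g. whether to bail out instead of
 * performing a blocking wait:
 *
 *	if (!gfpflags_allow_blocking(gfp))
 *		return -EBUSY;
 *
 * Past that check, __GFP_DIRECT_RECLAIM is set and sleeping is allowed.
 */
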
#ifdef CONFIG_HIGHMEM
#define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
#else
#define OPT_ZONE_HIGHMEM ZONE_NORMAL
#endif
#ifdef CONFIG_ZONE_DMA
#define OPT_ZONE_DMA ZONE_DMA
#else
#define OPT_ZONE_DMA ZONE_NORMAL
#endif
#ifdef CONFIG_ZONE_DMA32
#define OPT_ZONE_DMA32 ZONE_DMA32
#else
#define OPT_ZONE_DMA32 ZONE_NORMAL
#endif

/*
 * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the
 * zone to use given the lowest 4 bits of gfp_t. Entries are GFP_ZONES_SHIFT
 * bits long and there are 16 of them to cover all possible combinations of
 * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM.
 *
 * The zone fallback order is MOVABLE=>HIGHMEM=>NORMAL=>DMA32=>DMA.
 * But GFP_MOVABLE is not only a zone specifier but also an allocation
 * policy. Therefore __GFP_MOVABLE plus another zone selector is valid.
 * Only 1 bit of the lowest 3 bits (DMA,DMA32,HIGHMEM) can be set to "1".
 *
 *       bit       result
 *       =================
 *       0x0    => NORMAL
 *       0x1    => DMA or NORMAL
 *       0x2    => HIGHMEM or NORMAL
 *       0x3    => BAD (DMA+HIGHMEM)
 *       0x4    => DMA32 or NORMAL
 *       0x5    => BAD (DMA+DMA32)
 *       0x6    => BAD (HIGHMEM+DMA32)
 *       0x7    => BAD (HIGHMEM+DMA32+DMA)
 *       0x8    => NORMAL (MOVABLE+0)
 *       0x9    => DMA or NORMAL (MOVABLE+DMA)
 *       0xa    => MOVABLE (Movable is valid only if HIGHMEM is set too)
 *       0xb    => BAD (MOVABLE+HIGHMEM+DMA)
 *       0xc    => DMA32 or NORMAL (MOVABLE+DMA32)
 *       0xd    => BAD (MOVABLE+DMA32+DMA)
 *       0xe    => BAD (MOVABLE+DMA32+HIGHMEM)
 *       0xf    => BAD (MOVABLE+DMA32+HIGHMEM+DMA)
 *
 * GFP_ZONES_SHIFT must be <= 2 on 32 bit platforms.
 */

#if defined(CONFIG_ZONE_DEVICE) && (MAX_NR_ZONES-1) <= 4
/* ZONE_DEVICE is not a valid GFP zone specifier */
#define GFP_ZONES_SHIFT 2
#else
#define GFP_ZONES_SHIFT ZONES_SHIFT
#endif

#if 16 * GFP_ZONES_SHIFT > BITS_PER_LONG
#error GFP_ZONES_SHIFT too large to create GFP_ZONE_TABLE integer
#endif

#define GFP_ZONE_TABLE ( \
	(ZONE_NORMAL << 0 * GFP_ZONES_SHIFT)				       \
	| (OPT_ZONE_DMA << ___GFP_DMA * GFP_ZONES_SHIFT)		       \
	| (OPT_ZONE_HIGHMEM << ___GFP_HIGHMEM * GFP_ZONES_SHIFT)	       \
	| (OPT_ZONE_DMA32 << ___GFP_DMA32 * GFP_ZONES_SHIFT)		       \
	| (ZONE_NORMAL << ___GFP_MOVABLE * GFP_ZONES_SHIFT)		       \
	| (OPT_ZONE_DMA << (___GFP_MOVABLE | ___GFP_DMA) * GFP_ZONES_SHIFT)    \
	| (ZONE_MOVABLE << (___GFP_MOVABLE | ___GFP_HIGHMEM) * GFP_ZONES_SHIFT)\
	| (OPT_ZONE_DMA32 << (___GFP_MOVABLE | ___GFP_DMA32) * GFP_ZONES_SHIFT)\
)

/*
 * GFP_ZONE_BAD is a bitmap for all combinations of __GFP_DMA, __GFP_DMA32
 * __GFP_HIGHMEM and __GFP_MOVABLE that are not permitted. One flag per
 * entry starting with bit 0. Bit is set if the combination is not
 * allowed.
 */
#define GFP_ZONE_BAD ( \
	1 << (___GFP_DMA | ___GFP_HIGHMEM)				      \
	| 1 << (___GFP_DMA | ___GFP_DMA32)				      \
	| 1 << (___GFP_DMA32 | ___GFP_HIGHMEM)				      \
	| 1 << (___GFP_DMA | ___GFP_DMA32 | ___GFP_HIGHMEM)		      \
	| 1 << (___GFP_MOVABLE | ___GFP_HIGHMEM | ___GFP_DMA)		      \
	| 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA)		      \
	| 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_HIGHMEM)		      \
	| 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA | ___GFP_HIGHMEM)  \
)

static inline enum zone_type gfp_zone(gfp_t flags)
{
	enum zone_type z;
	int bit = (__force int) (flags & GFP_ZONEMASK);

	z = (GFP_ZONE_TABLE >> (bit * GFP_ZONES_SHIFT)) &
					 ((1 << GFP_ZONES_SHIFT) - 1);
	VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
	return z;
}

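/*
 * Worked example (sketch, assuming CONFIG_HIGHMEM=y): GFP_HIGHUSER_MOVABLE
 * sets __GFP_HIGHMEM | __GFP_MOVABLE, i.e. bit pattern 0xa above, so the
 * table lookup resolves to ZONE_MOVABLE, while GFP_KERNEL carries no zone
 * bits (pattern 0x0) and resolves to ZONE_NORMAL:
 *
 *	enum zone_type a = gfp_zone(GFP_HIGHUSER_MOVABLE);	-> ZONE_MOVABLE
 *	enum zone_type b = gfp_zone(GFP_KERNEL);		-> ZONE_NORMAL
 */
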
/*
 * There is only one page-allocator function, and two main namespaces to
 * it. The alloc_page*() variants return 'struct page *' and as such
 * can allocate highmem pages, the *get*page*() variants return
 * virtual kernel addresses to the allocated page(s).
 */

static inline int gfp_zonelist(gfp_t flags)
{
#ifdef CONFIG_NUMA
	if (unlikely(flags & __GFP_THISNODE))
		return ZONELIST_NOFALLBACK;
#endif
	return ZONELIST_FALLBACK;
}

/*
 * gfp flag masking for nested internal allocations.
 *
 * For code that needs to do allocations inside the public allocation API (e.g.
 * memory allocation tracking code) the allocations need to obey the caller
 * allocation context constraints to prevent allocation context mismatches (e.g.
 * GFP_KERNEL allocations in GFP_NOFS contexts) from potential deadlock
 * situations.
 *
 * It is also assumed that these nested allocations are for internal kernel
 * object storage purposes only and are not going to be used for DMA, etc. Hence
 * we strip out all the zone information and leave just the context information
 * intact.
 *
 * Further, internal allocations must fail before the higher level allocation
 * can fail, so we must make them fail faster and fail silently. We also don't
 * want them to deplete emergency reserves. Hence nested allocations must be
 * prepared for these allocations to fail.
 */
static inline gfp_t gfp_nested_mask(gfp_t flags)
{
	return ((flags & (GFP_KERNEL | GFP_ATOMIC | __GFP_NOLOCKDEP)) |
		(__GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN));
}

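/*
 * Usage sketch (illustrative; trace_rec and alloc_trace_rec are hypothetical
 * names): internal metadata allocated from inside the allocator path should
 * inherit the caller's context via this mask, e.g.:
 *
 *	static struct trace_rec *alloc_trace_rec(gfp_t caller_gfp)
 *	{
 *		return kmalloc(sizeof(struct trace_rec),
 *			       gfp_nested_mask(caller_gfp));
 *	}
 */
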
/*
 * We get the zone list from the current node and the gfp_mask.
 * This zone list contains a maximum of MAX_NUMNODES*MAX_NR_ZONES zones.
 * There are two zonelists per node, one for all zones with memory and
 * one containing just zones from the node the zonelist belongs to.
 *
 * For the case of non-NUMA systems the NODE_DATA() gets optimized to
 * &contig_page_data at compile-time.
 */
static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
{
	return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags);
}

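/*
 * Example (sketch): a GFP_KERNEL allocation on the local node uses that
 * node's fallback zonelist:
 *
 *	struct zonelist *zl = node_zonelist(numa_node_id(), GFP_KERNEL);
 */
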
#ifndef HAVE_ARCH_FREE_PAGE
static inline void arch_free_page(struct page *page, int order) { }
#endif
#ifndef HAVE_ARCH_ALLOC_PAGE
static inline void arch_alloc_page(struct page *page, int order) { }
#endif

struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order, int preferred_nid,
		nodemask_t *nodemask);
#define __alloc_pages(...)			alloc_hooks(__alloc_pages_noprof(__VA_ARGS__))

struct folio *__folio_alloc_noprof(gfp_t gfp, unsigned int order, int preferred_nid,
		nodemask_t *nodemask);
#define __folio_alloc(...)			alloc_hooks(__folio_alloc_noprof(__VA_ARGS__))

unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
				nodemask_t *nodemask, int nr_pages,
				struct list_head *page_list,
				struct page **page_array);
#define __alloc_pages_bulk(...)			alloc_hooks(alloc_pages_bulk_noprof(__VA_ARGS__))

unsigned long alloc_pages_bulk_array_mempolicy_noprof(gfp_t gfp,
				unsigned long nr_pages,
				struct page **page_array);
#define alloc_pages_bulk_array_mempolicy(...)				\
	alloc_hooks(alloc_pages_bulk_array_mempolicy_noprof(__VA_ARGS__))

/* Bulk allocate order-0 pages */
#define alloc_pages_bulk_list(_gfp, _nr_pages, _list)			\
	__alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, _list, NULL)

#define alloc_pages_bulk_array(_gfp, _nr_pages, _page_array)		\
	__alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, NULL, _page_array)

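/*
 * Usage sketch (illustrative): fill an array with up to 16 order-0 pages in a
 * single call; the return value is the number of populated entries:
 *
 *	struct page *pages[16] = { NULL };
 *	unsigned long filled;
 *
 *	filled = alloc_pages_bulk_array(GFP_KERNEL, ARRAY_SIZE(pages), pages);
 */
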
static inline unsigned long
alloc_pages_bulk_array_node_noprof(gfp_t gfp, int nid, unsigned long nr_pages,
				   struct page **page_array)
{
	if (nid == NUMA_NO_NODE)
		nid = numa_mem_id();

	return alloc_pages_bulk_noprof(gfp, nid, NULL, nr_pages, NULL, page_array);
}

#define alloc_pages_bulk_array_node(...)				\
	alloc_hooks(alloc_pages_bulk_array_node_noprof(__VA_ARGS__))

static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask)
{
	gfp_t warn_gfp = gfp_mask & (__GFP_THISNODE|__GFP_NOWARN);

	if (warn_gfp != (__GFP_THISNODE|__GFP_NOWARN))
		return;

	if (node_online(this_node))
		return;

	pr_warn("%pGg allocation from offline node %d\n", &gfp_mask, this_node);
	dump_stack();
}

/*
 * Allocate pages, preferring the node given as nid. The node must be valid and
 * online. For more general interface, see alloc_pages_node().
 */
static inline struct page *
__alloc_pages_node_noprof(int nid, gfp_t gfp_mask, unsigned int order)
{
	VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
	warn_if_node_offline(nid, gfp_mask);

	return __alloc_pages_noprof(gfp_mask, order, nid, NULL);
}

#define __alloc_pages_node(...)			alloc_hooks(__alloc_pages_node_noprof(__VA_ARGS__))

static inline
struct folio *__folio_alloc_node_noprof(gfp_t gfp, unsigned int order, int nid)
{
	VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
	warn_if_node_offline(nid, gfp);

	return __folio_alloc_noprof(gfp, order, nid, NULL);
}

#define __folio_alloc_node(...)			alloc_hooks(__folio_alloc_node_noprof(__VA_ARGS__))

/*
 * Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE,
 * prefer the current CPU's closest node. Otherwise node must be valid and
 * online.
 */
static inline struct page *alloc_pages_node_noprof(int nid, gfp_t gfp_mask,
						   unsigned int order)
{
	if (nid == NUMA_NO_NODE)
		nid = numa_mem_id();

	return __alloc_pages_node_noprof(nid, gfp_mask, order);
}

#define alloc_pages_node(...)			alloc_hooks(alloc_pages_node_noprof(__VA_ARGS__))

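/*
 * Usage sketch (illustrative; nid is whatever node the caller prefers, or
 * NUMA_NO_NODE for the closest node):
 *
 *	struct page *page = alloc_pages_node(nid, GFP_KERNEL, 2);
 *
 *	if (page)
 *		__free_pages(page, 2);
 */
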
#ifdef CONFIG_NUMA
struct page *alloc_pages_noprof(gfp_t gfp, unsigned int order);
struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order,
		struct mempolicy *mpol, pgoff_t ilx, int nid);
struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order);
struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order,
		struct mempolicy *mpol, pgoff_t ilx, int nid);
struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma,
		unsigned long addr);
#else
static inline struct page *alloc_pages_noprof(gfp_t gfp_mask, unsigned int order)
{
	return alloc_pages_node_noprof(numa_node_id(), gfp_mask, order);
}
static inline struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order,
		struct mempolicy *mpol, pgoff_t ilx, int nid)
{
	return alloc_pages_noprof(gfp, order);
}
static inline struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order)
{
	return __folio_alloc_node_noprof(gfp, order, numa_node_id());
}
static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order,
		struct mempolicy *mpol, pgoff_t ilx, int nid)
{
	return folio_alloc_noprof(gfp, order);
}
#define vma_alloc_folio_noprof(gfp, order, vma, addr)		\
	folio_alloc_noprof(gfp, order)
#endif

#define alloc_pages(...)			alloc_hooks(alloc_pages_noprof(__VA_ARGS__))
#define alloc_pages_mpol(...)			alloc_hooks(alloc_pages_mpol_noprof(__VA_ARGS__))
#define folio_alloc(...)			alloc_hooks(folio_alloc_noprof(__VA_ARGS__))
#define folio_alloc_mpol(...)			alloc_hooks(folio_alloc_mpol_noprof(__VA_ARGS__))
#define vma_alloc_folio(...)			alloc_hooks(vma_alloc_folio_noprof(__VA_ARGS__))

#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)

static inline struct page *alloc_page_vma_noprof(gfp_t gfp,
		struct vm_area_struct *vma, unsigned long addr)
{
	struct folio *folio = vma_alloc_folio_noprof(gfp, 0, vma, addr);

	return &folio->page;
}
#define alloc_page_vma(...)			alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__))

extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order);
#define __get_free_pages(...)			alloc_hooks(get_free_pages_noprof(__VA_ARGS__))

extern unsigned long get_zeroed_page_noprof(gfp_t gfp_mask);
#define get_zeroed_page(...)			alloc_hooks(get_zeroed_page_noprof(__VA_ARGS__))

void *alloc_pages_exact_noprof(size_t size, gfp_t gfp_mask) __alloc_size(1);
#define alloc_pages_exact(...)			alloc_hooks(alloc_pages_exact_noprof(__VA_ARGS__))

void free_pages_exact(void *virt, size_t size);

__meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mask) __alloc_size(2);
#define alloc_pages_exact_nid(...)					\
	alloc_hooks(alloc_pages_exact_nid_noprof(__VA_ARGS__))

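/*
 * Usage sketch (illustrative): allocate a physically contiguous buffer of an
 * exact byte size (rounded internally to whole pages) and free it the same
 * way:
 *
 *	void *buf = alloc_pages_exact(48 * 1024, GFP_KERNEL);
 *
 *	if (buf)
 *		free_pages_exact(buf, 48 * 1024);
 */
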
#define __get_free_page(gfp_mask)					\
	__get_free_pages((gfp_mask), 0)

#define __get_dma_pages(gfp_mask, order)				\
	__get_free_pages((gfp_mask) | GFP_DMA, (order))

extern void __free_pages(struct page *page, unsigned int order);
extern void free_pages(unsigned long addr, unsigned int order);

#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0)

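/*
 * Usage sketch (illustrative): the *get*page*() variants return a kernel
 * virtual address rather than a struct page, and are paired with free_page():
 *
 *	unsigned long addr = get_zeroed_page(GFP_KERNEL);
 *
 *	if (addr)
 *		free_page(addr);
 */
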
void page_alloc_init_cpuhp(void);
int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp);
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
void drain_all_pages(struct zone *zone);
void drain_local_pages(struct zone *zone);

void page_alloc_init_late(void);
void setup_pcp_cacheinfo(unsigned int cpu);

/*
 * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what
 * GFP flags are used before interrupts are enabled. Once interrupts are
 * enabled, it is set to __GFP_BITS_MASK while the system is running. During
 * hibernation, it is used by PM to avoid I/O during memory allocation while
 * devices are suspended.
 */
extern gfp_t gfp_allowed_mask;

/* Returns true if the gfp_mask allows use of ALLOC_NO_WATERMARK */
bool gfp_pfmemalloc_allowed(gfp_t gfp_mask);

static inline bool gfp_has_io_fs(gfp_t gfp)
{
	return (gfp & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS);
}

/*
 * Check if the gfp flags allow compaction - GFP_NOIO is a really
 * tricky context because the migration might require IO.
 */
static inline bool gfp_compaction_allowed(gfp_t gfp_mask)
{
	return IS_ENABLED(CONFIG_COMPACTION) && (gfp_mask & __GFP_IO);
}

extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma);

#ifdef CONFIG_CONTIG_ALLOC
/* The below functions must be run on a range from a single zone. */
extern int alloc_contig_range_noprof(unsigned long start, unsigned long end,
				     unsigned migratetype, gfp_t gfp_mask);
#define alloc_contig_range(...)			alloc_hooks(alloc_contig_range_noprof(__VA_ARGS__))

extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
					      int nid, nodemask_t *nodemask);
#define alloc_contig_pages(...)			alloc_hooks(alloc_contig_pages_noprof(__VA_ARGS__))

#endif
void free_contig_range(unsigned long pfn, unsigned long nr_pages);

#ifdef CONFIG_CONTIG_ALLOC
static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
							int nid, nodemask_t *node)
{
	struct page *page;

	if (WARN_ON(!order || !(gfp & __GFP_COMP)))
		return NULL;

	page = alloc_contig_pages_noprof(1 << order, gfp, nid, node);

	return page ? page_folio(page) : NULL;
}
#else
static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
							int nid, nodemask_t *node)
{
	return NULL;
}
#endif
/* This should be paired with folio_put() rather than free_contig_range(). */
#define folio_alloc_gigantic(...)	alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__))

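/*
 * Usage sketch (illustrative): allocate a gigantic compound folio and drop it
 * with folio_put(), as noted above; __GFP_COMP and a non-zero order are
 * required:
 *
 *	struct folio *folio = folio_alloc_gigantic(18, GFP_KERNEL | __GFP_COMP,
 *						   numa_mem_id(), NULL);
 *	if (folio)
 *		folio_put(folio);
 */
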
#endif /* __LINUX_GFP_H */