/*
 * linux/mm/page_isolation.c
 */
6 #include <linux/page-isolation.h>
7 #include <linux/pageblock-flags.h>
8 #include <linux/memory.h>
9 #include <linux/hugetlb.h>
12 #define CREATE_TRACE_POINTS
13 #include <trace/events/page_isolation.h>
15 static int set_migratetype_isolate(struct page
*page
,
16 bool skip_hwpoisoned_pages
)
19 unsigned long flags
, pfn
;
20 struct memory_isolate_notify arg
;
24 zone
= page_zone(page
);
26 spin_lock_irqsave(&zone
->lock
, flags
);
28 pfn
= page_to_pfn(page
);
30 arg
.nr_pages
= pageblock_nr_pages
;
34 * It may be possible to isolate a pageblock even if the
35 * migratetype is not MIGRATE_MOVABLE. The memory isolation
36 * notifier chain is used by balloon drivers to return the
37 * number of pages in a range that are held by the balloon
38 * driver to shrink memory. If all the pages are accounted for
39 * by balloons, are free, or on the LRU, isolation can continue.
40 * Later, for example, when memory hotplug notifier runs, these
41 * pages reported as "can be isolated" should be isolated(freed)
42 * by the balloon driver through the memory notifier chain.
44 notifier_ret
= memory_isolate_notify(MEM_ISOLATE_COUNT
, &arg
);
45 notifier_ret
= notifier_to_errno(notifier_ret
);
49 * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
50 * We just check MOVABLE pages.
52 if (!has_unmovable_pages(zone
, page
, arg
.pages_found
,
53 skip_hwpoisoned_pages
))
57 * immobile means "not-on-lru" paes. If immobile is larger than
58 * removable-by-driver pages reported by notifier, we'll fail.
63 unsigned long nr_pages
;
64 int migratetype
= get_pageblock_migratetype(page
);
66 set_pageblock_migratetype(page
, MIGRATE_ISOLATE
);
67 zone
->nr_isolate_pageblock
++;
68 nr_pages
= move_freepages_block(zone
, page
, MIGRATE_ISOLATE
);
70 __mod_zone_freepage_state(zone
, -nr_pages
, migratetype
);
73 spin_unlock_irqrestore(&zone
->lock
, flags
);
75 drain_all_pages(zone
);
79 static void unset_migratetype_isolate(struct page
*page
, unsigned migratetype
)
82 unsigned long flags
, nr_pages
;
83 struct page
*isolated_page
= NULL
;
85 unsigned long page_idx
, buddy_idx
;
88 zone
= page_zone(page
);
89 spin_lock_irqsave(&zone
->lock
, flags
);
90 if (get_pageblock_migratetype(page
) != MIGRATE_ISOLATE
)
94 * Because freepage with more than pageblock_order on isolated
95 * pageblock is restricted to merge due to freepage counting problem,
96 * it is possible that there is free buddy page.
97 * move_freepages_block() doesn't care of merge so we need other
98 * approach in order to merge them. Isolation and free will make
99 * these pages to be merged.
101 if (PageBuddy(page
)) {
102 order
= page_order(page
);
103 if (order
>= pageblock_order
) {
104 page_idx
= page_to_pfn(page
) & ((1 << MAX_ORDER
) - 1);
105 buddy_idx
= __find_buddy_index(page_idx
, order
);
106 buddy
= page
+ (buddy_idx
- page_idx
);
108 if (pfn_valid_within(page_to_pfn(buddy
)) &&
109 !is_migrate_isolate_page(buddy
)) {
110 __isolate_free_page(page
, order
);
111 kernel_map_pages(page
, (1 << order
), 1);
112 set_page_refcounted(page
);
113 isolated_page
= page
;
119 * If we isolate freepage with more than pageblock_order, there
120 * should be no freepage in the range, so we could avoid costly
121 * pageblock scanning for freepage moving.
123 if (!isolated_page
) {
124 nr_pages
= move_freepages_block(zone
, page
, migratetype
);
125 __mod_zone_freepage_state(zone
, nr_pages
, migratetype
);
127 set_pageblock_migratetype(page
, migratetype
);
128 zone
->nr_isolate_pageblock
--;
130 spin_unlock_irqrestore(&zone
->lock
, flags
);
132 __free_pages(isolated_page
, order
);
135 static inline struct page
*
136 __first_valid_page(unsigned long pfn
, unsigned long nr_pages
)
139 for (i
= 0; i
< nr_pages
; i
++)
140 if (pfn_valid_within(pfn
+ i
))
142 if (unlikely(i
== nr_pages
))
144 return pfn_to_page(pfn
+ i
);
148 * start_isolate_page_range() -- make page-allocation-type of range of pages
149 * to be MIGRATE_ISOLATE.
150 * @start_pfn: The lower PFN of the range to be isolated.
151 * @end_pfn: The upper PFN of the range to be isolated.
152 * @migratetype: migrate type to set in error recovery.
154 * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
155 * the range will never be allocated. Any free pages and pages freed in the
156 * future will not be allocated again.
158 * start_pfn/end_pfn must be aligned to pageblock_order.
159 * Returns 0 on success and -EBUSY if any part of range cannot be isolated.
161 int start_isolate_page_range(unsigned long start_pfn
, unsigned long end_pfn
,
162 unsigned migratetype
, bool skip_hwpoisoned_pages
)
165 unsigned long undo_pfn
;
168 BUG_ON(!IS_ALIGNED(start_pfn
, pageblock_nr_pages
));
169 BUG_ON(!IS_ALIGNED(end_pfn
, pageblock_nr_pages
));
171 for (pfn
= start_pfn
;
173 pfn
+= pageblock_nr_pages
) {
174 page
= __first_valid_page(pfn
, pageblock_nr_pages
);
176 set_migratetype_isolate(page
, skip_hwpoisoned_pages
)) {
183 for (pfn
= start_pfn
;
185 pfn
+= pageblock_nr_pages
)
186 unset_migratetype_isolate(pfn_to_page(pfn
), migratetype
);
192 * Make isolated pages available again.
194 int undo_isolate_page_range(unsigned long start_pfn
, unsigned long end_pfn
,
195 unsigned migratetype
)
199 BUG_ON((start_pfn
) & (pageblock_nr_pages
- 1));
200 BUG_ON((end_pfn
) & (pageblock_nr_pages
- 1));
201 for (pfn
= start_pfn
;
203 pfn
+= pageblock_nr_pages
) {
204 page
= __first_valid_page(pfn
, pageblock_nr_pages
);
205 if (!page
|| get_pageblock_migratetype(page
) != MIGRATE_ISOLATE
)
207 unset_migratetype_isolate(page
, migratetype
);
212 * Test all pages in the range is free(means isolated) or not.
213 * all pages in [start_pfn...end_pfn) must be in the same zone.
214 * zone->lock must be held before call this.
216 * Returns 1 if all pages in the range are isolated.
219 __test_page_isolated_in_pageblock(unsigned long pfn
, unsigned long end_pfn
,
220 bool skip_hwpoisoned_pages
)
224 while (pfn
< end_pfn
) {
225 if (!pfn_valid_within(pfn
)) {
229 page
= pfn_to_page(pfn
);
232 * If the page is on a free list, it has to be on
233 * the correct MIGRATE_ISOLATE freelist. There is no
234 * simple way to verify that as VM_BUG_ON(), though.
236 pfn
+= 1 << page_order(page
);
237 else if (skip_hwpoisoned_pages
&& PageHWPoison(page
))
238 /* A HWPoisoned page cannot be also PageBuddy */
247 int test_pages_isolated(unsigned long start_pfn
, unsigned long end_pfn
,
248 bool skip_hwpoisoned_pages
)
250 unsigned long pfn
, flags
;
255 * Note: pageblock_nr_pages != MAX_ORDER. Then, chunks of free pages
256 * are not aligned to pageblock_nr_pages.
257 * Then we just check migratetype first.
259 for (pfn
= start_pfn
; pfn
< end_pfn
; pfn
+= pageblock_nr_pages
) {
260 page
= __first_valid_page(pfn
, pageblock_nr_pages
);
261 if (page
&& get_pageblock_migratetype(page
) != MIGRATE_ISOLATE
)
264 page
= __first_valid_page(start_pfn
, end_pfn
- start_pfn
);
265 if ((pfn
< end_pfn
) || !page
)
267 /* Check all pages are free or marked as ISOLATED */
268 zone
= page_zone(page
);
269 spin_lock_irqsave(&zone
->lock
, flags
);
270 pfn
= __test_page_isolated_in_pageblock(start_pfn
, end_pfn
,
271 skip_hwpoisoned_pages
);
272 spin_unlock_irqrestore(&zone
->lock
, flags
);
274 trace_test_pages_isolated(start_pfn
, end_pfn
, pfn
);
276 return pfn
< end_pfn
? -EBUSY
: 0;
279 struct page
*alloc_migrate_target(struct page
*page
, unsigned long private,
282 gfp_t gfp_mask
= GFP_USER
| __GFP_MOVABLE
;
285 * TODO: allocate a destination hugepage from a nearest neighbor node,
286 * accordance with memory policy of the user process if possible. For
287 * now as a simple work-around, we use the next node for destination.
289 if (PageHuge(page
)) {
290 nodemask_t src
= nodemask_of_node(page_to_nid(page
));
292 nodes_complement(dst
, src
);
293 return alloc_huge_page_node(page_hstate(compound_head(page
)),
294 next_node(page_to_nid(page
), dst
));
297 if (PageHighMem(page
))
298 gfp_mask
|= __GFP_HIGHMEM
;
300 return alloc_page(gfp_mask
);