mm/numa_memblks.c
// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/array_size.h>
#include <linux/sort.h>
#include <linux/printk.h>
#include <linux/memblock.h>
#include <linux/numa.h>
#include <linux/numa_memblks.h>

static int numa_distance_cnt;
static u8 *numa_distance;

nodemask_t numa_nodes_parsed __initdata;

static struct numa_meminfo numa_meminfo __initdata_or_meminfo;
static struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo;

/*
 * Set nodes, which have memory in @mi, in *@nodemask.
 */
static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask,
					      const struct numa_meminfo *mi)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(mi->blk); i++)
		if (mi->blk[i].start != mi->blk[i].end &&
		    mi->blk[i].nid != NUMA_NO_NODE)
			node_set(mi->blk[i].nid, *nodemask);
}

/**
 * numa_reset_distance - Reset NUMA distance table
 *
 * The current table is freed.  The next numa_set_distance() call will
 * create a new one.
 */
void __init numa_reset_distance(void)
{
	size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);

	/* numa_distance could be 1LU marking allocation failure, test cnt */
	if (numa_distance_cnt)
		memblock_free(numa_distance, size);
	numa_distance_cnt = 0;
	numa_distance = NULL;	/* enable table creation */
}

static int __init numa_alloc_distance(void)
{
	nodemask_t nodes_parsed;
	size_t size;
	int i, j, cnt = 0;

	/* size the new table and allocate it */
	nodes_parsed = numa_nodes_parsed;
	numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo);

	for_each_node_mask(i, nodes_parsed)
		cnt = i;
	cnt++;
	size = cnt * cnt * sizeof(numa_distance[0]);

	numa_distance = memblock_alloc(size, PAGE_SIZE);
	if (!numa_distance) {
		pr_warn("Warning: can't allocate distance table!\n");
		/* don't retry until explicitly reset */
		numa_distance = (void *)1LU;
		return -ENOMEM;
	}

	numa_distance_cnt = cnt;

	/* fill with the default distances */
	for (i = 0; i < cnt; i++)
		for (j = 0; j < cnt; j++)
			numa_distance[i * cnt + j] = i == j ?
				LOCAL_DISTANCE : REMOTE_DISTANCE;
	printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt);

	return 0;
}
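/*
 * For illustration: with the generic LOCAL_DISTANCE/REMOTE_DISTANCE defaults
 * (typically 10 and 20), two parsed nodes start out with the default table
 *
 *	{ 10, 20,
 *	  20, 10 }
 *
 * which a firmware parser (e.g. the ACPI SLIT code) may then override entry
 * by entry via numa_set_distance() below.
 */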
/**
 * numa_set_distance - Set NUMA distance from one NUMA node to another
 * @from: the 'from' node to set distance
 * @to: the 'to' node to set distance
 * @distance: NUMA distance
 *
 * Set the distance from node @from to @to to @distance.  If the distance
 * table doesn't exist, one which is large enough to accommodate all the
 * currently known nodes will be created.
 *
 * If such a table cannot be allocated, a warning is printed and further
 * calls are ignored until the distance table is reset with
 * numa_reset_distance().
 *
 * If @from or @to is higher than the highest known node or lower than zero
 * at the time of table creation, or if @distance doesn't make sense, the
 * call is ignored.  This is to allow simplification of specific NUMA config
 * implementations.
 */
void __init numa_set_distance(int from, int to, int distance)
{
	if (!numa_distance && numa_alloc_distance() < 0)
		return;

	if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
			from < 0 || to < 0) {
		pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
			     from, to, distance);
		return;
	}

	if ((u8)distance != distance ||
	    (from == to && distance != LOCAL_DISTANCE)) {
		pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
			     from, to, distance);
		return;
	}

	numa_distance[from * numa_distance_cnt + to] = distance;
}

int __node_distance(int from, int to)
{
	if (from >= numa_distance_cnt || to >= numa_distance_cnt)
		return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
	return numa_distance[from * numa_distance_cnt + to];
}
EXPORT_SYMBOL(__node_distance);
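/*
 * Illustrative sketch of the typical flow: an early firmware parser fills
 * the table for every node pair, e.g.
 *
 *	numa_set_distance(from, to,
 *			  slit->entry[slit->locality_count * from + to]);
 *
 * and later consumers read it back through node_distance(), which
 * architectures usually wire to __node_distance() above.
 */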
static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
				     struct numa_meminfo *mi)
{
	/* ignore zero length blks */
	if (start == end)
		return 0;

	/* whine about and ignore invalid blks */
	if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
		pr_warn("Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
			nid, start, end - 1);
		return 0;
	}

	if (mi->nr_blks >= NR_NODE_MEMBLKS) {
		pr_err("too many memblk ranges\n");
		return -EINVAL;
	}

	mi->blk[mi->nr_blks].start = start;
	mi->blk[mi->nr_blks].end = end;
	mi->blk[mi->nr_blks].nid = nid;
	mi->nr_blks++;
	return 0;
}

/**
 * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo
 * @idx: Index of memblk to remove
 * @mi: numa_meminfo to remove memblk from
 *
 * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and
 * decrementing @mi->nr_blks.
 */
void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
{
	mi->nr_blks--;
	memmove(&mi->blk[idx], &mi->blk[idx + 1],
		(mi->nr_blks - idx) * sizeof(mi->blk[0]));
}

/**
 * numa_move_tail_memblk - Move a numa_memblk from one numa_meminfo to another
 * @dst: numa_meminfo to append block to
 * @idx: Index of memblk to remove
 * @src: numa_meminfo to remove memblk from
 */
static void __init numa_move_tail_memblk(struct numa_meminfo *dst, int idx,
					 struct numa_meminfo *src)
{
	dst->blk[dst->nr_blks++] = src->blk[idx];
	numa_remove_memblk_from(idx, src);
}

/**
 * numa_add_memblk - Add one numa_memblk to numa_meminfo
 * @nid: NUMA node ID of the new memblk
 * @start: Start address of the new memblk
 * @end: End address of the new memblk
 *
 * Add a new memblk to the default numa_meminfo.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init numa_add_memblk(int nid, u64 start, u64 end)
{
	return numa_add_memblk_to(nid, start, end, &numa_meminfo);
}
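/*
 * Illustrative sketch of the expected callers: early firmware parsers
 * register one block per affinity entry, roughly
 *
 *	numa_add_memblk(nid, base, base + size);
 *
 * e.g. from ACPI SRAT memory-affinity parsing or the devicetree NUMA code,
 * before numa_cleanup_meminfo() below merges and trims the result.
 */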
/**
 * numa_cleanup_meminfo - Cleanup a numa_meminfo
 * @mi: numa_meminfo to clean up
 *
 * Sanitize @mi by merging and removing unnecessary memblks.  Also check for
 * conflicts and clear unused memblks.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
{
	const u64 low = memblock_start_of_DRAM();
	const u64 high = memblock_end_of_DRAM();
	int i, j, k;

	/* first, trim all entries */
	for (i = 0; i < mi->nr_blks; i++) {
		struct numa_memblk *bi = &mi->blk[i];

		/* move / save reserved memory ranges */
		if (!memblock_overlaps_region(&memblock.memory,
					      bi->start, bi->end - bi->start)) {
			numa_move_tail_memblk(&numa_reserved_meminfo, i--, mi);
			continue;
		}

		/* make sure all non-reserved blocks are inside the limits */
		bi->start = max(bi->start, low);

		/* preserve info for non-RAM areas above 'max_pfn': */
		if (bi->end > high) {
			numa_add_memblk_to(bi->nid, high, bi->end,
					   &numa_reserved_meminfo);
			bi->end = high;
		}

		/* and there's no empty block */
		if (bi->start >= bi->end)
			numa_remove_memblk_from(i--, mi);
	}

	/* merge neighboring / overlapping entries */
	for (i = 0; i < mi->nr_blks; i++) {
		struct numa_memblk *bi = &mi->blk[i];

		for (j = i + 1; j < mi->nr_blks; j++) {
			struct numa_memblk *bj = &mi->blk[j];
			u64 start, end;

			/*
			 * See whether there are overlapping blocks.  Whine
			 * about but allow overlaps of the same nid.  They
			 * will be merged below.
			 */
			if (bi->end > bj->start && bi->start < bj->end) {
				if (bi->nid != bj->nid) {
					pr_err("node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n",
					       bi->nid, bi->start, bi->end - 1,
					       bj->nid, bj->start, bj->end - 1);
					return -EINVAL;
				}
				pr_warn("Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n",
					bi->nid, bi->start, bi->end - 1,
					bj->start, bj->end - 1);
			}

			/*
			 * Join together blocks on the same node, holes
			 * between which don't overlap with memory on other
			 * nodes.
			 */
			if (bi->nid != bj->nid)
				continue;
			start = min(bi->start, bj->start);
			end = max(bi->end, bj->end);
			for (k = 0; k < mi->nr_blks; k++) {
				struct numa_memblk *bk = &mi->blk[k];

				if (bi->nid == bk->nid)
					continue;
				if (start < bk->end && end > bk->start)
					break;
			}
			if (k < mi->nr_blks)
				continue;
			pr_info("NUMA: Node %d [mem %#010Lx-%#010Lx] + [mem %#010Lx-%#010Lx] -> [mem %#010Lx-%#010Lx]\n",
				bi->nid, bi->start, bi->end - 1, bj->start,
				bj->end - 1, start, end - 1);
			bi->start = start;
			bi->end = end;
			numa_remove_memblk_from(j--, mi);
		}
	}

	/* clear unused ones */
	for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) {
		mi->blk[i].start = mi->blk[i].end = 0;
		mi->blk[i].nid = NUMA_NO_NODE;
	}

	return 0;
}
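/*
 * Worked example (illustrative numbers): two parsed blocks on node 0,
 * [0x00000000-0x40000000) and [0x40000000-0x80000000), are merged above
 * into a single [0x00000000-0x80000000) block, provided no other node's
 * memory falls inside the combined range; overlapping blocks that belong
 * to different nodes are instead rejected with -EINVAL.
 */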
/*
 * Mark all currently memblock-reserved physical memory (which covers the
 * kernel's own memory ranges) as hot-unpluggable.
 */
static void __init numa_clear_kernel_node_hotplug(void)
{
	nodemask_t reserved_nodemask = NODE_MASK_NONE;
	struct memblock_region *mb_region;
	int i;

	/*
	 * We have to do some preprocessing of memblock regions, to
	 * make them suitable for reservation.
	 *
	 * At this time, all memory regions reserved by memblock are
	 * used by the kernel, but those regions are not split up
	 * along node boundaries yet, and don't necessarily have their
	 * node ID set yet either.
	 *
	 * So iterate over all parsed memory blocks and use those ranges to
	 * set the nid in memblock.reserved.  This will split up the
	 * memblock regions along node boundaries and will set the node IDs
	 * as well.
	 */
	for (i = 0; i < numa_meminfo.nr_blks; i++) {
		struct numa_memblk *mb = numa_meminfo.blk + i;
		int ret;

		ret = memblock_set_node(mb->start, mb->end - mb->start,
					&memblock.reserved, mb->nid);
		WARN_ON_ONCE(ret);
	}

	/*
	 * Now go over all reserved memblock regions, to construct a
	 * node mask of all kernel reserved memory areas.
	 *
	 * [ Note, when booting with mem=nn[kMG] or in a kdump kernel,
	 *   numa_meminfo might not include all memblock.reserved
	 *   memory ranges, because quirks such as trim_snb_memory()
	 *   reserve specific pages for Sandy Bridge graphics. ]
	 */
	for_each_reserved_mem_region(mb_region) {
		int nid = memblock_get_region_node(mb_region);

		if (numa_valid_node(nid))
			node_set(nid, reserved_nodemask);
	}

	/*
	 * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory
	 * belonging to the reserved node mask.
	 *
	 * Note that this will include memory regions that reside
	 * on nodes that contain kernel memory - entire nodes
	 * become hot-unpluggable:
	 */
	for (i = 0; i < numa_meminfo.nr_blks; i++) {
		struct numa_memblk *mb = numa_meminfo.blk + i;

		if (!node_isset(mb->nid, reserved_nodemask))
			continue;

		memblock_clear_hotplug(mb->start, mb->end - mb->start);
	}
}

static int __init numa_register_meminfo(struct numa_meminfo *mi)
{
	int i;

	/* Account for nodes with cpus and no memory */
	node_possible_map = numa_nodes_parsed;
	numa_nodemask_from_meminfo(&node_possible_map, mi);
	if (WARN_ON(nodes_empty(node_possible_map)))
		return -EINVAL;

	for (i = 0; i < mi->nr_blks; i++) {
		struct numa_memblk *mb = &mi->blk[i];

		memblock_set_node(mb->start, mb->end - mb->start,
				  &memblock.memory, mb->nid);
	}

	/*
	 * Very early on, the kernel has to use some memory, e.g. for
	 * loading the kernel image.  We cannot prevent this anyway, so
	 * any node the kernel resides in should be un-hotpluggable.
	 *
	 * And by the time we get here, allocating the node data won't fail.
	 */
	numa_clear_kernel_node_hotplug();

	/*
	 * If the sections array is going to be used for pfn -> nid mapping,
	 * check whether its granularity is fine enough.
	 */
	if (IS_ENABLED(NODE_NOT_IN_PAGE_FLAGS)) {
		unsigned long pfn_align = node_map_pfn_alignment();

		if (pfn_align && pfn_align < PAGES_PER_SECTION) {
			unsigned long node_align_mb = PFN_PHYS(pfn_align) >> 20;

			unsigned long sect_align_mb = PFN_PHYS(PAGES_PER_SECTION) >> 20;

			pr_warn("Node alignment %luMB < min %luMB, rejecting NUMA config\n",
				node_align_mb, sect_align_mb);
			return -EINVAL;
		}
	}

	return 0;
}

int __init numa_memblks_init(int (*init_func)(void),
			     bool memblock_force_top_down)
{
	phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
	int ret;

	nodes_clear(numa_nodes_parsed);
	nodes_clear(node_possible_map);
	nodes_clear(node_online_map);
	memset(&numa_meminfo, 0, sizeof(numa_meminfo));
	WARN_ON(memblock_set_node(0, max_addr, &memblock.memory, NUMA_NO_NODE));
	WARN_ON(memblock_set_node(0, max_addr, &memblock.reserved,
				  NUMA_NO_NODE));
	/* In case that parsing SRAT failed. */
	WARN_ON(memblock_clear_hotplug(0, max_addr));
	numa_reset_distance();

	ret = init_func();
	if (ret < 0)
		return ret;

	/*
	 * We reset memblock back to the top-down direction here because
	 * if we configured ACPI_NUMA, we have parsed SRAT in init_func().
	 * It is ok to have the reset here even if we didn't configure
	 * ACPI_NUMA, or if ACPI NUMA init fails and falls back to dummy
	 * NUMA init.
	 */
	if (memblock_force_top_down)
		memblock_set_bottom_up(false);

	ret = numa_cleanup_meminfo(&numa_meminfo);
	if (ret < 0)
		return ret;

	numa_emulation(&numa_meminfo, numa_distance_cnt);

	return numa_register_meminfo(&numa_meminfo);
}
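/*
 * Sketch of the intended entry point (arch_parse_numa_topology is a
 * placeholder name, not something defined in this file): an architecture's
 * early boot code calls numa_memblks_init() above roughly as
 *
 *	numa_memblks_init(arch_parse_numa_topology, true);
 *
 * where the callback fills numa_nodes_parsed and calls numa_add_memblk()
 * and numa_set_distance() from firmware tables, and passing true switches
 * memblock allocations back to top-down once that parsing is done.
 */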
/* Order numa_memblk pointers by ascending block start address (for sort()) */
static int __init cmp_memblk(const void *a, const void *b)
{
	const struct numa_memblk *ma = *(const struct numa_memblk **)a;
	const struct numa_memblk *mb = *(const struct numa_memblk **)b;

	return (ma->start > mb->start) - (ma->start < mb->start);
}

static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;

/**
 * numa_fill_memblks - Fill gaps in numa_meminfo memblks
 * @start: address to begin fill
 * @end: address to end fill
 *
 * Find and extend numa_meminfo memblks to cover the physical
 * address range @start-@end.
 *
 * RETURNS:
 * 0		  : Success
 * NUMA_NO_MEMBLK : No memblks exist in address range @start-@end
 */
int __init numa_fill_memblks(u64 start, u64 end)
{
	struct numa_memblk **blk = &numa_memblk_list[0];
	struct numa_meminfo *mi = &numa_meminfo;
	int count = 0;
	u64 prev_end;

	/*
	 * Create a list of pointers to numa_meminfo memblks that
	 * overlap start, end.  The list is used to make in-place
	 * changes that fill out the numa_meminfo memblks.
	 */
	for (int i = 0; i < mi->nr_blks; i++) {
		struct numa_memblk *bi = &mi->blk[i];

		if (memblock_addrs_overlap(start, end - start, bi->start,
					   bi->end - bi->start)) {
			blk[count] = &mi->blk[i];
			count++;
		}
	}
	if (!count)
		return NUMA_NO_MEMBLK;

	/* Sort the list of pointers in memblk->start order */
	sort(&blk[0], count, sizeof(blk[0]), cmp_memblk, NULL);

	/* Make sure the first/last memblks include start/end */
	blk[0]->start = min(blk[0]->start, start);
	blk[count - 1]->end = max(blk[count - 1]->end, end);

	/*
	 * Fill any gaps by tracking the previous memblk's end address
	 * and backfilling to it if needed.
	 */
	prev_end = blk[0]->end;
	for (int i = 1; i < count; i++) {
		struct numa_memblk *curr = blk[i];

		if (prev_end >= curr->start) {
			if (prev_end < curr->end)
				prev_end = curr->end;
		} else {
			curr->start = prev_end;
			prev_end = curr->end;
		}
	}
	return 0;
}
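/*
 * Worked example with made-up addresses: if the only blocks overlapping the
 * range are [0x0-0x1000) and [0x3000-0x4000) and the caller asks for
 * numa_fill_memblks(0x0, 0x4000), the loop above pulls the second block's
 * start down to 0x1000, so the two blocks then cover the whole range with
 * no gap.  The ACPI CXL window (CFMWS) parsing is the sort of caller this
 * serves, e.g. so that a CXL memory window's full span maps to a node.
 */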
#ifdef CONFIG_NUMA_KEEP_MEMINFO
static int meminfo_to_nid(struct numa_meminfo *mi, u64 start)
{
	int i;

	for (i = 0; i < mi->nr_blks; i++)
		if (mi->blk[i].start <= start && mi->blk[i].end > start)
			return mi->blk[i].nid;
	return NUMA_NO_NODE;
}

int phys_to_target_node(u64 start)
{
	int nid = meminfo_to_nid(&numa_meminfo, start);

	/*
	 * Prefer online nodes, but if reserved memory might be
	 * hot-added continue the search with reserved ranges.
	 */
	if (nid != NUMA_NO_NODE)
		return nid;

	return meminfo_to_nid(&numa_reserved_meminfo, start);
}
EXPORT_SYMBOL_GPL(phys_to_target_node);

int memory_add_physaddr_to_nid(u64 start)
{
	int nid = meminfo_to_nid(&numa_meminfo, start);

	if (nid == NUMA_NO_NODE)
		nid = numa_meminfo.blk[0].nid;
	return nid;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);

#endif /* CONFIG_NUMA_KEEP_MEMINFO */