// SPDX-License-Identifier: GPL-2.0
/*
 * Basic Node interface support
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memory.h>
#include <linux/vmstat.h>
#include <linux/notifier.h>
#include <linux/node.h>
#include <linux/hugetlb.h>
#include <linux/compaction.h>
#include <linux/cpumask.h>
#include <linux/topology.h>
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/device.h>
#include <linux/pm_runtime.h>
#include <linux/swap.h>
#include <linux/slab.h>
24 static const struct bus_type node_subsys
= {
29 static inline ssize_t
cpumap_read(struct file
*file
, struct kobject
*kobj
,
30 const struct bin_attribute
*attr
, char *buf
,
31 loff_t off
, size_t count
)
33 struct device
*dev
= kobj_to_dev(kobj
);
34 struct node
*node_dev
= to_node(dev
);
38 if (!alloc_cpumask_var(&mask
, GFP_KERNEL
))
41 cpumask_and(mask
, cpumask_of_node(node_dev
->dev
.id
), cpu_online_mask
);
42 n
= cpumap_print_bitmask_to_buf(buf
, mask
, off
, count
);
43 free_cpumask_var(mask
);
48 static const BIN_ATTR_RO(cpumap
, CPUMAP_FILE_MAX_BYTES
);
50 static inline ssize_t
cpulist_read(struct file
*file
, struct kobject
*kobj
,
51 const struct bin_attribute
*attr
, char *buf
,
52 loff_t off
, size_t count
)
54 struct device
*dev
= kobj_to_dev(kobj
);
55 struct node
*node_dev
= to_node(dev
);
59 if (!alloc_cpumask_var(&mask
, GFP_KERNEL
))
62 cpumask_and(mask
, cpumask_of_node(node_dev
->dev
.id
), cpu_online_mask
);
63 n
= cpumap_print_list_to_buf(buf
, mask
, off
, count
);
64 free_cpumask_var(mask
);
69 static const BIN_ATTR_RO(cpulist
, CPULIST_FILE_MAX_BYTES
);
72 * struct node_access_nodes - Access class device to hold user visible
73 * relationships to other nodes.
74 * @dev: Device for this memory access class
75 * @list_node: List element in the node's access list
76 * @access: The access class rank
77 * @coord: Heterogeneous memory performance coordinates
79 struct node_access_nodes
{
81 struct list_head list_node
;
83 #ifdef CONFIG_HMEM_REPORTING
84 struct access_coordinate coord
;
87 #define to_access_nodes(dev) container_of(dev, struct node_access_nodes, dev)
89 static struct attribute
*node_init_access_node_attrs
[] = {
93 static struct attribute
*node_targ_access_node_attrs
[] = {
97 static const struct attribute_group initiators
= {
99 .attrs
= node_init_access_node_attrs
,
102 static const struct attribute_group targets
= {
104 .attrs
= node_targ_access_node_attrs
,
107 static const struct attribute_group
*node_access_node_groups
[] = {
113 static void node_remove_accesses(struct node
*node
)
115 struct node_access_nodes
*c
, *cnext
;
117 list_for_each_entry_safe(c
, cnext
, &node
->access_list
, list_node
) {
118 list_del(&c
->list_node
);
119 device_unregister(&c
->dev
);
/* Device ->release callback: frees the containing node_access_nodes. */
static void node_access_release(struct device *dev)
{
	kfree(to_access_nodes(dev));
}
128 static struct node_access_nodes
*node_init_node_access(struct node
*node
,
129 enum access_coordinate_class access
)
131 struct node_access_nodes
*access_node
;
134 list_for_each_entry(access_node
, &node
->access_list
, list_node
)
135 if (access_node
->access
== access
)
138 access_node
= kzalloc(sizeof(*access_node
), GFP_KERNEL
);
142 access_node
->access
= access
;
143 dev
= &access_node
->dev
;
144 dev
->parent
= &node
->dev
;
145 dev
->release
= node_access_release
;
146 dev
->groups
= node_access_node_groups
;
147 if (dev_set_name(dev
, "access%u", access
))
150 if (device_register(dev
))
153 pm_runtime_no_callbacks(dev
);
154 list_add_tail(&access_node
->list_node
, &node
->access_list
);
157 kfree_const(dev
->kobj
.name
);
163 #ifdef CONFIG_HMEM_REPORTING
164 #define ACCESS_ATTR(property) \
165 static ssize_t property##_show(struct device *dev, \
166 struct device_attribute *attr, \
169 return sysfs_emit(buf, "%u\n", \
170 to_access_nodes(dev)->coord.property); \
172 static DEVICE_ATTR_RO(property)
174 ACCESS_ATTR(read_bandwidth
);
175 ACCESS_ATTR(read_latency
);
176 ACCESS_ATTR(write_bandwidth
);
177 ACCESS_ATTR(write_latency
);
179 static struct attribute
*access_attrs
[] = {
180 &dev_attr_read_bandwidth
.attr
,
181 &dev_attr_read_latency
.attr
,
182 &dev_attr_write_bandwidth
.attr
,
183 &dev_attr_write_latency
.attr
,
188 * node_set_perf_attrs - Set the performance values for given access class
189 * @nid: Node identifier to be set
190 * @coord: Heterogeneous memory performance coordinates
191 * @access: The access class the for the given attributes
193 void node_set_perf_attrs(unsigned int nid
, struct access_coordinate
*coord
,
194 enum access_coordinate_class access
)
196 struct node_access_nodes
*c
;
200 if (WARN_ON_ONCE(!node_online(nid
)))
203 node
= node_devices
[nid
];
204 c
= node_init_node_access(node
, access
);
209 for (i
= 0; access_attrs
[i
] != NULL
; i
++) {
210 if (sysfs_add_file_to_group(&c
->dev
.kobj
, access_attrs
[i
],
212 pr_info("failed to add performance attribute to node %d\n",
218 EXPORT_SYMBOL_GPL(node_set_perf_attrs
);
221 * struct node_cache_info - Internal tracking for memory node caches
222 * @dev: Device represeting the cache level
223 * @node: List element for tracking in the node
224 * @cache_attrs:Attributes for this cache level
226 struct node_cache_info
{
228 struct list_head node
;
229 struct node_cache_attrs cache_attrs
;
231 #define to_cache_info(device) container_of(device, struct node_cache_info, dev)
233 #define CACHE_ATTR(name, fmt) \
234 static ssize_t name##_show(struct device *dev, \
235 struct device_attribute *attr, \
238 return sysfs_emit(buf, fmt "\n", \
239 to_cache_info(dev)->cache_attrs.name); \
241 static DEVICE_ATTR_RO(name);
243 CACHE_ATTR(size
, "%llu")
244 CACHE_ATTR(line_size
, "%u")
245 CACHE_ATTR(indexing
, "%u")
246 CACHE_ATTR(write_policy
, "%u")
248 static struct attribute
*cache_attrs
[] = {
249 &dev_attr_indexing
.attr
,
251 &dev_attr_line_size
.attr
,
252 &dev_attr_write_policy
.attr
,
255 ATTRIBUTE_GROUPS(cache
);
/* ->release for the "memory_side_cache" parent device (bare kzalloc'd dev). */
static void node_cache_release(struct device *dev)
{
	kfree(dev);
}
/* ->release for per-level "indexN" devices: frees the node_cache_info. */
static void node_cacheinfo_release(struct device *dev)
{
	struct node_cache_info *info = to_cache_info(dev);

	kfree(info);
}
268 static void node_init_cache_dev(struct node
*node
)
272 dev
= kzalloc(sizeof(*dev
), GFP_KERNEL
);
276 device_initialize(dev
);
277 dev
->parent
= &node
->dev
;
278 dev
->release
= node_cache_release
;
279 if (dev_set_name(dev
, "memory_side_cache"))
285 pm_runtime_no_callbacks(dev
);
286 node
->cache_dev
= dev
;
293 * node_add_cache() - add cache attribute to a memory node
294 * @nid: Node identifier that has new cache attributes
295 * @cache_attrs: Attributes for the cache being added
297 void node_add_cache(unsigned int nid
, struct node_cache_attrs
*cache_attrs
)
299 struct node_cache_info
*info
;
303 if (!node_online(nid
) || !node_devices
[nid
])
306 node
= node_devices
[nid
];
307 list_for_each_entry(info
, &node
->cache_attrs
, node
) {
308 if (info
->cache_attrs
.level
== cache_attrs
->level
) {
310 "attempt to add duplicate cache level:%d\n",
316 if (!node
->cache_dev
)
317 node_init_cache_dev(node
);
318 if (!node
->cache_dev
)
321 info
= kzalloc(sizeof(*info
), GFP_KERNEL
);
326 device_initialize(dev
);
327 dev
->parent
= node
->cache_dev
;
328 dev
->release
= node_cacheinfo_release
;
329 dev
->groups
= cache_groups
;
330 if (dev_set_name(dev
, "index%d", cache_attrs
->level
))
333 info
->cache_attrs
= *cache_attrs
;
334 if (device_add(dev
)) {
335 dev_warn(&node
->dev
, "failed to add cache level:%d\n",
339 pm_runtime_no_callbacks(dev
);
340 list_add_tail(&info
->node
, &node
->cache_attrs
);
346 static void node_remove_caches(struct node
*node
)
348 struct node_cache_info
*info
, *next
;
350 if (!node
->cache_dev
)
353 list_for_each_entry_safe(info
, next
, &node
->cache_attrs
, node
) {
354 list_del(&info
->node
);
355 device_unregister(&info
->dev
);
357 device_unregister(node
->cache_dev
);
360 static void node_init_caches(unsigned int nid
)
362 INIT_LIST_HEAD(&node_devices
[nid
]->cache_attrs
);
365 static void node_init_caches(unsigned int nid
) { }
366 static void node_remove_caches(struct node
*node
) { }
369 #define K(x) ((x) << (PAGE_SHIFT - 10))
370 static ssize_t
node_read_meminfo(struct device
*dev
,
371 struct device_attribute
*attr
, char *buf
)
375 struct pglist_data
*pgdat
= NODE_DATA(nid
);
377 unsigned long sreclaimable
, sunreclaimable
;
378 unsigned long swapcached
= 0;
380 si_meminfo_node(&i
, nid
);
381 sreclaimable
= node_page_state_pages(pgdat
, NR_SLAB_RECLAIMABLE_B
);
382 sunreclaimable
= node_page_state_pages(pgdat
, NR_SLAB_UNRECLAIMABLE_B
);
384 swapcached
= node_page_state_pages(pgdat
, NR_SWAPCACHE
);
386 len
= sysfs_emit_at(buf
, len
,
387 "Node %d MemTotal: %8lu kB\n"
388 "Node %d MemFree: %8lu kB\n"
389 "Node %d MemUsed: %8lu kB\n"
390 "Node %d SwapCached: %8lu kB\n"
391 "Node %d Active: %8lu kB\n"
392 "Node %d Inactive: %8lu kB\n"
393 "Node %d Active(anon): %8lu kB\n"
394 "Node %d Inactive(anon): %8lu kB\n"
395 "Node %d Active(file): %8lu kB\n"
396 "Node %d Inactive(file): %8lu kB\n"
397 "Node %d Unevictable: %8lu kB\n"
398 "Node %d Mlocked: %8lu kB\n",
401 nid
, K(i
.totalram
- i
.freeram
),
403 nid
, K(node_page_state(pgdat
, NR_ACTIVE_ANON
) +
404 node_page_state(pgdat
, NR_ACTIVE_FILE
)),
405 nid
, K(node_page_state(pgdat
, NR_INACTIVE_ANON
) +
406 node_page_state(pgdat
, NR_INACTIVE_FILE
)),
407 nid
, K(node_page_state(pgdat
, NR_ACTIVE_ANON
)),
408 nid
, K(node_page_state(pgdat
, NR_INACTIVE_ANON
)),
409 nid
, K(node_page_state(pgdat
, NR_ACTIVE_FILE
)),
410 nid
, K(node_page_state(pgdat
, NR_INACTIVE_FILE
)),
411 nid
, K(node_page_state(pgdat
, NR_UNEVICTABLE
)),
412 nid
, K(sum_zone_node_page_state(nid
, NR_MLOCK
)));
414 #ifdef CONFIG_HIGHMEM
415 len
+= sysfs_emit_at(buf
, len
,
416 "Node %d HighTotal: %8lu kB\n"
417 "Node %d HighFree: %8lu kB\n"
418 "Node %d LowTotal: %8lu kB\n"
419 "Node %d LowFree: %8lu kB\n",
422 nid
, K(i
.totalram
- i
.totalhigh
),
423 nid
, K(i
.freeram
- i
.freehigh
));
425 len
+= sysfs_emit_at(buf
, len
,
426 "Node %d Dirty: %8lu kB\n"
427 "Node %d Writeback: %8lu kB\n"
428 "Node %d FilePages: %8lu kB\n"
429 "Node %d Mapped: %8lu kB\n"
430 "Node %d AnonPages: %8lu kB\n"
431 "Node %d Shmem: %8lu kB\n"
432 "Node %d KernelStack: %8lu kB\n"
433 #ifdef CONFIG_SHADOW_CALL_STACK
434 "Node %d ShadowCallStack:%8lu kB\n"
436 "Node %d PageTables: %8lu kB\n"
437 "Node %d SecPageTables: %8lu kB\n"
438 "Node %d NFS_Unstable: %8lu kB\n"
439 "Node %d Bounce: %8lu kB\n"
440 "Node %d WritebackTmp: %8lu kB\n"
441 "Node %d KReclaimable: %8lu kB\n"
442 "Node %d Slab: %8lu kB\n"
443 "Node %d SReclaimable: %8lu kB\n"
444 "Node %d SUnreclaim: %8lu kB\n"
445 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
446 "Node %d AnonHugePages: %8lu kB\n"
447 "Node %d ShmemHugePages: %8lu kB\n"
448 "Node %d ShmemPmdMapped: %8lu kB\n"
449 "Node %d FileHugePages: %8lu kB\n"
450 "Node %d FilePmdMapped: %8lu kB\n"
452 #ifdef CONFIG_UNACCEPTED_MEMORY
453 "Node %d Unaccepted: %8lu kB\n"
456 nid
, K(node_page_state(pgdat
, NR_FILE_DIRTY
)),
457 nid
, K(node_page_state(pgdat
, NR_WRITEBACK
)),
458 nid
, K(node_page_state(pgdat
, NR_FILE_PAGES
)),
459 nid
, K(node_page_state(pgdat
, NR_FILE_MAPPED
)),
460 nid
, K(node_page_state(pgdat
, NR_ANON_MAPPED
)),
462 nid
, node_page_state(pgdat
, NR_KERNEL_STACK_KB
),
463 #ifdef CONFIG_SHADOW_CALL_STACK
464 nid
, node_page_state(pgdat
, NR_KERNEL_SCS_KB
),
466 nid
, K(node_page_state(pgdat
, NR_PAGETABLE
)),
467 nid
, K(node_page_state(pgdat
, NR_SECONDARY_PAGETABLE
)),
469 nid
, K(sum_zone_node_page_state(nid
, NR_BOUNCE
)),
470 nid
, K(node_page_state(pgdat
, NR_WRITEBACK_TEMP
)),
471 nid
, K(sreclaimable
+
472 node_page_state(pgdat
, NR_KERNEL_MISC_RECLAIMABLE
)),
473 nid
, K(sreclaimable
+ sunreclaimable
),
474 nid
, K(sreclaimable
),
475 nid
, K(sunreclaimable
)
476 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
478 nid
, K(node_page_state(pgdat
, NR_ANON_THPS
)),
479 nid
, K(node_page_state(pgdat
, NR_SHMEM_THPS
)),
480 nid
, K(node_page_state(pgdat
, NR_SHMEM_PMDMAPPED
)),
481 nid
, K(node_page_state(pgdat
, NR_FILE_THPS
)),
482 nid
, K(node_page_state(pgdat
, NR_FILE_PMDMAPPED
))
484 #ifdef CONFIG_UNACCEPTED_MEMORY
486 nid
, K(sum_zone_node_page_state(nid
, NR_UNACCEPTED
))
489 len
+= hugetlb_report_node_meminfo(buf
, len
, nid
);
494 static DEVICE_ATTR(meminfo
, 0444, node_read_meminfo
, NULL
);
496 static ssize_t
node_read_numastat(struct device
*dev
,
497 struct device_attribute
*attr
, char *buf
)
499 fold_vm_numa_events();
500 return sysfs_emit(buf
,
504 "interleave_hit %lu\n"
507 sum_zone_numa_event_state(dev
->id
, NUMA_HIT
),
508 sum_zone_numa_event_state(dev
->id
, NUMA_MISS
),
509 sum_zone_numa_event_state(dev
->id
, NUMA_FOREIGN
),
510 sum_zone_numa_event_state(dev
->id
, NUMA_INTERLEAVE_HIT
),
511 sum_zone_numa_event_state(dev
->id
, NUMA_LOCAL
),
512 sum_zone_numa_event_state(dev
->id
, NUMA_OTHER
));
514 static DEVICE_ATTR(numastat
, 0444, node_read_numastat
, NULL
);
516 static ssize_t
node_read_vmstat(struct device
*dev
,
517 struct device_attribute
*attr
, char *buf
)
520 struct pglist_data
*pgdat
= NODE_DATA(nid
);
524 for (i
= 0; i
< NR_VM_ZONE_STAT_ITEMS
; i
++)
525 len
+= sysfs_emit_at(buf
, len
, "%s %lu\n",
527 sum_zone_node_page_state(nid
, i
));
530 fold_vm_numa_events();
531 for (i
= 0; i
< NR_VM_NUMA_EVENT_ITEMS
; i
++)
532 len
+= sysfs_emit_at(buf
, len
, "%s %lu\n",
534 sum_zone_numa_event_state(nid
, i
));
537 for (i
= 0; i
< NR_VM_NODE_STAT_ITEMS
; i
++) {
538 unsigned long pages
= node_page_state_pages(pgdat
, i
);
540 if (vmstat_item_print_in_thp(i
))
541 pages
/= HPAGE_PMD_NR
;
542 len
+= sysfs_emit_at(buf
, len
, "%s %lu\n", node_stat_name(i
),
548 static DEVICE_ATTR(vmstat
, 0444, node_read_vmstat
, NULL
);
550 static ssize_t
node_read_distance(struct device
*dev
,
551 struct device_attribute
*attr
, char *buf
)
558 * buf is currently PAGE_SIZE in length and each node needs 4 chars
559 * at the most (distance + space or newline).
561 BUILD_BUG_ON(MAX_NUMNODES
* 4 > PAGE_SIZE
);
563 for_each_online_node(i
) {
564 len
+= sysfs_emit_at(buf
, len
, "%s%d",
565 i
? " " : "", node_distance(nid
, i
));
568 len
+= sysfs_emit_at(buf
, len
, "\n");
571 static DEVICE_ATTR(distance
, 0444, node_read_distance
, NULL
);
573 static struct attribute
*node_dev_attrs
[] = {
574 &dev_attr_meminfo
.attr
,
575 &dev_attr_numastat
.attr
,
576 &dev_attr_distance
.attr
,
577 &dev_attr_vmstat
.attr
,
581 static const struct bin_attribute
*node_dev_bin_attrs
[] = {
587 static const struct attribute_group node_dev_group
= {
588 .attrs
= node_dev_attrs
,
589 .bin_attrs_new
= node_dev_bin_attrs
,
592 static const struct attribute_group
*node_dev_groups
[] = {
594 #ifdef CONFIG_HAVE_ARCH_NODE_DEV_GROUP
595 &arch_node_dev_group
,
597 #ifdef CONFIG_MEMORY_FAILURE
598 &memory_failure_attr_group
,
/* ->release for node devices: frees the containing struct node. */
static void node_device_release(struct device *dev)
{
	kfree(to_node(dev));
}
609 * register_node - Setup a sysfs device for a node.
610 * @num - Node number to use when creating the device.
612 * Initialize and register the node device.
614 static int register_node(struct node
*node
, int num
)
619 node
->dev
.bus
= &node_subsys
;
620 node
->dev
.release
= node_device_release
;
621 node
->dev
.groups
= node_dev_groups
;
622 error
= device_register(&node
->dev
);
625 put_device(&node
->dev
);
627 hugetlb_register_node(node
);
628 compaction_register_node(node
);
635 * unregister_node - unregister a node device
636 * @node: node going away
638 * Unregisters a node device @node. All the devices on the node must be
639 * unregistered before calling this function.
641 void unregister_node(struct node
*node
)
643 hugetlb_unregister_node(node
);
644 compaction_unregister_node(node
);
645 node_remove_accesses(node
);
646 node_remove_caches(node
);
647 device_unregister(&node
->dev
);
650 struct node
*node_devices
[MAX_NUMNODES
];
653 * register cpu under node
655 int register_cpu_under_node(unsigned int cpu
, unsigned int nid
)
660 if (!node_online(nid
))
663 obj
= get_cpu_device(cpu
);
667 ret
= sysfs_create_link(&node_devices
[nid
]->dev
.kobj
,
669 kobject_name(&obj
->kobj
));
673 return sysfs_create_link(&obj
->kobj
,
674 &node_devices
[nid
]->dev
.kobj
,
675 kobject_name(&node_devices
[nid
]->dev
.kobj
));
679 * register_memory_node_under_compute_node - link memory node to its compute
680 * node for a given access class.
681 * @mem_nid: Memory node number
682 * @cpu_nid: Cpu node number
683 * @access: Access class to register
686 * For use with platforms that may have separate memory and compute nodes.
687 * This function will export node relationships linking which memory
688 * initiator nodes can access memory targets at a given ranked access
691 int register_memory_node_under_compute_node(unsigned int mem_nid
,
692 unsigned int cpu_nid
,
693 enum access_coordinate_class access
)
695 struct node
*init_node
, *targ_node
;
696 struct node_access_nodes
*initiator
, *target
;
699 if (!node_online(cpu_nid
) || !node_online(mem_nid
))
702 init_node
= node_devices
[cpu_nid
];
703 targ_node
= node_devices
[mem_nid
];
704 initiator
= node_init_node_access(init_node
, access
);
705 target
= node_init_node_access(targ_node
, access
);
706 if (!initiator
|| !target
)
709 ret
= sysfs_add_link_to_group(&initiator
->dev
.kobj
, "targets",
710 &targ_node
->dev
.kobj
,
711 dev_name(&targ_node
->dev
));
715 ret
= sysfs_add_link_to_group(&target
->dev
.kobj
, "initiators",
716 &init_node
->dev
.kobj
,
717 dev_name(&init_node
->dev
));
723 sysfs_remove_link_from_group(&initiator
->dev
.kobj
, "targets",
724 dev_name(&targ_node
->dev
));
728 int unregister_cpu_under_node(unsigned int cpu
, unsigned int nid
)
732 if (!node_online(nid
))
735 obj
= get_cpu_device(cpu
);
739 sysfs_remove_link(&node_devices
[nid
]->dev
.kobj
,
740 kobject_name(&obj
->kobj
));
741 sysfs_remove_link(&obj
->kobj
,
742 kobject_name(&node_devices
[nid
]->dev
.kobj
));
747 #ifdef CONFIG_MEMORY_HOTPLUG
748 static int __ref
get_nid_for_pfn(unsigned long pfn
)
750 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
751 if (system_state
< SYSTEM_RUNNING
)
752 return early_pfn_to_nid(pfn
);
754 return pfn_to_nid(pfn
);
757 static void do_register_memory_block_under_node(int nid
,
758 struct memory_block
*mem_blk
,
759 enum meminit_context context
)
763 memory_block_add_nid(mem_blk
, nid
, context
);
765 ret
= sysfs_create_link_nowarn(&node_devices
[nid
]->dev
.kobj
,
767 kobject_name(&mem_blk
->dev
.kobj
));
768 if (ret
&& ret
!= -EEXIST
)
769 dev_err_ratelimited(&node_devices
[nid
]->dev
,
770 "can't create link to %s in sysfs (%d)\n",
771 kobject_name(&mem_blk
->dev
.kobj
), ret
);
773 ret
= sysfs_create_link_nowarn(&mem_blk
->dev
.kobj
,
774 &node_devices
[nid
]->dev
.kobj
,
775 kobject_name(&node_devices
[nid
]->dev
.kobj
));
776 if (ret
&& ret
!= -EEXIST
)
777 dev_err_ratelimited(&mem_blk
->dev
,
778 "can't create link to %s in sysfs (%d)\n",
779 kobject_name(&node_devices
[nid
]->dev
.kobj
),
783 /* register memory section under specified node if it spans that node */
784 static int register_mem_block_under_node_early(struct memory_block
*mem_blk
,
787 unsigned long memory_block_pfns
= memory_block_size_bytes() / PAGE_SIZE
;
788 unsigned long start_pfn
= section_nr_to_pfn(mem_blk
->start_section_nr
);
789 unsigned long end_pfn
= start_pfn
+ memory_block_pfns
- 1;
790 int nid
= *(int *)arg
;
793 for (pfn
= start_pfn
; pfn
<= end_pfn
; pfn
++) {
797 * memory block could have several absent sections from start.
798 * skip pfn range from absent section
800 if (!pfn_in_present_section(pfn
)) {
801 pfn
= round_down(pfn
+ PAGES_PER_SECTION
,
802 PAGES_PER_SECTION
) - 1;
807 * We need to check if page belongs to nid only at the boot
808 * case because node's ranges can be interleaved.
810 page_nid
= get_nid_for_pfn(pfn
);
816 do_register_memory_block_under_node(nid
, mem_blk
, MEMINIT_EARLY
);
819 /* mem section does not span the specified node */
824 * During hotplug we know that all pages in the memory block belong to the same
827 static int register_mem_block_under_node_hotplug(struct memory_block
*mem_blk
,
830 int nid
= *(int *)arg
;
832 do_register_memory_block_under_node(nid
, mem_blk
, MEMINIT_HOTPLUG
);
837 * Unregister a memory block device under the node it spans. Memory blocks
838 * with multiple nodes cannot be offlined and therefore also never be removed.
840 void unregister_memory_block_under_nodes(struct memory_block
*mem_blk
)
842 if (mem_blk
->nid
== NUMA_NO_NODE
)
845 sysfs_remove_link(&node_devices
[mem_blk
->nid
]->dev
.kobj
,
846 kobject_name(&mem_blk
->dev
.kobj
));
847 sysfs_remove_link(&mem_blk
->dev
.kobj
,
848 kobject_name(&node_devices
[mem_blk
->nid
]->dev
.kobj
));
851 void register_memory_blocks_under_node(int nid
, unsigned long start_pfn
,
852 unsigned long end_pfn
,
853 enum meminit_context context
)
855 walk_memory_blocks_func_t func
;
857 if (context
== MEMINIT_HOTPLUG
)
858 func
= register_mem_block_under_node_hotplug
;
860 func
= register_mem_block_under_node_early
;
862 walk_memory_blocks(PFN_PHYS(start_pfn
), PFN_PHYS(end_pfn
- start_pfn
),
866 #endif /* CONFIG_MEMORY_HOTPLUG */
868 int __register_one_node(int nid
)
874 node
= kzalloc(sizeof(struct node
), GFP_KERNEL
);
878 INIT_LIST_HEAD(&node
->access_list
);
879 node_devices
[nid
] = node
;
881 error
= register_node(node_devices
[nid
], nid
);
883 /* link cpu under this node */
884 for_each_present_cpu(cpu
) {
885 if (cpu_to_node(cpu
) == nid
)
886 register_cpu_under_node(cpu
, nid
);
889 node_init_caches(nid
);
894 void unregister_one_node(int nid
)
896 if (!node_devices
[nid
])
899 unregister_node(node_devices
[nid
]);
900 node_devices
[nid
] = NULL
;
904 * node states attributes
908 struct device_attribute attr
;
909 enum node_states state
;
912 static ssize_t
show_node_state(struct device
*dev
,
913 struct device_attribute
*attr
, char *buf
)
915 struct node_attr
*na
= container_of(attr
, struct node_attr
, attr
);
917 return sysfs_emit(buf
, "%*pbl\n",
918 nodemask_pr_args(&node_states
[na
->state
]));
921 #define _NODE_ATTR(name, state) \
922 { __ATTR(name, 0444, show_node_state, NULL), state }
924 static struct node_attr node_state_attr
[] = {
925 [N_POSSIBLE
] = _NODE_ATTR(possible
, N_POSSIBLE
),
926 [N_ONLINE
] = _NODE_ATTR(online
, N_ONLINE
),
927 [N_NORMAL_MEMORY
] = _NODE_ATTR(has_normal_memory
, N_NORMAL_MEMORY
),
928 #ifdef CONFIG_HIGHMEM
929 [N_HIGH_MEMORY
] = _NODE_ATTR(has_high_memory
, N_HIGH_MEMORY
),
931 [N_MEMORY
] = _NODE_ATTR(has_memory
, N_MEMORY
),
932 [N_CPU
] = _NODE_ATTR(has_cpu
, N_CPU
),
933 [N_GENERIC_INITIATOR
] = _NODE_ATTR(has_generic_initiator
,
934 N_GENERIC_INITIATOR
),
937 static struct attribute
*node_state_attrs
[] = {
938 &node_state_attr
[N_POSSIBLE
].attr
.attr
,
939 &node_state_attr
[N_ONLINE
].attr
.attr
,
940 &node_state_attr
[N_NORMAL_MEMORY
].attr
.attr
,
941 #ifdef CONFIG_HIGHMEM
942 &node_state_attr
[N_HIGH_MEMORY
].attr
.attr
,
944 &node_state_attr
[N_MEMORY
].attr
.attr
,
945 &node_state_attr
[N_CPU
].attr
.attr
,
946 &node_state_attr
[N_GENERIC_INITIATOR
].attr
.attr
,
950 static const struct attribute_group memory_root_attr_group
= {
951 .attrs
= node_state_attrs
,
954 static const struct attribute_group
*cpu_root_attr_groups
[] = {
955 &memory_root_attr_group
,
959 void __init
node_dev_init(void)
963 BUILD_BUG_ON(ARRAY_SIZE(node_state_attr
) != NR_NODE_STATES
);
964 BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs
)-1 != NR_NODE_STATES
);
966 ret
= subsys_system_register(&node_subsys
, cpu_root_attr_groups
);
968 panic("%s() failed to register subsystem: %d\n", __func__
, ret
);
971 * Create all node devices, which will properly link the node
972 * to applicable memory block devices and already created cpu devices.
974 for_each_online_node(i
) {
975 ret
= register_one_node(i
);
977 panic("%s() failed to add node: %d\n", __func__
, ret
);