1 // SPDX-License-Identifier: GPL-2.0
3 * CPU subsystem support
6 #include <linux/kernel.h>
7 #include <linux/module.h>
8 #include <linux/init.h>
9 #include <linux/sched.h>
10 #include <linux/cpu.h>
11 #include <linux/topology.h>
12 #include <linux/device.h>
13 #include <linux/node.h>
14 #include <linux/gfp.h>
15 #include <linux/slab.h>
16 #include <linux/percpu.h>
17 #include <linux/acpi.h>
19 #include <linux/cpufeature.h>
20 #include <linux/tick.h>
21 #include <linux/pm_qos.h>
22 #include <linux/sched/isolation.h>
26 static DEFINE_PER_CPU(struct device
*, cpu_sys_devices
);
/*
 * Bus match callback for the CPU subsystem.
 *
 * ACPI style match is the only one that may succeed: the result is 1 when
 * acpi_driver_match_device() accepts the @dev / @drv pair and 0 otherwise.
 */
static int cpu_subsys_match(struct device *dev, struct device_driver *drv)
{
	return acpi_driver_match_device(dev, drv) ? 1 : 0;
}
37 #ifdef CONFIG_HOTPLUG_CPU
38 static void change_cpu_under_node(struct cpu
*cpu
,
39 unsigned int from_nid
, unsigned int to_nid
)
41 int cpuid
= cpu
->dev
.id
;
42 unregister_cpu_under_node(cpuid
, from_nid
);
43 register_cpu_under_node(cpuid
, to_nid
);
44 cpu
->node_id
= to_nid
;
47 static int cpu_subsys_online(struct device
*dev
)
49 struct cpu
*cpu
= container_of(dev
, struct cpu
, dev
);
54 from_nid
= cpu_to_node(cpuid
);
55 if (from_nid
== NUMA_NO_NODE
)
58 ret
= cpu_device_up(dev
);
60 * When hot adding memory to memoryless node and enabling a cpu
61 * on the node, node number of the cpu may internally change.
63 to_nid
= cpu_to_node(cpuid
);
64 if (from_nid
!= to_nid
)
65 change_cpu_under_node(cpu
, from_nid
, to_nid
);
/* Bus ->offline callback: hand the request straight to cpu_device_down(). */
static int cpu_subsys_offline(struct device *dev)
{
	int err = cpu_device_down(dev);

	return err;
}
75 void unregister_cpu(struct cpu
*cpu
)
77 int logical_cpu
= cpu
->dev
.id
;
79 unregister_cpu_under_node(logical_cpu
, cpu_to_node(logical_cpu
));
81 device_unregister(&cpu
->dev
);
82 per_cpu(cpu_sys_devices
, logical_cpu
) = NULL
;
86 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
87 static ssize_t
cpu_probe_store(struct device
*dev
,
88 struct device_attribute
*attr
,
95 ret
= lock_device_hotplug_sysfs();
99 cnt
= arch_cpu_probe(buf
, count
);
101 unlock_device_hotplug();
105 static ssize_t
cpu_release_store(struct device
*dev
,
106 struct device_attribute
*attr
,
113 ret
= lock_device_hotplug_sysfs();
117 cnt
= arch_cpu_release(buf
, count
);
119 unlock_device_hotplug();
123 static DEVICE_ATTR(probe
, S_IWUSR
, NULL
, cpu_probe_store
);
124 static DEVICE_ATTR(release
, S_IWUSR
, NULL
, cpu_release_store
);
125 #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
126 #endif /* CONFIG_HOTPLUG_CPU */
128 struct bus_type cpu_subsys
= {
131 .match
= cpu_subsys_match
,
132 #ifdef CONFIG_HOTPLUG_CPU
133 .online
= cpu_subsys_online
,
134 .offline
= cpu_subsys_offline
,
137 EXPORT_SYMBOL_GPL(cpu_subsys
);
140 #include <linux/kexec.h>
142 static ssize_t
crash_notes_show(struct device
*dev
,
143 struct device_attribute
*attr
,
146 struct cpu
*cpu
= container_of(dev
, struct cpu
, dev
);
147 unsigned long long addr
;
150 cpunum
= cpu
->dev
.id
;
153 * Might be reading other cpu's data based on which cpu read thread
154 * has been scheduled. But cpu data (memory) is allocated once during
155 * boot up and this data does not change there after. Hence this
156 * operation should be safe. No locking required.
158 addr
= per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes
, cpunum
));
160 return sysfs_emit(buf
, "%llx\n", addr
);
162 static DEVICE_ATTR_ADMIN_RO(crash_notes
);
164 static ssize_t
crash_notes_size_show(struct device
*dev
,
165 struct device_attribute
*attr
,
168 return sysfs_emit(buf
, "%zu\n", sizeof(note_buf_t
));
170 static DEVICE_ATTR_ADMIN_RO(crash_notes_size
);
172 static struct attribute
*crash_note_cpu_attrs
[] = {
173 &dev_attr_crash_notes
.attr
,
174 &dev_attr_crash_notes_size
.attr
,
178 static struct attribute_group crash_note_cpu_attr_group
= {
179 .attrs
= crash_note_cpu_attrs
,
183 static const struct attribute_group
*common_cpu_attr_groups
[] = {
185 &crash_note_cpu_attr_group
,
190 static const struct attribute_group
*hotplugable_cpu_attr_groups
[] = {
192 &crash_note_cpu_attr_group
,
198 * Print cpu online, possible, present, and system maps
202 struct device_attribute attr
;
203 const struct cpumask
*const map
;
206 static ssize_t
show_cpus_attr(struct device
*dev
,
207 struct device_attribute
*attr
,
210 struct cpu_attr
*ca
= container_of(attr
, struct cpu_attr
, attr
);
212 return cpumap_print_to_pagebuf(true, buf
, ca
->map
);
/* Initializer for one struct cpu_attr: read-only attribute @name showing @map. */
#define _CPU_ATTR(name, map)						\
	{ __ATTR(name, 0444, show_cpus_attr, NULL), (map) }
218 /* Keep in sync with cpu_subsys_attrs */
219 static struct cpu_attr cpu_attrs
[] = {
220 _CPU_ATTR(online
, &__cpu_online_mask
),
221 _CPU_ATTR(possible
, &__cpu_possible_mask
),
222 _CPU_ATTR(present
, &__cpu_present_mask
),
226 * Print values for NR_CPUS and offlined cpus
228 static ssize_t
print_cpus_kernel_max(struct device
*dev
,
229 struct device_attribute
*attr
, char *buf
)
231 return sysfs_emit(buf
, "%d\n", NR_CPUS
- 1);
233 static DEVICE_ATTR(kernel_max
, 0444, print_cpus_kernel_max
, NULL
);
235 /* arch-optional setting to enable display of offline cpus >= nr_cpu_ids */
236 unsigned int total_cpus
;
238 static ssize_t
print_cpus_offline(struct device
*dev
,
239 struct device_attribute
*attr
, char *buf
)
242 cpumask_var_t offline
;
244 /* display offline cpus < nr_cpu_ids */
245 if (!alloc_cpumask_var(&offline
, GFP_KERNEL
))
247 cpumask_andnot(offline
, cpu_possible_mask
, cpu_online_mask
);
248 len
+= sysfs_emit_at(buf
, len
, "%*pbl", cpumask_pr_args(offline
));
249 free_cpumask_var(offline
);
251 /* display offline cpus >= nr_cpu_ids */
252 if (total_cpus
&& nr_cpu_ids
< total_cpus
) {
253 len
+= sysfs_emit_at(buf
, len
, ",");
255 if (nr_cpu_ids
== total_cpus
-1)
256 len
+= sysfs_emit_at(buf
, len
, "%u", nr_cpu_ids
);
258 len
+= sysfs_emit_at(buf
, len
, "%u-%d",
259 nr_cpu_ids
, total_cpus
- 1);
262 len
+= sysfs_emit_at(buf
, len
, "\n");
266 static DEVICE_ATTR(offline
, 0444, print_cpus_offline
, NULL
);
268 static ssize_t
print_cpus_isolated(struct device
*dev
,
269 struct device_attribute
*attr
, char *buf
)
272 cpumask_var_t isolated
;
274 if (!alloc_cpumask_var(&isolated
, GFP_KERNEL
))
277 cpumask_andnot(isolated
, cpu_possible_mask
,
278 housekeeping_cpumask(HK_FLAG_DOMAIN
));
279 len
= sysfs_emit(buf
, "%*pbl\n", cpumask_pr_args(isolated
));
281 free_cpumask_var(isolated
);
285 static DEVICE_ATTR(isolated
, 0444, print_cpus_isolated
, NULL
);
#ifdef CONFIG_NO_HZ_FULL
/* sysfs show handler for "nohz_full": prints tick_nohz_full_mask as a list. */
static ssize_t print_cpus_nohz_full(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	int written = sysfs_emit(buf, "%*pbl\n",
				 cpumask_pr_args(tick_nohz_full_mask));

	return written;
}
static DEVICE_ATTR(nohz_full, 0444, print_cpus_nohz_full, NULL);
#endif
static void cpu_device_release(struct device *dev)
{
	/*
	 * Intentionally empty. It exists only so the driver core does not
	 * warn about a missing release callback.
	 *
	 * This contradicts what the driver core and kobject documentation
	 * ask for, and the author has already been publicly ridiculed for
	 * it. At this point it is still the only way to cope with
	 * statically allocated cpu devices. Once the architectures rework
	 * their cpu device code, this function is meant to free the memory
	 * held by each cpu device properly.
	 *
	 * Never copy this way of doing things, or you too will be made fun
	 * of on the linux-kernel list, you have been warned.
	 */
}
315 #ifdef CONFIG_GENERIC_CPU_AUTOPROBE
316 static ssize_t
print_cpu_modalias(struct device
*dev
,
317 struct device_attribute
*attr
,
323 len
+= sysfs_emit_at(buf
, len
,
324 "cpu:type:" CPU_FEATURE_TYPEFMT
":feature:",
325 CPU_FEATURE_TYPEVAL
);
327 for (i
= 0; i
< MAX_CPU_FEATURES
; i
++)
328 if (cpu_have_feature(i
)) {
329 if (len
+ sizeof(",XXXX\n") >= PAGE_SIZE
) {
330 WARN(1, "CPU features overflow page\n");
333 len
+= sysfs_emit_at(buf
, len
, ",%04X", i
);
335 len
+= sysfs_emit_at(buf
, len
, "\n");
339 static int cpu_uevent(struct device
*dev
, struct kobj_uevent_env
*env
)
341 char *buf
= kzalloc(PAGE_SIZE
, GFP_KERNEL
);
343 print_cpu_modalias(NULL
, NULL
, buf
);
344 add_uevent_var(env
, "MODALIAS=%s", buf
);
352 * register_cpu - Setup a sysfs device for a CPU.
353 * @cpu - cpu->hotpluggable field set to 1 will generate a control file in
354 * sysfs for this CPU.
355 * @num - CPU number to use when creating the device.
357 * Initialize and register the CPU device.
359 int register_cpu(struct cpu
*cpu
, int num
)
363 cpu
->node_id
= cpu_to_node(num
);
364 memset(&cpu
->dev
, 0x00, sizeof(struct device
));
366 cpu
->dev
.bus
= &cpu_subsys
;
367 cpu
->dev
.release
= cpu_device_release
;
368 cpu
->dev
.offline_disabled
= !cpu
->hotpluggable
;
369 cpu
->dev
.offline
= !cpu_online(num
);
370 cpu
->dev
.of_node
= of_get_cpu_node(num
, NULL
);
371 #ifdef CONFIG_GENERIC_CPU_AUTOPROBE
372 cpu
->dev
.bus
->uevent
= cpu_uevent
;
374 cpu
->dev
.groups
= common_cpu_attr_groups
;
375 if (cpu
->hotpluggable
)
376 cpu
->dev
.groups
= hotplugable_cpu_attr_groups
;
377 error
= device_register(&cpu
->dev
);
379 put_device(&cpu
->dev
);
383 per_cpu(cpu_sys_devices
, num
) = &cpu
->dev
;
384 register_cpu_under_node(num
, cpu_to_node(num
));
385 dev_pm_qos_expose_latency_limit(&cpu
->dev
,
386 PM_QOS_RESUME_LATENCY_NO_CONSTRAINT
);
391 struct device
*get_cpu_device(unsigned cpu
)
393 if (cpu
< nr_cpu_ids
&& cpu_possible(cpu
))
394 return per_cpu(cpu_sys_devices
, cpu
);
398 EXPORT_SYMBOL_GPL(get_cpu_device
);
/*
 * Release callback for devices made by __cpu_device_create(), which
 * allocates them with kzalloc().
 * NOTE(review): the body is not visible in this view; kfree() is inferred
 * from that allocation - confirm against the full file.
 */
static void device_create_release(struct device *dev)
{
	kfree(dev);
}
406 static struct device
*
407 __cpu_device_create(struct device
*parent
, void *drvdata
,
408 const struct attribute_group
**groups
,
409 const char *fmt
, va_list args
)
411 struct device
*dev
= NULL
;
412 int retval
= -ENODEV
;
414 dev
= kzalloc(sizeof(*dev
), GFP_KERNEL
);
420 device_initialize(dev
);
421 dev
->parent
= parent
;
422 dev
->groups
= groups
;
423 dev
->release
= device_create_release
;
424 device_set_pm_not_required(dev
);
425 dev_set_drvdata(dev
, drvdata
);
427 retval
= kobject_set_name_vargs(&dev
->kobj
, fmt
, args
);
431 retval
= device_add(dev
);
439 return ERR_PTR(retval
);
442 struct device
*cpu_device_create(struct device
*parent
, void *drvdata
,
443 const struct attribute_group
**groups
,
444 const char *fmt
, ...)
449 va_start(vargs
, fmt
);
450 dev
= __cpu_device_create(parent
, drvdata
, groups
, fmt
, vargs
);
454 EXPORT_SYMBOL_GPL(cpu_device_create
);
456 #ifdef CONFIG_GENERIC_CPU_AUTOPROBE
457 static DEVICE_ATTR(modalias
, 0444, print_cpu_modalias
, NULL
);
460 static struct attribute
*cpu_root_attrs
[] = {
461 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
462 &dev_attr_probe
.attr
,
463 &dev_attr_release
.attr
,
465 &cpu_attrs
[0].attr
.attr
,
466 &cpu_attrs
[1].attr
.attr
,
467 &cpu_attrs
[2].attr
.attr
,
468 &dev_attr_kernel_max
.attr
,
469 &dev_attr_offline
.attr
,
470 &dev_attr_isolated
.attr
,
471 #ifdef CONFIG_NO_HZ_FULL
472 &dev_attr_nohz_full
.attr
,
474 #ifdef CONFIG_GENERIC_CPU_AUTOPROBE
475 &dev_attr_modalias
.attr
,
480 static struct attribute_group cpu_root_attr_group
= {
481 .attrs
= cpu_root_attrs
,
484 static const struct attribute_group
*cpu_root_attr_groups
[] = {
485 &cpu_root_attr_group
,
489 bool cpu_is_hotpluggable(unsigned cpu
)
491 struct device
*dev
= get_cpu_device(cpu
);
492 return dev
&& container_of(dev
, struct cpu
, dev
)->hotpluggable
;
494 EXPORT_SYMBOL_GPL(cpu_is_hotpluggable
);
496 #ifdef CONFIG_GENERIC_CPU_DEVICES
497 static DEFINE_PER_CPU(struct cpu
, cpu_devices
);
500 static void __init
cpu_dev_register_generic(void)
502 #ifdef CONFIG_GENERIC_CPU_DEVICES
505 for_each_possible_cpu(i
) {
506 if (register_cpu(&per_cpu(cpu_devices
, i
), i
))
507 panic("Failed to register CPU device");
512 #ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
514 ssize_t __weak
cpu_show_meltdown(struct device
*dev
,
515 struct device_attribute
*attr
, char *buf
)
517 return sysfs_emit(buf
, "Not affected\n");
520 ssize_t __weak
cpu_show_spectre_v1(struct device
*dev
,
521 struct device_attribute
*attr
, char *buf
)
523 return sysfs_emit(buf
, "Not affected\n");
526 ssize_t __weak
cpu_show_spectre_v2(struct device
*dev
,
527 struct device_attribute
*attr
, char *buf
)
529 return sysfs_emit(buf
, "Not affected\n");
532 ssize_t __weak
cpu_show_spec_store_bypass(struct device
*dev
,
533 struct device_attribute
*attr
, char *buf
)
535 return sysfs_emit(buf
, "Not affected\n");
538 ssize_t __weak
cpu_show_l1tf(struct device
*dev
,
539 struct device_attribute
*attr
, char *buf
)
541 return sysfs_emit(buf
, "Not affected\n");
544 ssize_t __weak
cpu_show_mds(struct device
*dev
,
545 struct device_attribute
*attr
, char *buf
)
547 return sysfs_emit(buf
, "Not affected\n");
550 ssize_t __weak
cpu_show_tsx_async_abort(struct device
*dev
,
551 struct device_attribute
*attr
,
554 return sysfs_emit(buf
, "Not affected\n");
557 ssize_t __weak
cpu_show_itlb_multihit(struct device
*dev
,
558 struct device_attribute
*attr
, char *buf
)
560 return sysfs_emit(buf
, "Not affected\n");
563 ssize_t __weak
cpu_show_srbds(struct device
*dev
,
564 struct device_attribute
*attr
, char *buf
)
566 return sysfs_emit(buf
, "Not affected\n");
569 static DEVICE_ATTR(meltdown
, 0444, cpu_show_meltdown
, NULL
);
570 static DEVICE_ATTR(spectre_v1
, 0444, cpu_show_spectre_v1
, NULL
);
571 static DEVICE_ATTR(spectre_v2
, 0444, cpu_show_spectre_v2
, NULL
);
572 static DEVICE_ATTR(spec_store_bypass
, 0444, cpu_show_spec_store_bypass
, NULL
);
573 static DEVICE_ATTR(l1tf
, 0444, cpu_show_l1tf
, NULL
);
574 static DEVICE_ATTR(mds
, 0444, cpu_show_mds
, NULL
);
575 static DEVICE_ATTR(tsx_async_abort
, 0444, cpu_show_tsx_async_abort
, NULL
);
576 static DEVICE_ATTR(itlb_multihit
, 0444, cpu_show_itlb_multihit
, NULL
);
577 static DEVICE_ATTR(srbds
, 0444, cpu_show_srbds
, NULL
);
579 static struct attribute
*cpu_root_vulnerabilities_attrs
[] = {
580 &dev_attr_meltdown
.attr
,
581 &dev_attr_spectre_v1
.attr
,
582 &dev_attr_spectre_v2
.attr
,
583 &dev_attr_spec_store_bypass
.attr
,
586 &dev_attr_tsx_async_abort
.attr
,
587 &dev_attr_itlb_multihit
.attr
,
588 &dev_attr_srbds
.attr
,
592 static const struct attribute_group cpu_root_vulnerabilities_group
= {
593 .name
= "vulnerabilities",
594 .attrs
= cpu_root_vulnerabilities_attrs
,
597 static void __init
cpu_register_vulnerabilities(void)
599 if (sysfs_create_group(&cpu_subsys
.dev_root
->kobj
,
600 &cpu_root_vulnerabilities_group
))
601 pr_err("Unable to register CPU vulnerabilities\n");
605 static inline void cpu_register_vulnerabilities(void) { }
608 void __init
cpu_dev_init(void)
610 if (subsys_system_register(&cpu_subsys
, cpu_root_attr_groups
))
611 panic("Failed to register CPU subsystem");
613 cpu_dev_register_generic();
614 cpu_register_vulnerabilities();