// SPDX-License-Identifier: GPL-2.0
/*
 * CPU subsystem support
 */
6 #include <linux/kernel.h>
7 #include <linux/module.h>
8 #include <linux/init.h>
9 #include <linux/sched.h>
10 #include <linux/cpu.h>
11 #include <linux/topology.h>
12 #include <linux/device.h>
13 #include <linux/node.h>
14 #include <linux/gfp.h>
15 #include <linux/slab.h>
16 #include <linux/percpu.h>
17 #include <linux/acpi.h>
19 #include <linux/cpufeature.h>
20 #include <linux/tick.h>
21 #include <linux/pm_qos.h>
22 #include <linux/delay.h>
23 #include <linux/sched/isolation.h>
27 static DEFINE_PER_CPU(struct device
*, cpu_sys_devices
);
/*
 * Bus match callback for the CPU subsystem.
 *
 * Returns 1 when acpi_driver_match_device() reports a match between @dev
 * and @drv, 0 otherwise.
 */
static int cpu_subsys_match(struct device *dev, const struct device_driver *drv)
{
	/* ACPI style match is the only one that may succeed. */
	if (acpi_driver_match_device(dev, drv))
		return 1;

	return 0;
}
38 #ifdef CONFIG_HOTPLUG_CPU
39 static void change_cpu_under_node(struct cpu
*cpu
,
40 unsigned int from_nid
, unsigned int to_nid
)
42 int cpuid
= cpu
->dev
.id
;
43 unregister_cpu_under_node(cpuid
, from_nid
);
44 register_cpu_under_node(cpuid
, to_nid
);
45 cpu
->node_id
= to_nid
;
48 static int cpu_subsys_online(struct device
*dev
)
50 struct cpu
*cpu
= container_of(dev
, struct cpu
, dev
);
56 from_nid
= cpu_to_node(cpuid
);
57 if (from_nid
== NUMA_NO_NODE
)
61 ret
= cpu_device_up(dev
);
64 * If -EBUSY is returned, it is likely that hotplug is temporarily
65 * disabled when cpu_hotplug_disable() was called. This condition is
66 * transient. So we retry after waiting for an exponentially
67 * increasing delay up to a total of at least 620ms as some PCI
68 * device initialization can take quite a while.
74 msleep(10 * (1 << retries
));
79 * When hot adding memory to memoryless node and enabling a cpu
80 * on the node, node number of the cpu may internally change.
82 to_nid
= cpu_to_node(cpuid
);
83 if (from_nid
!= to_nid
)
84 change_cpu_under_node(cpu
, from_nid
, to_nid
);
/* Bus ->offline() callback: forwards to cpu_device_down() and returns its result. */
static int cpu_subsys_offline(struct device *dev)
{
	return cpu_device_down(dev);
}
94 void unregister_cpu(struct cpu
*cpu
)
96 int logical_cpu
= cpu
->dev
.id
;
98 set_cpu_enabled(logical_cpu
, false);
99 unregister_cpu_under_node(logical_cpu
, cpu_to_node(logical_cpu
));
101 device_unregister(&cpu
->dev
);
102 per_cpu(cpu_sys_devices
, logical_cpu
) = NULL
;
106 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
107 static ssize_t
cpu_probe_store(struct device
*dev
,
108 struct device_attribute
*attr
,
115 ret
= lock_device_hotplug_sysfs();
119 cnt
= arch_cpu_probe(buf
, count
);
121 unlock_device_hotplug();
125 static ssize_t
cpu_release_store(struct device
*dev
,
126 struct device_attribute
*attr
,
133 ret
= lock_device_hotplug_sysfs();
137 cnt
= arch_cpu_release(buf
, count
);
139 unlock_device_hotplug();
143 static DEVICE_ATTR(probe
, S_IWUSR
, NULL
, cpu_probe_store
);
144 static DEVICE_ATTR(release
, S_IWUSR
, NULL
, cpu_release_store
);
145 #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
146 #endif /* CONFIG_HOTPLUG_CPU */
148 #ifdef CONFIG_CRASH_DUMP
149 #include <linux/kexec.h>
151 static ssize_t
crash_notes_show(struct device
*dev
,
152 struct device_attribute
*attr
,
155 struct cpu
*cpu
= container_of(dev
, struct cpu
, dev
);
156 unsigned long long addr
;
159 cpunum
= cpu
->dev
.id
;
162 * Might be reading other cpu's data based on which cpu read thread
163 * has been scheduled. But cpu data (memory) is allocated once during
164 * boot up and this data does not change there after. Hence this
165 * operation should be safe. No locking required.
167 addr
= per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes
, cpunum
));
169 return sysfs_emit(buf
, "%llx\n", addr
);
171 static DEVICE_ATTR_ADMIN_RO(crash_notes
);
173 static ssize_t
crash_notes_size_show(struct device
*dev
,
174 struct device_attribute
*attr
,
177 return sysfs_emit(buf
, "%zu\n", sizeof(note_buf_t
));
179 static DEVICE_ATTR_ADMIN_RO(crash_notes_size
);
181 static struct attribute
*crash_note_cpu_attrs
[] = {
182 &dev_attr_crash_notes
.attr
,
183 &dev_attr_crash_notes_size
.attr
,
187 static const struct attribute_group crash_note_cpu_attr_group
= {
188 .attrs
= crash_note_cpu_attrs
,
192 static const struct attribute_group
*common_cpu_attr_groups
[] = {
193 #ifdef CONFIG_CRASH_DUMP
194 &crash_note_cpu_attr_group
,
199 static const struct attribute_group
*hotplugable_cpu_attr_groups
[] = {
200 #ifdef CONFIG_CRASH_DUMP
201 &crash_note_cpu_attr_group
,
207 * Print cpu online, possible, present, and system maps
211 struct device_attribute attr
;
212 const struct cpumask
*const map
;
215 static ssize_t
show_cpus_attr(struct device
*dev
,
216 struct device_attribute
*attr
,
219 struct cpu_attr
*ca
= container_of(attr
, struct cpu_attr
, attr
);
221 return cpumap_print_to_pagebuf(true, buf
, ca
->map
);
224 #define _CPU_ATTR(name, map) \
225 { __ATTR(name, 0444, show_cpus_attr, NULL), map }
227 /* Keep in sync with cpu_subsys_attrs */
228 static struct cpu_attr cpu_attrs
[] = {
229 _CPU_ATTR(online
, &__cpu_online_mask
),
230 _CPU_ATTR(possible
, &__cpu_possible_mask
),
231 _CPU_ATTR(present
, &__cpu_present_mask
),
235 * Print values for NR_CPUS and offlined cpus
237 static ssize_t
print_cpus_kernel_max(struct device
*dev
,
238 struct device_attribute
*attr
, char *buf
)
240 return sysfs_emit(buf
, "%d\n", NR_CPUS
- 1);
242 static DEVICE_ATTR(kernel_max
, 0444, print_cpus_kernel_max
, NULL
);
/* arch-optional setting to enable display of offline cpus >= nr_cpu_ids */
unsigned int total_cpus;
247 static ssize_t
print_cpus_offline(struct device
*dev
,
248 struct device_attribute
*attr
, char *buf
)
251 cpumask_var_t offline
;
253 /* display offline cpus < nr_cpu_ids */
254 if (!alloc_cpumask_var(&offline
, GFP_KERNEL
))
256 cpumask_andnot(offline
, cpu_possible_mask
, cpu_online_mask
);
257 len
+= sysfs_emit_at(buf
, len
, "%*pbl", cpumask_pr_args(offline
));
258 free_cpumask_var(offline
);
260 /* display offline cpus >= nr_cpu_ids */
261 if (total_cpus
&& nr_cpu_ids
< total_cpus
) {
262 len
+= sysfs_emit_at(buf
, len
, ",");
264 if (nr_cpu_ids
== total_cpus
-1)
265 len
+= sysfs_emit_at(buf
, len
, "%u", nr_cpu_ids
);
267 len
+= sysfs_emit_at(buf
, len
, "%u-%d",
268 nr_cpu_ids
, total_cpus
- 1);
271 len
+= sysfs_emit_at(buf
, len
, "\n");
275 static DEVICE_ATTR(offline
, 0444, print_cpus_offline
, NULL
);
277 static ssize_t
print_cpus_enabled(struct device
*dev
,
278 struct device_attribute
*attr
, char *buf
)
280 return sysfs_emit(buf
, "%*pbl\n", cpumask_pr_args(cpu_enabled_mask
));
282 static DEVICE_ATTR(enabled
, 0444, print_cpus_enabled
, NULL
);
284 static ssize_t
print_cpus_isolated(struct device
*dev
,
285 struct device_attribute
*attr
, char *buf
)
288 cpumask_var_t isolated
;
290 if (!alloc_cpumask_var(&isolated
, GFP_KERNEL
))
293 cpumask_andnot(isolated
, cpu_possible_mask
,
294 housekeeping_cpumask(HK_TYPE_DOMAIN
));
295 len
= sysfs_emit(buf
, "%*pbl\n", cpumask_pr_args(isolated
));
297 free_cpumask_var(isolated
);
301 static DEVICE_ATTR(isolated
, 0444, print_cpus_isolated
, NULL
);
#ifdef CONFIG_NO_HZ_FULL
/* sysfs show handler for "nohz_full": emits tick_nohz_full_mask as a CPU list. */
static ssize_t print_cpus_nohz_full(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(tick_nohz_full_mask));
}
static DEVICE_ATTR(nohz_full, 0444, print_cpus_nohz_full, NULL);
#endif
#ifdef CONFIG_CRASH_HOTPLUG
/* sysfs show handler for "crash_hotplug": emits crash_check_hotplug_support(). */
static ssize_t crash_hotplug_show(struct device *dev,
				  struct device_attribute *attr,
				  char *buf)
{
	return sysfs_emit(buf, "%d\n", crash_check_hotplug_support());
}
static DEVICE_ATTR_RO(crash_hotplug);
#endif
/* Device ->release() callback for CPU devices; intentionally does nothing. */
static void cpu_device_release(struct device *dev)
{
	/*
	 * This is an empty function to prevent the driver core from spitting a
	 * warning at us.  Yes, I know this is directly opposite of what the
	 * documentation for the driver core and kobjects say, and the author
	 * of this code has already been publicly ridiculed for doing
	 * something as foolish as this.  However, at this point in time, it is
	 * the only way to handle the issue of statically allocated cpu
	 * devices.  The different architectures will have their cpu device
	 * code reworked to properly handle this in the near future, so this
	 * function will then be changed to correctly free up the memory held
	 * by the cpu device.
	 *
	 * Never copy this way of doing things, or you too will be made fun of
	 * on the linux-kernel list, you have been warned.
	 */
}
#ifdef CONFIG_GENERIC_CPU_AUTOPROBE
/*
 * Emit the CPU modalias string into @buf:
 * "cpu:type:<type>:feature:" followed by ",%04X" for every feature index
 * for which cpu_have_feature() is true, then a newline.
 *
 * @dev and @attr are not used; cpu_uevent() calls this with both NULL.
 * Returns the number of bytes emitted.
 */
static ssize_t print_cpu_modalias(struct device *dev,
				  struct device_attribute *attr,
				  char *buf)
{
	int len = 0;
	u32 i;

	len += sysfs_emit_at(buf, len,
			     "cpu:type:" CPU_FEATURE_TYPEFMT ":feature:",
			     CPU_FEATURE_TYPEVAL);

	for (i = 0; i < MAX_CPU_FEATURES; i++)
		if (cpu_have_feature(i)) {
			/* Leave room for one more entry plus the newline. */
			if (len + sizeof(",XXXX\n") >= PAGE_SIZE) {
				WARN(1, "CPU features overflow page\n");
				break;
			}
			len += sysfs_emit_at(buf, len, ",%04X", i);
		}
	len += sysfs_emit_at(buf, len, "\n");
	return len;
}

/*
 * Bus ->uevent() callback: adds a MODALIAS variable built by
 * print_cpu_modalias().  If the temporary page cannot be allocated the
 * variable is silently omitted.  Always returns 0.
 */
static int cpu_uevent(const struct device *dev, struct kobj_uevent_env *env)
{
	char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);

	if (buf) {
		print_cpu_modalias(NULL, NULL, buf);
		add_uevent_var(env, "MODALIAS=%s", buf);
		kfree(buf);
	}
	return 0;
}
#endif
377 const struct bus_type cpu_subsys
= {
380 .match
= cpu_subsys_match
,
381 #ifdef CONFIG_HOTPLUG_CPU
382 .online
= cpu_subsys_online
,
383 .offline
= cpu_subsys_offline
,
385 #ifdef CONFIG_GENERIC_CPU_AUTOPROBE
386 .uevent
= cpu_uevent
,
389 EXPORT_SYMBOL_GPL(cpu_subsys
);
392 * register_cpu - Setup a sysfs device for a CPU.
393 * @cpu - cpu->hotpluggable field set to 1 will generate a control file in
394 * sysfs for this CPU.
395 * @num - CPU number to use when creating the device.
397 * Initialize and register the CPU device.
399 int register_cpu(struct cpu
*cpu
, int num
)
403 cpu
->node_id
= cpu_to_node(num
);
404 memset(&cpu
->dev
, 0x00, sizeof(struct device
));
406 cpu
->dev
.bus
= &cpu_subsys
;
407 cpu
->dev
.release
= cpu_device_release
;
408 cpu
->dev
.offline_disabled
= !cpu
->hotpluggable
;
409 cpu
->dev
.offline
= !cpu_online(num
);
410 cpu
->dev
.of_node
= of_get_cpu_node(num
, NULL
);
411 cpu
->dev
.groups
= common_cpu_attr_groups
;
412 if (cpu
->hotpluggable
)
413 cpu
->dev
.groups
= hotplugable_cpu_attr_groups
;
414 error
= device_register(&cpu
->dev
);
416 put_device(&cpu
->dev
);
420 per_cpu(cpu_sys_devices
, num
) = &cpu
->dev
;
421 register_cpu_under_node(num
, cpu_to_node(num
));
422 dev_pm_qos_expose_latency_limit(&cpu
->dev
,
423 PM_QOS_RESUME_LATENCY_NO_CONSTRAINT
);
424 set_cpu_enabled(num
, true);
429 struct device
*get_cpu_device(unsigned int cpu
)
431 if (cpu
< nr_cpu_ids
&& cpu_possible(cpu
))
432 return per_cpu(cpu_sys_devices
, cpu
);
436 EXPORT_SYMBOL_GPL(get_cpu_device
);
/* ->release() for devices allocated with kzalloc() in __cpu_device_create(). */
static void device_create_release(struct device *dev)
{
	kfree(dev);
}
444 static struct device
*
445 __cpu_device_create(struct device
*parent
, void *drvdata
,
446 const struct attribute_group
**groups
,
447 const char *fmt
, va_list args
)
449 struct device
*dev
= NULL
;
450 int retval
= -ENOMEM
;
452 dev
= kzalloc(sizeof(*dev
), GFP_KERNEL
);
456 device_initialize(dev
);
457 dev
->parent
= parent
;
458 dev
->groups
= groups
;
459 dev
->release
= device_create_release
;
460 device_set_pm_not_required(dev
);
461 dev_set_drvdata(dev
, drvdata
);
463 retval
= kobject_set_name_vargs(&dev
->kobj
, fmt
, args
);
467 retval
= device_add(dev
);
475 return ERR_PTR(retval
);
478 struct device
*cpu_device_create(struct device
*parent
, void *drvdata
,
479 const struct attribute_group
**groups
,
480 const char *fmt
, ...)
485 va_start(vargs
, fmt
);
486 dev
= __cpu_device_create(parent
, drvdata
, groups
, fmt
, vargs
);
490 EXPORT_SYMBOL_GPL(cpu_device_create
);
#ifdef CONFIG_GENERIC_CPU_AUTOPROBE
/* Read-only "modalias" attribute backed by print_cpu_modalias(). */
static DEVICE_ATTR(modalias, 0444, print_cpu_modalias, NULL);
#endif
496 static struct attribute
*cpu_root_attrs
[] = {
497 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
498 &dev_attr_probe
.attr
,
499 &dev_attr_release
.attr
,
501 &cpu_attrs
[0].attr
.attr
,
502 &cpu_attrs
[1].attr
.attr
,
503 &cpu_attrs
[2].attr
.attr
,
504 &dev_attr_kernel_max
.attr
,
505 &dev_attr_offline
.attr
,
506 &dev_attr_enabled
.attr
,
507 &dev_attr_isolated
.attr
,
508 #ifdef CONFIG_NO_HZ_FULL
509 &dev_attr_nohz_full
.attr
,
511 #ifdef CONFIG_CRASH_HOTPLUG
512 &dev_attr_crash_hotplug
.attr
,
514 #ifdef CONFIG_GENERIC_CPU_AUTOPROBE
515 &dev_attr_modalias
.attr
,
520 static const struct attribute_group cpu_root_attr_group
= {
521 .attrs
= cpu_root_attrs
,
524 static const struct attribute_group
*cpu_root_attr_groups
[] = {
525 &cpu_root_attr_group
,
529 bool cpu_is_hotpluggable(unsigned int cpu
)
531 struct device
*dev
= get_cpu_device(cpu
);
532 return dev
&& container_of(dev
, struct cpu
, dev
)->hotpluggable
533 && tick_nohz_cpu_hotpluggable(cpu
);
535 EXPORT_SYMBOL_GPL(cpu_is_hotpluggable
);
537 #ifdef CONFIG_GENERIC_CPU_DEVICES
538 DEFINE_PER_CPU(struct cpu
, cpu_devices
);
540 bool __weak
arch_cpu_is_hotpluggable(int cpu
)
545 int __weak
arch_register_cpu(int cpu
)
547 struct cpu
*c
= &per_cpu(cpu_devices
, cpu
);
549 c
->hotpluggable
= arch_cpu_is_hotpluggable(cpu
);
551 return register_cpu(c
, cpu
);
554 #ifdef CONFIG_HOTPLUG_CPU
555 void __weak
arch_unregister_cpu(int num
)
557 unregister_cpu(&per_cpu(cpu_devices
, num
));
559 #endif /* CONFIG_HOTPLUG_CPU */
560 #endif /* CONFIG_GENERIC_CPU_DEVICES */
562 static void __init
cpu_dev_register_generic(void)
566 if (!IS_ENABLED(CONFIG_GENERIC_CPU_DEVICES
))
569 for_each_present_cpu(i
) {
570 ret
= arch_register_cpu(i
);
571 if (ret
&& ret
!= -EPROBE_DEFER
)
572 pr_warn("register_cpu %d failed (%d)\n", i
, ret
);
576 #ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
577 static ssize_t
cpu_show_not_affected(struct device
*dev
,
578 struct device_attribute
*attr
, char *buf
)
580 return sysfs_emit(buf
, "Not affected\n");
583 #define CPU_SHOW_VULN_FALLBACK(func) \
584 ssize_t cpu_show_##func(struct device *, \
585 struct device_attribute *, char *) \
586 __attribute__((weak, alias("cpu_show_not_affected")))
588 CPU_SHOW_VULN_FALLBACK(meltdown
);
589 CPU_SHOW_VULN_FALLBACK(spectre_v1
);
590 CPU_SHOW_VULN_FALLBACK(spectre_v2
);
591 CPU_SHOW_VULN_FALLBACK(spec_store_bypass
);
592 CPU_SHOW_VULN_FALLBACK(l1tf
);
593 CPU_SHOW_VULN_FALLBACK(mds
);
594 CPU_SHOW_VULN_FALLBACK(tsx_async_abort
);
595 CPU_SHOW_VULN_FALLBACK(itlb_multihit
);
596 CPU_SHOW_VULN_FALLBACK(srbds
);
597 CPU_SHOW_VULN_FALLBACK(mmio_stale_data
);
598 CPU_SHOW_VULN_FALLBACK(retbleed
);
599 CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow
);
600 CPU_SHOW_VULN_FALLBACK(gds
);
601 CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling
);
603 static DEVICE_ATTR(meltdown
, 0444, cpu_show_meltdown
, NULL
);
604 static DEVICE_ATTR(spectre_v1
, 0444, cpu_show_spectre_v1
, NULL
);
605 static DEVICE_ATTR(spectre_v2
, 0444, cpu_show_spectre_v2
, NULL
);
606 static DEVICE_ATTR(spec_store_bypass
, 0444, cpu_show_spec_store_bypass
, NULL
);
607 static DEVICE_ATTR(l1tf
, 0444, cpu_show_l1tf
, NULL
);
608 static DEVICE_ATTR(mds
, 0444, cpu_show_mds
, NULL
);
609 static DEVICE_ATTR(tsx_async_abort
, 0444, cpu_show_tsx_async_abort
, NULL
);
610 static DEVICE_ATTR(itlb_multihit
, 0444, cpu_show_itlb_multihit
, NULL
);
611 static DEVICE_ATTR(srbds
, 0444, cpu_show_srbds
, NULL
);
612 static DEVICE_ATTR(mmio_stale_data
, 0444, cpu_show_mmio_stale_data
, NULL
);
613 static DEVICE_ATTR(retbleed
, 0444, cpu_show_retbleed
, NULL
);
614 static DEVICE_ATTR(spec_rstack_overflow
, 0444, cpu_show_spec_rstack_overflow
, NULL
);
615 static DEVICE_ATTR(gather_data_sampling
, 0444, cpu_show_gds
, NULL
);
616 static DEVICE_ATTR(reg_file_data_sampling
, 0444, cpu_show_reg_file_data_sampling
, NULL
);
618 static struct attribute
*cpu_root_vulnerabilities_attrs
[] = {
619 &dev_attr_meltdown
.attr
,
620 &dev_attr_spectre_v1
.attr
,
621 &dev_attr_spectre_v2
.attr
,
622 &dev_attr_spec_store_bypass
.attr
,
625 &dev_attr_tsx_async_abort
.attr
,
626 &dev_attr_itlb_multihit
.attr
,
627 &dev_attr_srbds
.attr
,
628 &dev_attr_mmio_stale_data
.attr
,
629 &dev_attr_retbleed
.attr
,
630 &dev_attr_spec_rstack_overflow
.attr
,
631 &dev_attr_gather_data_sampling
.attr
,
632 &dev_attr_reg_file_data_sampling
.attr
,
636 static const struct attribute_group cpu_root_vulnerabilities_group
= {
637 .name
= "vulnerabilities",
638 .attrs
= cpu_root_vulnerabilities_attrs
,
641 static void __init
cpu_register_vulnerabilities(void)
643 struct device
*dev
= bus_get_dev_root(&cpu_subsys
);
646 if (sysfs_create_group(&dev
->kobj
, &cpu_root_vulnerabilities_group
))
647 pr_err("Unable to register CPU vulnerabilities\n");
653 static inline void cpu_register_vulnerabilities(void) { }
656 void __init
cpu_dev_init(void)
658 if (subsys_system_register(&cpu_subsys
, cpu_root_attr_groups
))
659 panic("Failed to register CPU subsystem");
661 cpu_dev_register_generic();
662 cpu_register_vulnerabilities();