1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * pseries CPU Hotplug infrastructure.
5 * Split out from arch/powerpc/platforms/pseries/setup.c
6 * arch/powerpc/kernel/rtas.c, and arch/powerpc/platforms/pseries/smp.c
8 * Peter Bergner, IBM March 2001.
9 * Copyright (C) 2001 IBM.
10 * Dave Engebretsen, Peter Bergner, and
11 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
12 * Plus various changes from other IBM teams...
14 * Copyright (C) 2006 Michael Ellerman, IBM Corporation
17 #define pr_fmt(fmt) "pseries-hotplug-cpu: " fmt
19 #include <linux/kernel.h>
20 #include <linux/interrupt.h>
21 #include <linux/delay.h>
22 #include <linux/sched.h> /* for idle_task_exit */
23 #include <linux/sched/hotplug.h>
24 #include <linux/cpu.h>
26 #include <linux/slab.h>
29 #include <asm/firmware.h>
30 #include <asm/machdep.h>
31 #include <asm/vdso_datapage.h>
34 #include <asm/plpar_wrappers.h>
35 #include <asm/topology.h>
39 /* This version can't take the spinlock, because it never returns */
40 static int rtas_stop_self_token
= RTAS_UNKNOWN_SERVICE
;
42 static void rtas_stop_self(void)
44 static struct rtas_args args
;
48 BUG_ON(rtas_stop_self_token
== RTAS_UNKNOWN_SERVICE
);
50 printk("cpu %u (hwid %u) Ready to die...\n",
51 smp_processor_id(), hard_smp_processor_id());
53 rtas_call_unlocked(&args
, rtas_stop_self_token
, 0, 1, NULL
);
55 panic("Alas, I survived.\n");
58 static void pseries_cpu_offline_self(void)
60 unsigned int hwcpu
= hard_smp_processor_id();
69 unregister_slb_shadow(hwcpu
);
72 /* Should never get here... */
77 static int pseries_cpu_disable(void)
79 int cpu
= smp_processor_id();
81 set_cpu_online(cpu
, false);
82 vdso_data
->processorCount
--;
84 /*fix boot_cpuid here*/
85 if (cpu
== boot_cpuid
)
86 boot_cpuid
= cpumask_any(cpu_online_mask
);
88 /* FIXME: abstract this to not be platform specific later on */
90 xive_smp_disable_cpu();
92 xics_migrate_irqs_away();
94 cleanup_cpu_mmu_context();
100 * pseries_cpu_die: Wait for the cpu to die.
101 * @cpu: logical processor id of the CPU whose death we're awaiting.
103 * This function is called from the context of the thread which is performing
104 * the cpu-offline. Here we wait for long enough to allow the cpu in question
105 * to self-destroy so that the cpu-offline thread can send the CPU_DEAD
108 * OTOH, pseries_cpu_offline_self() is called by the @cpu when it wants to
111 static void pseries_cpu_die(unsigned int cpu
)
114 unsigned int pcpu
= get_hard_smp_processor_id(cpu
);
115 unsigned long timeout
= jiffies
+ msecs_to_jiffies(120000);
118 cpu_status
= smp_query_cpu_stopped(pcpu
);
119 if (cpu_status
== QCSS_STOPPED
||
120 cpu_status
== QCSS_HARDWARE_ERROR
)
123 if (time_after(jiffies
, timeout
)) {
124 pr_warn("CPU %i (hwid %i) didn't die after 120 seconds\n",
126 timeout
= jiffies
+ msecs_to_jiffies(120000);
132 if (cpu_status
== QCSS_HARDWARE_ERROR
) {
133 pr_warn("CPU %i (hwid %i) reported error while dying\n",
137 /* Isolation and deallocation are definitely done by
138 * drslot_chrp_cpu. If they were not they would be
139 * done here. Change isolate state to Isolate and
140 * change allocation-state to Unusable.
142 paca_ptrs
[cpu
]->cpu_start
= 0;
146 * Update cpu_present_mask and paca(s) for a new cpu node. The wrinkle
147 * here is that a cpu device node may represent up to two logical cpus
148 * in the SMT case. We must honor the assumption in other code that
149 * the logical ids for sibling SMT threads x and y are adjacent, such
150 * that x^1 == y and y^1 == x.
152 static int pseries_add_processor(struct device_node
*np
)
155 cpumask_var_t candidate_mask
, tmp
;
156 int err
= -ENOSPC
, len
, nthreads
, i
;
157 const __be32
*intserv
;
159 intserv
= of_get_property(np
, "ibm,ppc-interrupt-server#s", &len
);
163 zalloc_cpumask_var(&candidate_mask
, GFP_KERNEL
);
164 zalloc_cpumask_var(&tmp
, GFP_KERNEL
);
166 nthreads
= len
/ sizeof(u32
);
167 for (i
= 0; i
< nthreads
; i
++)
168 cpumask_set_cpu(i
, tmp
);
170 cpu_maps_update_begin();
172 BUG_ON(!cpumask_subset(cpu_present_mask
, cpu_possible_mask
));
174 /* Get a bitmap of unoccupied slots. */
175 cpumask_xor(candidate_mask
, cpu_possible_mask
, cpu_present_mask
);
176 if (cpumask_empty(candidate_mask
)) {
177 /* If we get here, it most likely means that NR_CPUS is
178 * less than the partition's max processors setting.
180 printk(KERN_ERR
"Cannot add cpu %pOF; this system configuration"
181 " supports %d logical cpus.\n", np
,
182 num_possible_cpus());
186 while (!cpumask_empty(tmp
))
187 if (cpumask_subset(tmp
, candidate_mask
))
188 /* Found a range where we can insert the new cpu(s) */
191 cpumask_shift_left(tmp
, tmp
, nthreads
);
193 if (cpumask_empty(tmp
)) {
194 printk(KERN_ERR
"Unable to find space in cpu_present_mask for"
195 " processor %pOFn with %d thread(s)\n", np
,
200 for_each_cpu(cpu
, tmp
) {
201 BUG_ON(cpu_present(cpu
));
202 set_cpu_present(cpu
, true);
203 set_hard_smp_processor_id(cpu
, be32_to_cpu(*intserv
++));
207 cpu_maps_update_done();
208 free_cpumask_var(candidate_mask
);
209 free_cpumask_var(tmp
);
214 * Update the present map for a cpu node which is going away, and set
215 * the hard id in the paca(s) to -1 to be consistent with boot time
216 * convention for non-present cpus.
218 static void pseries_remove_processor(struct device_node
*np
)
221 int len
, nthreads
, i
;
222 const __be32
*intserv
;
225 intserv
= of_get_property(np
, "ibm,ppc-interrupt-server#s", &len
);
229 nthreads
= len
/ sizeof(u32
);
231 cpu_maps_update_begin();
232 for (i
= 0; i
< nthreads
; i
++) {
233 thread
= be32_to_cpu(intserv
[i
]);
234 for_each_present_cpu(cpu
) {
235 if (get_hard_smp_processor_id(cpu
) != thread
)
237 BUG_ON(cpu_online(cpu
));
238 set_cpu_present(cpu
, false);
239 set_hard_smp_processor_id(cpu
, -1);
240 update_numa_cpu_lookup_table(cpu
, -1);
243 if (cpu
>= nr_cpu_ids
)
244 printk(KERN_WARNING
"Could not find cpu to remove "
245 "with physical id 0x%x\n", thread
);
247 cpu_maps_update_done();
250 static int dlpar_offline_cpu(struct device_node
*dn
)
254 int len
, nthreads
, i
;
255 const __be32
*intserv
;
258 intserv
= of_get_property(dn
, "ibm,ppc-interrupt-server#s", &len
);
262 nthreads
= len
/ sizeof(u32
);
264 cpu_maps_update_begin();
265 for (i
= 0; i
< nthreads
; i
++) {
266 thread
= be32_to_cpu(intserv
[i
]);
267 for_each_present_cpu(cpu
) {
268 if (get_hard_smp_processor_id(cpu
) != thread
)
271 if (!cpu_online(cpu
))
274 cpu_maps_update_done();
275 rc
= device_offline(get_cpu_device(cpu
));
278 cpu_maps_update_begin();
281 if (cpu
== num_possible_cpus()) {
282 pr_warn("Could not find cpu to offline with physical id 0x%x\n",
286 cpu_maps_update_done();
292 static int dlpar_online_cpu(struct device_node
*dn
)
296 int len
, nthreads
, i
;
297 const __be32
*intserv
;
300 intserv
= of_get_property(dn
, "ibm,ppc-interrupt-server#s", &len
);
304 nthreads
= len
/ sizeof(u32
);
306 cpu_maps_update_begin();
307 for (i
= 0; i
< nthreads
; i
++) {
308 thread
= be32_to_cpu(intserv
[i
]);
309 for_each_present_cpu(cpu
) {
310 if (get_hard_smp_processor_id(cpu
) != thread
)
312 cpu_maps_update_done();
313 find_and_online_cpu_nid(cpu
);
314 rc
= device_online(get_cpu_device(cpu
));
316 dlpar_offline_cpu(dn
);
319 cpu_maps_update_begin();
323 if (cpu
== num_possible_cpus())
324 printk(KERN_WARNING
"Could not find cpu to online "
325 "with physical id 0x%x\n", thread
);
327 cpu_maps_update_done();
334 static bool dlpar_cpu_exists(struct device_node
*parent
, u32 drc_index
)
336 struct device_node
*child
= NULL
;
341 /* Assume cpu doesn't exist */
344 for_each_child_of_node(parent
, child
) {
345 rc
= of_property_read_u32(child
, "ibm,my-drc-index",
350 if (my_drc_index
== drc_index
) {
360 static bool drc_info_valid_index(struct device_node
*parent
, u32 drc_index
)
362 struct property
*info
;
363 struct of_drc_info drc
;
368 info
= of_find_property(parent
, "ibm,drc-info", NULL
);
372 value
= of_prop_next_u32(info
, NULL
, &count
);
374 /* First value of ibm,drc-info is number of drc-info records */
380 for (i
= 0; i
< count
; i
++) {
381 if (of_read_drc_info_cell(&info
, &value
, &drc
))
384 if (strncmp(drc
.drc_type
, "CPU", 3))
387 if (drc_index
> drc
.last_drc_index
)
390 index
= drc
.drc_index_start
;
391 for (j
= 0; j
< drc
.num_sequential_elems
; j
++) {
392 if (drc_index
== index
)
395 index
+= drc
.sequential_inc
;
402 static bool valid_cpu_drc_index(struct device_node
*parent
, u32 drc_index
)
407 if (of_find_property(parent
, "ibm,drc-info", NULL
))
408 return drc_info_valid_index(parent
, drc_index
);
410 /* Note that the format of the ibm,drc-indexes array is
411 * the number of entries in the array followed by the array
412 * of drc values so we start looking at index = 1.
418 rc
= of_property_read_u32_index(parent
, "ibm,drc-indexes",
424 if (drc
== drc_index
)
431 static ssize_t
dlpar_cpu_add(u32 drc_index
)
433 struct device_node
*dn
, *parent
;
436 pr_debug("Attempting to add CPU, drc index: %x\n", drc_index
);
438 parent
= of_find_node_by_path("/cpus");
440 pr_warn("Failed to find CPU root node \"/cpus\"\n");
444 if (dlpar_cpu_exists(parent
, drc_index
)) {
446 pr_warn("CPU with drc index %x already exists\n", drc_index
);
450 if (!valid_cpu_drc_index(parent
, drc_index
)) {
452 pr_warn("Cannot find CPU (drc index %x) to add.\n", drc_index
);
456 rc
= dlpar_acquire_drc(drc_index
);
458 pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
464 dn
= dlpar_configure_connector(cpu_to_be32(drc_index
), parent
);
466 pr_warn("Failed call to configure-connector, drc index: %x\n",
468 dlpar_release_drc(drc_index
);
473 rc
= dlpar_attach_node(dn
, parent
);
475 /* Regardless we are done with parent now */
480 pr_warn("Failed to attach node %pOFn, rc: %d, drc index: %x\n",
483 rc
= dlpar_release_drc(drc_index
);
485 dlpar_free_cc_nodes(dn
);
490 rc
= dlpar_online_cpu(dn
);
493 pr_warn("Failed to online cpu %pOFn, rc: %d, drc index: %x\n",
496 rc
= dlpar_detach_node(dn
);
498 dlpar_release_drc(drc_index
);
503 pr_debug("Successfully added CPU %pOFn, drc index: %x\n", dn
,
508 static ssize_t
dlpar_cpu_remove(struct device_node
*dn
, u32 drc_index
)
512 pr_debug("Attempting to remove CPU %pOFn, drc index: %x\n",
515 rc
= dlpar_offline_cpu(dn
);
517 pr_warn("Failed to offline CPU %pOFn, rc: %d\n", dn
, rc
);
521 rc
= dlpar_release_drc(drc_index
);
523 pr_warn("Failed to release drc (%x) for CPU %pOFn, rc: %d\n",
525 dlpar_online_cpu(dn
);
529 rc
= dlpar_detach_node(dn
);
533 pr_warn("Failed to detach CPU %pOFn, rc: %d", dn
, rc
);
535 rc
= dlpar_acquire_drc(drc_index
);
537 dlpar_online_cpu(dn
);
542 pr_debug("Successfully removed CPU, drc index: %x\n", drc_index
);
546 static struct device_node
*cpu_drc_index_to_dn(u32 drc_index
)
548 struct device_node
*dn
;
552 for_each_node_by_type(dn
, "cpu") {
553 rc
= of_property_read_u32(dn
, "ibm,my-drc-index", &my_index
);
557 if (my_index
== drc_index
)
564 static int dlpar_cpu_remove_by_index(u32 drc_index
)
566 struct device_node
*dn
;
569 dn
= cpu_drc_index_to_dn(drc_index
);
571 pr_warn("Cannot find CPU (drc index %x) to remove\n",
576 rc
= dlpar_cpu_remove(dn
, drc_index
);
581 static int find_dlpar_cpus_to_remove(u32
*cpu_drcs
, int cpus_to_remove
)
583 struct device_node
*dn
;
587 /* We want to find cpus_to_remove + 1 CPUs to ensure we do not
588 * remove the last CPU.
590 for_each_node_by_type(dn
, "cpu") {
593 if (cpus_found
> cpus_to_remove
) {
598 /* Note that cpus_found is always 1 ahead of the index
599 * into the cpu_drcs array, so we use cpus_found - 1
601 rc
= of_property_read_u32(dn
, "ibm,my-drc-index",
602 &cpu_drcs
[cpus_found
- 1]);
604 pr_warn("Error occurred getting drc-index for %pOFn\n",
611 if (cpus_found
< cpus_to_remove
) {
612 pr_warn("Failed to find enough CPUs (%d of %d) to remove\n",
613 cpus_found
, cpus_to_remove
);
614 } else if (cpus_found
== cpus_to_remove
) {
615 pr_warn("Cannot remove all CPUs\n");
621 static int dlpar_cpu_remove_by_count(u32 cpus_to_remove
)
625 int cpus_removed
= 0;
628 pr_debug("Attempting to hot-remove %d CPUs\n", cpus_to_remove
);
630 cpu_drcs
= kcalloc(cpus_to_remove
, sizeof(*cpu_drcs
), GFP_KERNEL
);
634 cpus_found
= find_dlpar_cpus_to_remove(cpu_drcs
, cpus_to_remove
);
635 if (cpus_found
<= cpus_to_remove
) {
640 for (i
= 0; i
< cpus_to_remove
; i
++) {
641 rc
= dlpar_cpu_remove_by_index(cpu_drcs
[i
]);
648 if (cpus_removed
!= cpus_to_remove
) {
649 pr_warn("CPU hot-remove failed, adding back removed CPUs\n");
651 for (i
= 0; i
< cpus_removed
; i
++)
652 dlpar_cpu_add(cpu_drcs
[i
]);
663 static int find_drc_info_cpus_to_add(struct device_node
*cpus
,
664 struct property
*info
,
665 u32
*cpu_drcs
, u32 cpus_to_add
)
667 struct of_drc_info drc
;
669 u32 count
, drc_index
;
676 value
= of_prop_next_u32(info
, NULL
, &count
);
680 for (i
= 0; i
< count
; i
++) {
681 of_read_drc_info_cell(&info
, &value
, &drc
);
682 if (strncmp(drc
.drc_type
, "CPU", 3))
685 drc_index
= drc
.drc_index_start
;
686 for (j
= 0; j
< drc
.num_sequential_elems
; j
++) {
687 if (dlpar_cpu_exists(cpus
, drc_index
))
690 cpu_drcs
[cpus_found
++] = drc_index
;
692 if (cpus_found
== cpus_to_add
)
695 drc_index
+= drc
.sequential_inc
;
702 static int find_drc_index_cpus_to_add(struct device_node
*cpus
,
703 u32
*cpu_drcs
, u32 cpus_to_add
)
709 /* Search the ibm,drc-indexes array for possible CPU drcs to
710 * add. Note that the format of the ibm,drc-indexes array is
711 * the number of entries in the array followed by the array
712 * of drc values so we start looking at index = 1.
715 while (cpus_found
< cpus_to_add
) {
716 rc
= of_property_read_u32_index(cpus
, "ibm,drc-indexes",
717 index
++, &drc_index
);
722 if (dlpar_cpu_exists(cpus
, drc_index
))
725 cpu_drcs
[cpus_found
++] = drc_index
;
731 static int dlpar_cpu_add_by_count(u32 cpus_to_add
)
733 struct device_node
*parent
;
734 struct property
*info
;
740 pr_debug("Attempting to hot-add %d CPUs\n", cpus_to_add
);
742 cpu_drcs
= kcalloc(cpus_to_add
, sizeof(*cpu_drcs
), GFP_KERNEL
);
746 parent
= of_find_node_by_path("/cpus");
748 pr_warn("Could not find CPU root node in device tree\n");
753 info
= of_find_property(parent
, "ibm,drc-info", NULL
);
755 cpus_found
= find_drc_info_cpus_to_add(parent
, info
, cpu_drcs
, cpus_to_add
);
757 cpus_found
= find_drc_index_cpus_to_add(parent
, cpu_drcs
, cpus_to_add
);
761 if (cpus_found
< cpus_to_add
) {
762 pr_warn("Failed to find enough CPUs (%d of %d) to add\n",
763 cpus_found
, cpus_to_add
);
768 for (i
= 0; i
< cpus_to_add
; i
++) {
769 rc
= dlpar_cpu_add(cpu_drcs
[i
]);
776 if (cpus_added
< cpus_to_add
) {
777 pr_warn("CPU hot-add failed, removing any added CPUs\n");
779 for (i
= 0; i
< cpus_added
; i
++)
780 dlpar_cpu_remove_by_index(cpu_drcs
[i
]);
791 int dlpar_cpu(struct pseries_hp_errorlog
*hp_elog
)
793 u32 count
, drc_index
;
796 count
= hp_elog
->_drc_u
.drc_count
;
797 drc_index
= hp_elog
->_drc_u
.drc_index
;
799 lock_device_hotplug();
801 switch (hp_elog
->action
) {
802 case PSERIES_HP_ELOG_ACTION_REMOVE
:
803 if (hp_elog
->id_type
== PSERIES_HP_ELOG_ID_DRC_COUNT
)
804 rc
= dlpar_cpu_remove_by_count(count
);
805 else if (hp_elog
->id_type
== PSERIES_HP_ELOG_ID_DRC_INDEX
)
806 rc
= dlpar_cpu_remove_by_index(drc_index
);
810 case PSERIES_HP_ELOG_ACTION_ADD
:
811 if (hp_elog
->id_type
== PSERIES_HP_ELOG_ID_DRC_COUNT
)
812 rc
= dlpar_cpu_add_by_count(count
);
813 else if (hp_elog
->id_type
== PSERIES_HP_ELOG_ID_DRC_INDEX
)
814 rc
= dlpar_cpu_add(drc_index
);
819 pr_err("Invalid action (%d) specified\n", hp_elog
->action
);
824 unlock_device_hotplug();
828 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
830 static ssize_t
dlpar_cpu_probe(const char *buf
, size_t count
)
835 rc
= kstrtou32(buf
, 0, &drc_index
);
839 rc
= dlpar_cpu_add(drc_index
);
841 return rc
? rc
: count
;
844 static ssize_t
dlpar_cpu_release(const char *buf
, size_t count
)
846 struct device_node
*dn
;
850 dn
= of_find_node_by_path(buf
);
854 rc
= of_property_read_u32(dn
, "ibm,my-drc-index", &drc_index
);
860 rc
= dlpar_cpu_remove(dn
, drc_index
);
863 return rc
? rc
: count
;
866 #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
868 static int pseries_smp_notifier(struct notifier_block
*nb
,
869 unsigned long action
, void *data
)
871 struct of_reconfig_data
*rd
= data
;
875 case OF_RECONFIG_ATTACH_NODE
:
876 err
= pseries_add_processor(rd
->dn
);
878 case OF_RECONFIG_DETACH_NODE
:
879 pseries_remove_processor(rd
->dn
);
882 return notifier_from_errno(err
);
885 static struct notifier_block pseries_smp_nb
= {
886 .notifier_call
= pseries_smp_notifier
,
889 static int __init
pseries_cpu_hotplug_init(void)
893 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
894 ppc_md
.cpu_probe
= dlpar_cpu_probe
;
895 ppc_md
.cpu_release
= dlpar_cpu_release
;
896 #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
898 rtas_stop_self_token
= rtas_token("stop-self");
899 qcss_tok
= rtas_token("query-cpu-stopped-state");
901 if (rtas_stop_self_token
== RTAS_UNKNOWN_SERVICE
||
902 qcss_tok
== RTAS_UNKNOWN_SERVICE
) {
903 printk(KERN_INFO
"CPU Hotplug not supported by firmware "
908 smp_ops
->cpu_offline_self
= pseries_cpu_offline_self
;
909 smp_ops
->cpu_disable
= pseries_cpu_disable
;
910 smp_ops
->cpu_die
= pseries_cpu_die
;
912 /* Processors can be added/removed only on LPAR */
913 if (firmware_has_feature(FW_FEATURE_LPAR
))
914 of_reconfig_notifier_register(&pseries_smp_nb
);
918 machine_arch_initcall(pseries
, pseries_cpu_hotplug_init
);