1 // SPDX-License-Identifier: GPL-2.0-or-later
5 * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great
6 * deal of code from the sparc and intel versions.
8 * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
10 * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
11 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
16 #include <linux/kernel.h>
17 #include <linux/export.h>
18 #include <linux/sched/mm.h>
19 #include <linux/sched/task_stack.h>
20 #include <linux/sched/topology.h>
21 #include <linux/smp.h>
22 #include <linux/interrupt.h>
23 #include <linux/delay.h>
24 #include <linux/init.h>
25 #include <linux/spinlock.h>
26 #include <linux/cache.h>
27 #include <linux/err.h>
28 #include <linux/device.h>
29 #include <linux/cpu.h>
30 #include <linux/notifier.h>
31 #include <linux/topology.h>
32 #include <linux/profile.h>
33 #include <linux/processor.h>
34 #include <linux/random.h>
35 #include <linux/stackprotector.h>
36 #include <linux/pgtable.h>
37 #include <linux/clockchips.h>
38 #include <linux/kexec.h>
40 #include <asm/ptrace.h>
41 #include <linux/atomic.h>
43 #include <asm/hw_irq.h>
44 #include <asm/kvm_ppc.h>
45 #include <asm/dbell.h>
49 #include <asm/machdep.h>
50 #include <asm/mmu_context.h>
51 #include <asm/cputhreads.h>
52 #include <asm/cputable.h>
54 #include <asm/vdso_datapage.h>
59 #include <asm/debug.h>
60 #include <asm/cpu_has_feature.h>
61 #include <asm/ftrace.h>
63 #include <asm/fadump.h>
64 #include <asm/systemcfg.h>
66 #include <trace/events/ipi.h>
70 #define DBG(fmt...) udbg_printf(fmt)
75 #ifdef CONFIG_HOTPLUG_CPU
76 /* State of each CPU during hotplug phases */
77 static DEFINE_PER_CPU(int, cpu_state
) = { 0 };
80 struct task_struct
*secondary_current
;
81 bool has_big_cores __ro_after_init
;
82 bool coregroup_enabled __ro_after_init
;
83 bool thread_group_shares_l2 __ro_after_init
;
84 bool thread_group_shares_l3 __ro_after_init
;
86 DEFINE_PER_CPU(cpumask_var_t
, cpu_sibling_map
);
87 DEFINE_PER_CPU(cpumask_var_t
, cpu_smallcore_map
);
88 DEFINE_PER_CPU(cpumask_var_t
, cpu_l2_cache_map
);
89 DEFINE_PER_CPU(cpumask_var_t
, cpu_core_map
);
90 static DEFINE_PER_CPU(cpumask_var_t
, cpu_coregroup_map
);
92 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map
);
93 EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map
);
94 EXPORT_PER_CPU_SYMBOL(cpu_core_map
);
95 EXPORT_SYMBOL_GPL(has_big_cores
);
97 #define MAX_THREAD_LIST_SIZE 8
98 #define THREAD_GROUP_SHARE_L1 1
99 #define THREAD_GROUP_SHARE_L2_L3 2
100 struct thread_groups
{
101 unsigned int property
;
102 unsigned int nr_groups
;
103 unsigned int threads_per_group
;
104 unsigned int thread_list
[MAX_THREAD_LIST_SIZE
];
107 /* Maximum number of properties that groups of threads within a core can share */
108 #define MAX_THREAD_GROUP_PROPERTIES 2
110 struct thread_groups_list
{
111 unsigned int nr_properties
;
112 struct thread_groups property_tgs
[MAX_THREAD_GROUP_PROPERTIES
];
115 static struct thread_groups_list tgl
[NR_CPUS
] __initdata
;
117 * On big-cores system, thread_group_l1_cache_map for each CPU corresponds to
118 * the set of its siblings that share the L1-cache.
120 DEFINE_PER_CPU(cpumask_var_t
, thread_group_l1_cache_map
);
123 * On some big-cores system, thread_group_l2_cache_map for each CPU
124 * corresponds to the set of its siblings within the core that share the
127 DEFINE_PER_CPU(cpumask_var_t
, thread_group_l2_cache_map
);
130 * On P10, thread_group_l3_cache_map for each CPU is equal to the
131 * thread_group_l2_cache_map
133 DEFINE_PER_CPU(cpumask_var_t
, thread_group_l3_cache_map
);
135 /* SMP operations for this machine */
136 struct smp_ops_t
*smp_ops
;
138 /* Can't be static due to PowerMac hackery */
139 volatile unsigned int cpu_callin_map
[NR_CPUS
];
141 int smt_enabled_at_boot
= 1;
144 * Returns 1 if the specified cpu should be brought up during boot.
145 * Used to inhibit booting threads if they've been disabled or
146 * limited on the command line
148 int smp_generic_cpu_bootable(unsigned int nr
)
150 /* Special case - we inhibit secondary thread startup
151 * during boot if the user requests it.
153 if (system_state
< SYSTEM_RUNNING
&& cpu_has_feature(CPU_FTR_SMT
)) {
154 if (!smt_enabled_at_boot
&& cpu_thread_in_core(nr
) != 0)
156 if (smt_enabled_at_boot
157 && cpu_thread_in_core(nr
) >= smt_enabled_at_boot
)
166 int smp_generic_kick_cpu(int nr
)
168 if (nr
< 0 || nr
>= nr_cpu_ids
)
172 * The processor is currently spinning, waiting for the
173 * cpu_start field to become non-zero. After we set cpu_start,
174 * the processor will continue on to secondary_start
176 if (!paca_ptrs
[nr
]->cpu_start
) {
177 paca_ptrs
[nr
]->cpu_start
= 1;
182 #ifdef CONFIG_HOTPLUG_CPU
184 * Ok it's not there, so it might be soft-unplugged, let's
185 * try to bring it back
187 generic_set_cpu_up(nr
);
189 smp_send_reschedule(nr
);
190 #endif /* CONFIG_HOTPLUG_CPU */
194 #endif /* CONFIG_PPC64 */
196 static irqreturn_t
call_function_action(int irq
, void *data
)
198 generic_smp_call_function_interrupt();
202 static irqreturn_t
reschedule_action(int irq
, void *data
)
208 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
209 static irqreturn_t
tick_broadcast_ipi_action(int irq
, void *data
)
211 timer_broadcast_interrupt();
216 #ifdef CONFIG_NMI_IPI
217 static irqreturn_t
nmi_ipi_action(int irq
, void *data
)
219 smp_handle_nmi_ipi(get_irq_regs());
224 static irq_handler_t smp_ipi_action
[] = {
225 [PPC_MSG_CALL_FUNCTION
] = call_function_action
,
226 [PPC_MSG_RESCHEDULE
] = reschedule_action
,
227 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
228 [PPC_MSG_TICK_BROADCAST
] = tick_broadcast_ipi_action
,
230 #ifdef CONFIG_NMI_IPI
231 [PPC_MSG_NMI_IPI
] = nmi_ipi_action
,
236 * The NMI IPI is a fallback and not truly non-maskable. It is simpler
237 * than going through the call function infrastructure, and strongly
238 * serialized, so it is more appropriate for debugging.
240 const char *smp_ipi_name
[] = {
241 [PPC_MSG_CALL_FUNCTION
] = "ipi call function",
242 [PPC_MSG_RESCHEDULE
] = "ipi reschedule",
243 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
244 [PPC_MSG_TICK_BROADCAST
] = "ipi tick-broadcast",
246 #ifdef CONFIG_NMI_IPI
247 [PPC_MSG_NMI_IPI
] = "nmi ipi",
251 /* optional function to request ipi, for controllers with >= 4 ipis */
252 int smp_request_message_ipi(int virq
, int msg
)
256 if (msg
< 0 || msg
> PPC_MSG_NMI_IPI
)
258 #ifndef CONFIG_NMI_IPI
259 if (msg
== PPC_MSG_NMI_IPI
)
263 err
= request_irq(virq
, smp_ipi_action
[msg
],
264 IRQF_PERCPU
| IRQF_NO_THREAD
| IRQF_NO_SUSPEND
,
265 smp_ipi_name
[msg
], NULL
);
266 WARN(err
< 0, "unable to request_irq %d for %s (rc %d)\n",
267 virq
, smp_ipi_name
[msg
], err
);
272 #ifdef CONFIG_PPC_SMP_MUXED_IPI
273 struct cpu_messages
{
274 long messages
; /* current messages */
276 static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages
, ipi_message
);
278 void smp_muxed_ipi_set_message(int cpu
, int msg
)
280 struct cpu_messages
*info
= &per_cpu(ipi_message
, cpu
);
281 char *message
= (char *)&info
->messages
;
284 * Order previous accesses before accesses in the IPI handler.
287 WRITE_ONCE(message
[msg
], 1);
290 void smp_muxed_ipi_message_pass(int cpu
, int msg
)
292 smp_muxed_ipi_set_message(cpu
, msg
);
295 * cause_ipi functions are required to include a full barrier
296 * before doing whatever causes the IPI.
298 smp_ops
->cause_ipi(cpu
);
301 #ifdef __BIG_ENDIAN__
302 #define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
304 #define IPI_MESSAGE(A) (1uL << (8 * (A)))
307 irqreturn_t
smp_ipi_demux(void)
309 mb(); /* order any irq clear */
311 return smp_ipi_demux_relaxed();
314 /* sync-free variant. Callers should ensure synchronization */
315 irqreturn_t
smp_ipi_demux_relaxed(void)
317 struct cpu_messages
*info
;
320 info
= this_cpu_ptr(&ipi_message
);
322 all
= xchg(&info
->messages
, 0);
323 #if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
325 * Must check for PPC_MSG_RM_HOST_ACTION messages
326 * before PPC_MSG_CALL_FUNCTION messages because when
327 * a VM is destroyed, we call kick_all_cpus_sync()
328 * to ensure that any pending PPC_MSG_RM_HOST_ACTION
329 * messages have completed before we free any VCPUs.
331 if (all
& IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION
))
332 kvmppc_xics_ipi_action();
334 if (all
& IPI_MESSAGE(PPC_MSG_CALL_FUNCTION
))
335 generic_smp_call_function_interrupt();
336 if (all
& IPI_MESSAGE(PPC_MSG_RESCHEDULE
))
338 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
339 if (all
& IPI_MESSAGE(PPC_MSG_TICK_BROADCAST
))
340 timer_broadcast_interrupt();
342 #ifdef CONFIG_NMI_IPI
343 if (all
& IPI_MESSAGE(PPC_MSG_NMI_IPI
))
344 nmi_ipi_action(0, NULL
);
346 } while (READ_ONCE(info
->messages
));
350 #endif /* CONFIG_PPC_SMP_MUXED_IPI */
352 static inline void do_message_pass(int cpu
, int msg
)
354 if (smp_ops
->message_pass
)
355 smp_ops
->message_pass(cpu
, msg
);
356 #ifdef CONFIG_PPC_SMP_MUXED_IPI
358 smp_muxed_ipi_message_pass(cpu
, msg
);
362 void arch_smp_send_reschedule(int cpu
)
365 do_message_pass(cpu
, PPC_MSG_RESCHEDULE
);
367 EXPORT_SYMBOL_GPL(arch_smp_send_reschedule
);
369 void arch_send_call_function_single_ipi(int cpu
)
371 do_message_pass(cpu
, PPC_MSG_CALL_FUNCTION
);
374 void arch_send_call_function_ipi_mask(const struct cpumask
*mask
)
378 for_each_cpu(cpu
, mask
)
379 do_message_pass(cpu
, PPC_MSG_CALL_FUNCTION
);
382 #ifdef CONFIG_NMI_IPI
387 * NMI IPIs may not be recoverable, so should not be used as ongoing part of
388 * a running system. They can be used for crash, debug, halt/reboot, etc.
390 * The IPI call waits with interrupts disabled until all targets enter the
391 * NMI handler, then returns. Subsequent IPIs can be issued before targets
392 * have returned from their handlers, so there is no guarantee about
393 * concurrency or re-entrancy.
395 * A new NMI can be issued before all targets exit the handler.
397 * The IPI call may time out without all targets entering the NMI handler.
398 * In that case, there is some logic to recover (and ignore subsequent
399 * NMI interrupts that may eventually be raised), but the platform interrupt
400 * handler may not be able to distinguish this from other exception causes,
401 * which may cause a crash.
404 static atomic_t __nmi_ipi_lock
= ATOMIC_INIT(0);
405 static struct cpumask nmi_ipi_pending_mask
;
406 static bool nmi_ipi_busy
= false;
407 static void (*nmi_ipi_function
)(struct pt_regs
*) = NULL
;
409 noinstr
static void nmi_ipi_lock_start(unsigned long *flags
)
411 raw_local_irq_save(*flags
);
413 while (raw_atomic_cmpxchg(&__nmi_ipi_lock
, 0, 1) == 1) {
414 raw_local_irq_restore(*flags
);
415 spin_until_cond(raw_atomic_read(&__nmi_ipi_lock
) == 0);
416 raw_local_irq_save(*flags
);
421 noinstr
static void nmi_ipi_lock(void)
423 while (raw_atomic_cmpxchg(&__nmi_ipi_lock
, 0, 1) == 1)
424 spin_until_cond(raw_atomic_read(&__nmi_ipi_lock
) == 0);
427 noinstr
static void nmi_ipi_unlock(void)
430 WARN_ON(raw_atomic_read(&__nmi_ipi_lock
) != 1);
431 raw_atomic_set(&__nmi_ipi_lock
, 0);
434 noinstr
static void nmi_ipi_unlock_end(unsigned long *flags
)
437 raw_local_irq_restore(*flags
);
441 * Platform NMI handler calls this to ack
443 noinstr
int smp_handle_nmi_ipi(struct pt_regs
*regs
)
445 void (*fn
)(struct pt_regs
*) = NULL
;
447 int me
= raw_smp_processor_id();
451 * Unexpected NMIs are possible here because the interrupt may not
452 * be able to distinguish NMI IPIs from other types of NMIs, or
453 * because the caller may have timed out.
455 nmi_ipi_lock_start(&flags
);
456 if (cpumask_test_cpu(me
, &nmi_ipi_pending_mask
)) {
457 cpumask_clear_cpu(me
, &nmi_ipi_pending_mask
);
458 fn
= READ_ONCE(nmi_ipi_function
);
462 nmi_ipi_unlock_end(&flags
);
470 static void do_smp_send_nmi_ipi(int cpu
, bool safe
)
472 if (!safe
&& smp_ops
->cause_nmi_ipi
&& smp_ops
->cause_nmi_ipi(cpu
))
476 do_message_pass(cpu
, PPC_MSG_NMI_IPI
);
480 for_each_online_cpu(c
) {
481 if (c
== raw_smp_processor_id())
483 do_message_pass(c
, PPC_MSG_NMI_IPI
);
489 * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
490 * - fn is the target callback function.
491 * - delay_us > 0 is the delay before giving up waiting for targets to
492 * begin executing the handler, == 0 specifies indefinite delay.
494 static int __smp_send_nmi_ipi(int cpu
, void (*fn
)(struct pt_regs
*),
495 u64 delay_us
, bool safe
)
498 int me
= raw_smp_processor_id();
502 BUG_ON(cpu
< 0 && cpu
!= NMI_IPI_ALL_OTHERS
);
504 if (unlikely(!smp_ops
))
507 nmi_ipi_lock_start(&flags
);
508 while (nmi_ipi_busy
) {
509 nmi_ipi_unlock_end(&flags
);
510 spin_until_cond(!nmi_ipi_busy
);
511 nmi_ipi_lock_start(&flags
);
514 nmi_ipi_function
= fn
;
516 WARN_ON_ONCE(!cpumask_empty(&nmi_ipi_pending_mask
));
520 cpumask_copy(&nmi_ipi_pending_mask
, cpu_online_mask
);
521 cpumask_clear_cpu(me
, &nmi_ipi_pending_mask
);
523 cpumask_set_cpu(cpu
, &nmi_ipi_pending_mask
);
528 /* Interrupts remain hard disabled */
530 do_smp_send_nmi_ipi(cpu
, safe
);
533 /* nmi_ipi_busy is set here, so unlock/lock is okay */
534 while (!cpumask_empty(&nmi_ipi_pending_mask
)) {
545 if (!cpumask_empty(&nmi_ipi_pending_mask
)) {
546 /* Timeout waiting for CPUs to call smp_handle_nmi_ipi */
548 cpumask_clear(&nmi_ipi_pending_mask
);
551 nmi_ipi_function
= NULL
;
552 nmi_ipi_busy
= false;
554 nmi_ipi_unlock_end(&flags
);
559 int smp_send_nmi_ipi(int cpu
, void (*fn
)(struct pt_regs
*), u64 delay_us
)
561 return __smp_send_nmi_ipi(cpu
, fn
, delay_us
, false);
564 int smp_send_safe_nmi_ipi(int cpu
, void (*fn
)(struct pt_regs
*), u64 delay_us
)
566 return __smp_send_nmi_ipi(cpu
, fn
, delay_us
, true);
568 #endif /* CONFIG_NMI_IPI */
570 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
571 void tick_broadcast(const struct cpumask
*mask
)
575 for_each_cpu(cpu
, mask
)
576 do_message_pass(cpu
, PPC_MSG_TICK_BROADCAST
);
580 #ifdef CONFIG_DEBUGGER
581 static void debugger_ipi_callback(struct pt_regs
*regs
)
586 void smp_send_debugger_break(void)
588 smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS
, debugger_ipi_callback
, 1000000);
592 #ifdef CONFIG_CRASH_DUMP
593 void crash_send_ipi(void (*crash_ipi_callback
)(struct pt_regs
*))
597 smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS
, crash_ipi_callback
, 1000000);
598 if (kdump_in_progress() && crash_wake_offline
) {
599 for_each_present_cpu(cpu
) {
603 * crash_ipi_callback will wait for
604 * all cpus, including offline CPUs.
605 * We don't care about nmi_ipi_function.
606 * Offline cpus will jump straight into
607 * crash_ipi_callback, we can skip the
608 * entire NMI dance and waiting for
609 * cpus to clear pending mask, etc.
611 do_smp_send_nmi_ipi(cpu
, false);
617 void crash_smp_send_stop(void)
619 static bool stopped
= false;
622 * In case of fadump, register data for all CPUs is captured by f/w
623 * on ibm,os-term rtas call. Skip IPI callbacks to other CPUs before
624 * this rtas call to avoid tricky post processing of those CPUs'
627 if (should_fadump_crash())
635 #ifdef CONFIG_CRASH_DUMP
636 if (kexec_crash_image
) {
637 crash_kexec_prepare();
645 #ifdef CONFIG_NMI_IPI
646 static void nmi_stop_this_cpu(struct pt_regs
*regs
)
649 * IRQs are already hard disabled by the smp_handle_nmi_ipi.
651 set_cpu_online(smp_processor_id(), false);
658 void smp_send_stop(void)
660 smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS
, nmi_stop_this_cpu
, 1000000);
663 #else /* CONFIG_NMI_IPI */
665 static void stop_this_cpu(void *dummy
)
670 * Offlining CPUs in stop_this_cpu can result in scheduler warnings,
671 * (see commit de6e5d38417e), but printk_safe_flush_on_panic() wants
672 * to know other CPUs are offline before it breaks locks to flush
673 * printk buffers, in case we panic()ed while holding the lock.
675 set_cpu_online(smp_processor_id(), false);
682 void smp_send_stop(void)
684 static bool stopped
= false;
687 * Prevent waiting on csd lock from a previous smp_send_stop.
688 * This is racy, but in general callers try to do the right
689 * thing and only fire off one smp_send_stop (e.g., see
697 smp_call_function(stop_this_cpu
, NULL
, 0);
699 #endif /* CONFIG_NMI_IPI */
701 static struct task_struct
*current_set
[NR_CPUS
];
703 static void smp_store_cpu_info(int id
)
705 per_cpu(cpu_pvr
, id
) = mfspr(SPRN_PVR
);
706 #ifdef CONFIG_PPC_E500
707 per_cpu(next_tlbcam_idx
, id
)
708 = (mfspr(SPRN_TLB1CFG
) & TLBnCFG_N_ENTRY
) - 1;
713 * Relationships between CPUs are maintained in a set of per-cpu cpumasks so
714 * rather than just passing around the cpumask we pass around a function that
715 * returns that cpumask for the given CPU.
717 static void set_cpus_related(int i
, int j
, struct cpumask
*(*get_cpumask
)(int))
719 cpumask_set_cpu(i
, get_cpumask(j
));
720 cpumask_set_cpu(j
, get_cpumask(i
));
723 #ifdef CONFIG_HOTPLUG_CPU
724 static void set_cpus_unrelated(int i
, int j
,
725 struct cpumask
*(*get_cpumask
)(int))
727 cpumask_clear_cpu(i
, get_cpumask(j
));
728 cpumask_clear_cpu(j
, get_cpumask(i
));
733 * Extends set_cpus_related. Instead of setting one CPU at a time in
734 * dstmask, set srcmask in one shot. dstmask should be a superset of srcmask.
736 static void or_cpumasks_related(int i
, int j
, struct cpumask
*(*srcmask
)(int),
737 struct cpumask
*(*dstmask
)(int))
739 struct cpumask
*mask
;
743 for_each_cpu(k
, srcmask(i
))
744 cpumask_or(dstmask(k
), dstmask(k
), mask
);
750 for_each_cpu(k
, srcmask(j
))
751 cpumask_or(dstmask(k
), dstmask(k
), mask
);
755 * parse_thread_groups: Parses the "ibm,thread-groups" device tree
756 * property for the CPU device node @dn and stores
757 * the parsed output in the thread_groups_list
760 * @dn: The device node of the CPU device.
761 * @tglp: Pointer to a thread group list structure into which the parsed
762 * output of "ibm,thread-groups" is stored.
764 * ibm,thread-groups[0..N-1] array defines which group of threads in
765 * the CPU-device node can be grouped together based on the property.
767 * This array can represent thread groupings for multiple properties.
769 * ibm,thread-groups[i + 0] tells us the property based on which the
770 * threads are being grouped together. If this value is 1, it implies
771 * that the threads in the same group share L1, translation cache. If
772 * the value is 2, it implies that the threads in the same group share
775 * ibm,thread-groups[i+1] tells us how many such thread groups exist for the
776 * property ibm,thread-groups[i]
778 * ibm,thread-groups[i+2] tells us the number of threads in each such
780 * Suppose k = (ibm,thread-groups[i+1] * ibm,thread-groups[i+2]), then,
782 * ibm,thread-groups[i+3..i+k+2] (is the list of threads identified by
783 * "ibm,ppc-interrupt-server#s" arranged as per their membership in
787 * If "ibm,thread-groups" = [1,2,4,8,10,12,14,9,11,13,15,2,2,4,8,10,12,14,9,11,13,15]
788 * This can be decomposed up into two consecutive arrays:
789 * a) [1,2,4,8,10,12,14,9,11,13,15]
790 * b) [2,2,4,8,10,12,14,9,11,13,15]
794 * a) provides information of Property "1" being shared by "2" groups,
795 * each with "4" threads. The "ibm,ppc-interrupt-server#s" of
796 * the first group is {8,10,12,14} and the
797 * "ibm,ppc-interrupt-server#s" of the second group is
798 * {9,11,13,15}. Property "1" is indicative of the thread in the
799 * group sharing L1 cache, translation cache and Instruction Data
802 * b) provides information of Property "2" being shared by "2" groups,
803 * each group with "4" threads. The "ibm,ppc-interrupt-server#s" of
804 * the first group is {8,10,12,14} and the
805 * "ibm,ppc-interrupt-server#s" of the second group is
806 * {9,11,13,15}. Property "2" indicates that the threads in each
807 * group share the L2-cache.
809 * Returns 0 on success, -EINVAL if the property does not exist,
810 * -ENODATA if property does not have a value, and -EOVERFLOW if the
811 * property data isn't large enough.
813 static int parse_thread_groups(struct device_node
*dn
,
814 struct thread_groups_list
*tglp
)
816 unsigned int property_idx
= 0;
817 u32
*thread_group_array
;
818 size_t total_threads
;
823 count
= of_property_count_u32_elems(dn
, "ibm,thread-groups");
824 thread_group_array
= kcalloc(count
, sizeof(u32
), GFP_KERNEL
);
825 ret
= of_property_read_u32_array(dn
, "ibm,thread-groups",
826 thread_group_array
, count
);
830 while (i
< count
&& property_idx
< MAX_THREAD_GROUP_PROPERTIES
) {
832 struct thread_groups
*tg
= &tglp
->property_tgs
[property_idx
++];
834 tg
->property
= thread_group_array
[i
];
835 tg
->nr_groups
= thread_group_array
[i
+ 1];
836 tg
->threads_per_group
= thread_group_array
[i
+ 2];
837 total_threads
= tg
->nr_groups
* tg
->threads_per_group
;
839 thread_list
= &thread_group_array
[i
+ 3];
841 for (j
= 0; j
< total_threads
; j
++)
842 tg
->thread_list
[j
] = thread_list
[j
];
843 i
= i
+ 3 + total_threads
;
846 tglp
->nr_properties
= property_idx
;
849 kfree(thread_group_array
);
854 * get_cpu_thread_group_start : Searches the thread group in tg->thread_list
855 * that @cpu belongs to.
857 * @cpu : The logical CPU whose thread group is being searched.
858 * @tg : The thread-group structure of the CPU node which @cpu belongs
861 * Returns the index to tg->thread_list that points to the start
862 * of the thread_group that @cpu belongs to.
864 * Returns -1 if cpu doesn't belong to any of the groups pointed to by
867 static int get_cpu_thread_group_start(int cpu
, struct thread_groups
*tg
)
869 int hw_cpu_id
= get_hard_smp_processor_id(cpu
);
872 for (i
= 0; i
< tg
->nr_groups
; i
++) {
873 int group_start
= i
* tg
->threads_per_group
;
875 for (j
= 0; j
< tg
->threads_per_group
; j
++) {
876 int idx
= group_start
+ j
;
878 if (tg
->thread_list
[idx
] == hw_cpu_id
)
886 static struct thread_groups
*__init
get_thread_groups(int cpu
,
890 struct device_node
*dn
= of_get_cpu_node(cpu
, NULL
);
891 struct thread_groups_list
*cpu_tgl
= &tgl
[cpu
];
892 struct thread_groups
*tg
= NULL
;
901 if (!cpu_tgl
->nr_properties
) {
902 *err
= parse_thread_groups(dn
, cpu_tgl
);
907 for (i
= 0; i
< cpu_tgl
->nr_properties
; i
++) {
908 if (cpu_tgl
->property_tgs
[i
].property
== group_property
) {
909 tg
= &cpu_tgl
->property_tgs
[i
];
921 static int __init
update_mask_from_threadgroup(cpumask_var_t
*mask
, struct thread_groups
*tg
,
922 int cpu
, int cpu_group_start
)
924 int first_thread
= cpu_first_thread_sibling(cpu
);
927 zalloc_cpumask_var_node(mask
, GFP_KERNEL
, cpu_to_node(cpu
));
929 for (i
= first_thread
; i
< first_thread
+ threads_per_core
; i
++) {
930 int i_group_start
= get_cpu_thread_group_start(i
, tg
);
932 if (unlikely(i_group_start
== -1)) {
937 if (i_group_start
== cpu_group_start
)
938 cpumask_set_cpu(i
, *mask
);
944 static int __init
init_thread_group_cache_map(int cpu
, int cache_property
)
947 int cpu_group_start
= -1, err
= 0;
948 struct thread_groups
*tg
= NULL
;
949 cpumask_var_t
*mask
= NULL
;
951 if (cache_property
!= THREAD_GROUP_SHARE_L1
&&
952 cache_property
!= THREAD_GROUP_SHARE_L2_L3
)
955 tg
= get_thread_groups(cpu
, cache_property
, &err
);
960 cpu_group_start
= get_cpu_thread_group_start(cpu
, tg
);
962 if (unlikely(cpu_group_start
== -1)) {
967 if (cache_property
== THREAD_GROUP_SHARE_L1
) {
968 mask
= &per_cpu(thread_group_l1_cache_map
, cpu
);
969 update_mask_from_threadgroup(mask
, tg
, cpu
, cpu_group_start
);
971 else if (cache_property
== THREAD_GROUP_SHARE_L2_L3
) {
972 mask
= &per_cpu(thread_group_l2_cache_map
, cpu
);
973 update_mask_from_threadgroup(mask
, tg
, cpu
, cpu_group_start
);
974 mask
= &per_cpu(thread_group_l3_cache_map
, cpu
);
975 update_mask_from_threadgroup(mask
, tg
, cpu
, cpu_group_start
);
982 static bool shared_caches __ro_after_init
;
984 #ifdef CONFIG_SCHED_SMT
985 /* cpumask of CPUs with asymmetric SMT dependency */
986 static int powerpc_smt_flags(void)
988 int flags
= SD_SHARE_CPUCAPACITY
| SD_SHARE_LLC
;
990 if (cpu_has_feature(CPU_FTR_ASYM_SMT
)) {
991 printk_once(KERN_INFO
"Enabling Asymmetric SMT scheduling\n");
992 flags
|= SD_ASYM_PACKING
;
999 * On shared processor LPARs scheduled on a big core (which has two or more
1000 * independent thread groups per core), prefer lower numbered CPUs, so
1001 * that the workload consolidates onto a smaller number of cores.
1003 static __ro_after_init
DEFINE_STATIC_KEY_FALSE(splpar_asym_pack
);
1006 * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
1007 * This topology makes it *much* cheaper to migrate tasks between adjacent cores
1008 * since the migrated task remains cache hot. We want to take advantage of this
1009 * at the scheduler level so an extra topology level is required.
1011 static int powerpc_shared_cache_flags(void)
1013 if (static_branch_unlikely(&splpar_asym_pack
))
1014 return SD_SHARE_LLC
| SD_ASYM_PACKING
;
1016 return SD_SHARE_LLC
;
1019 static int powerpc_shared_proc_flags(void)
1021 if (static_branch_unlikely(&splpar_asym_pack
))
1022 return SD_ASYM_PACKING
;
1028 * We can't just pass cpu_l2_cache_mask() directly because
1029 * it returns a non-const pointer and the compiler barfs on that.
1031 static const struct cpumask
*shared_cache_mask(int cpu
)
1033 return per_cpu(cpu_l2_cache_map
, cpu
);
1036 #ifdef CONFIG_SCHED_SMT
1037 static const struct cpumask
*smallcore_smt_mask(int cpu
)
1039 return cpu_smallcore_mask(cpu
);
1043 static struct cpumask
*cpu_coregroup_mask(int cpu
)
1045 return per_cpu(cpu_coregroup_map
, cpu
);
1048 static bool has_coregroup_support(void)
1050 /* Coregroup identification not available on shared systems */
1051 if (is_shared_processor())
1054 return coregroup_enabled
;
1057 static const struct cpumask
*cpu_mc_mask(int cpu
)
1059 return cpu_coregroup_mask(cpu
);
1062 static int __init
init_big_cores(void)
1066 for_each_possible_cpu(cpu
) {
1067 int err
= init_thread_group_cache_map(cpu
, THREAD_GROUP_SHARE_L1
);
1072 zalloc_cpumask_var_node(&per_cpu(cpu_smallcore_map
, cpu
),
1077 has_big_cores
= true;
1079 for_each_possible_cpu(cpu
) {
1080 int err
= init_thread_group_cache_map(cpu
, THREAD_GROUP_SHARE_L2_L3
);
1086 thread_group_shares_l2
= true;
1087 thread_group_shares_l3
= true;
1088 pr_debug("L2/L3 cache only shared by the threads in the small core\n");
1093 void __init
smp_prepare_cpus(unsigned int max_cpus
)
1095 unsigned int cpu
, num_threads
;
1097 DBG("smp_prepare_cpus\n");
1100 * setup_cpu may need to be called on the boot cpu. We haven't
1101 * spun any cpus up but let's be paranoid.
1103 BUG_ON(boot_cpuid
!= smp_processor_id());
1105 /* Fixup boot cpu */
1106 smp_store_cpu_info(boot_cpuid
);
1107 cpu_callin_map
[boot_cpuid
] = 1;
1109 for_each_possible_cpu(cpu
) {
1110 zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map
, cpu
),
1111 GFP_KERNEL
, cpu_to_node(cpu
));
1112 zalloc_cpumask_var_node(&per_cpu(cpu_l2_cache_map
, cpu
),
1113 GFP_KERNEL
, cpu_to_node(cpu
));
1114 zalloc_cpumask_var_node(&per_cpu(cpu_core_map
, cpu
),
1115 GFP_KERNEL
, cpu_to_node(cpu
));
1116 if (has_coregroup_support())
1117 zalloc_cpumask_var_node(&per_cpu(cpu_coregroup_map
, cpu
),
1118 GFP_KERNEL
, cpu_to_node(cpu
));
1122 * numa_node_id() works after this.
1124 if (cpu_present(cpu
)) {
1125 set_cpu_numa_node(cpu
, numa_cpu_lookup_table
[cpu
]);
1126 set_cpu_numa_mem(cpu
,
1127 local_memory_node(numa_cpu_lookup_table
[cpu
]));
1132 /* Init the cpumasks so the boot CPU is related to itself */
1133 cpumask_set_cpu(boot_cpuid
, cpu_sibling_mask(boot_cpuid
));
1134 cpumask_set_cpu(boot_cpuid
, cpu_l2_cache_mask(boot_cpuid
));
1135 cpumask_set_cpu(boot_cpuid
, cpu_core_mask(boot_cpuid
));
1137 if (has_coregroup_support())
1138 cpumask_set_cpu(boot_cpuid
, cpu_coregroup_mask(boot_cpuid
));
1141 if (has_big_cores
) {
1142 cpumask_set_cpu(boot_cpuid
,
1143 cpu_smallcore_mask(boot_cpuid
));
1146 if (cpu_to_chip_id(boot_cpuid
) != -1) {
1147 int idx
= DIV_ROUND_UP(num_possible_cpus(), threads_per_core
);
1150 * All threads of a core will all belong to the same core,
1151 * chip_id_lookup_table will have one entry per core.
1152 * Assumption: if boot_cpuid doesn't have a chip-id, then no
1153 * other CPU will have a chip-id either.
1155 chip_id_lookup_table
= kcalloc(idx
, sizeof(int), GFP_KERNEL
);
1156 if (chip_id_lookup_table
)
1157 memset(chip_id_lookup_table
, -1, sizeof(int) * idx
);
1160 if (smp_ops
&& smp_ops
->probe
)
1163 // Initialise the generic SMT topology support
1165 if (smt_enabled_at_boot
)
1166 num_threads
= smt_enabled_at_boot
;
1167 cpu_smt_set_num_threads(num_threads
, threads_per_core
);
1170 void __init
smp_prepare_boot_cpu(void)
1172 BUG_ON(smp_processor_id() != boot_cpuid
);
1174 paca_ptrs
[boot_cpuid
]->__current
= current
;
1176 set_numa_node(numa_cpu_lookup_table
[boot_cpuid
]);
1177 current_set
[boot_cpuid
] = current
;
1180 #ifdef CONFIG_HOTPLUG_CPU
1182 int generic_cpu_disable(void)
1184 unsigned int cpu
= smp_processor_id();
1186 if (cpu
== boot_cpuid
)
1189 set_cpu_online(cpu
, false);
1190 #ifdef CONFIG_PPC64_PROC_SYSTEMCFG
1191 systemcfg
->processorCount
--;
1193 /* Update affinity of all IRQs previously aimed at this CPU */
1194 irq_migrate_all_off_this_cpu();
1197 * Depending on the details of the interrupt controller, it's possible
1198 * that one of the interrupts we just migrated away from this CPU is
1199 * actually already pending on this CPU. If we leave it in that state
1200 * the interrupt will never be EOI'ed, and will never fire again. So
1201 * temporarily enable interrupts here, to allow any pending interrupt to
1202 * be received (and EOI'ed), before we take this CPU offline.
1206 local_irq_disable();
1211 void generic_cpu_die(unsigned int cpu
)
1215 for (i
= 0; i
< 100; i
++) {
1217 if (is_cpu_dead(cpu
))
1221 printk(KERN_ERR
"CPU%d didn't die...\n", cpu
);
1224 void generic_set_cpu_dead(unsigned int cpu
)
1226 per_cpu(cpu_state
, cpu
) = CPU_DEAD
;
1230 * The cpu_state should be set to CPU_UP_PREPARE in kick_cpu(), otherwise
1231 * the cpu_state is always CPU_DEAD after calling generic_set_cpu_dead(),
1232 * which makes the delay in generic_cpu_die() not happen.
1234 void generic_set_cpu_up(unsigned int cpu
)
1236 per_cpu(cpu_state
, cpu
) = CPU_UP_PREPARE
;
1239 int generic_check_cpu_restart(unsigned int cpu
)
1241 return per_cpu(cpu_state
, cpu
) == CPU_UP_PREPARE
;
1244 int is_cpu_dead(unsigned int cpu
)
1246 return per_cpu(cpu_state
, cpu
) == CPU_DEAD
;
1249 static bool secondaries_inhibited(void)
1251 return kvm_hv_mode_active();
1254 #else /* HOTPLUG_CPU */
1256 #define secondaries_inhibited() 0
1260 static void cpu_idle_thread_init(unsigned int cpu
, struct task_struct
*idle
)
1263 paca_ptrs
[cpu
]->__current
= idle
;
1264 paca_ptrs
[cpu
]->kstack
= (unsigned long)task_stack_page(idle
) +
1265 THREAD_SIZE
- STACK_FRAME_MIN_SIZE
;
1267 task_thread_info(idle
)->cpu
= cpu
;
1268 secondary_current
= current_set
[cpu
] = idle
;
/*
 * __cpu_up - start @cpu and wait for it to come online.
 * @cpu:   logical CPU number to start
 * @tidle: idle task for that CPU, handed to cpu_idle_thread_init()
 *
 * Checks performed before the CPU is started:
 *   - secondary threads are not brought up while secondaries_inhibited()
 *     (only relevant when threads_per_core > 1);
 *   - smp_ops must exist, and the optional cpu_bootable() hook must accept
 *     @cpu.
 * Then the idle task is set up, the optional prepare_cpu() hook runs,
 * cpu_callin_map[@cpu] is cleared and kick_cpu() is called; a kick failure
 * is logged with pr_err().
 *
 * Waiting for the new CPU to set its callin word happens in two stages:
 *   1. busy-wait: 5 s while system_state < SYSTEM_RUNNING, 1 ms otherwise;
 *   2. only when system_state >= SYSTEM_RUNNING: poll with fsleep() every
 *      10 ms for up to 100 s.
 * If the word is still clear the CPU is reported as stuck.  Otherwise the
 * optional give_timebase() hook runs and the function spins until
 * cpu_online(@cpu) becomes true.
 *
 * NOTE(review): the exact error codes returned on the failure paths should
 * be confirmed against the full function body.
 */
1271 int __cpu_up(unsigned int cpu
, struct task_struct
*tidle
)
1273 const unsigned long boot_spin_ms
= 5 * MSEC_PER_SEC
;
1274 const bool booting
= system_state
< SYSTEM_RUNNING
;
1275 const unsigned long hp_spin_ms
= 1;
1276 unsigned long deadline
;
1278 const unsigned long spin_wait_ms
= booting
? boot_spin_ms
: hp_spin_ms
;
1281 * Don't allow secondary threads to come online if inhibited
1283 if (threads_per_core
> 1 && secondaries_inhibited() &&
1284 cpu_thread_in_subcore(cpu
))
1287 if (smp_ops
== NULL
||
1288 (smp_ops
->cpu_bootable
&& !smp_ops
->cpu_bootable(cpu
)))
1291 cpu_idle_thread_init(cpu
, tidle
);
1294 * The platform might need to allocate resources prior to bringing
1297 if (smp_ops
->prepare_cpu
) {
1298 rc
= smp_ops
->prepare_cpu(cpu
);
1303 /* Make sure callin-map entry is 0 (can be leftover a CPU
1306 cpu_callin_map
[cpu
] = 0;
1308 /* The information for processor bringup must
1309 * be written out to main store before we release
1315 DBG("smp: kicking cpu %d\n", cpu
);
1316 rc
= smp_ops
->kick_cpu(cpu
);
1318 pr_err("smp: failed starting cpu %d (rc %d)\n", cpu
, rc
);
1323 * At boot time, simply spin on the callin word until the
1326 * At run time, spin for an optimistic amount of time to avoid
1327 * sleeping in the common case.
1329 deadline
= jiffies
+ msecs_to_jiffies(spin_wait_ms
);
1330 spin_until_cond(cpu_callin_map
[cpu
] || time_is_before_jiffies(deadline
));
1332 if (!cpu_callin_map
[cpu
] && system_state
>= SYSTEM_RUNNING
) {
1333 const unsigned long sleep_interval_us
= 10 * USEC_PER_MSEC
;
1334 const unsigned long sleep_wait_ms
= 100 * MSEC_PER_SEC
;
1336 deadline
= jiffies
+ msecs_to_jiffies(sleep_wait_ms
);
1337 while (!cpu_callin_map
[cpu
] && time_is_after_jiffies(deadline
))
1338 fsleep(sleep_interval_us
);
1341 if (!cpu_callin_map
[cpu
]) {
1342 printk(KERN_ERR
"Processor %u is stuck.\n", cpu
);
1346 DBG("Processor %u found.\n", cpu
);
1348 if (smp_ops
->give_timebase
)
1349 smp_ops
->give_timebase();
1351 /* Wait until cpu puts itself in the online & active maps */
1352 spin_until_cond(cpu_online(cpu
));
1357 /* Return the value of the reg property corresponding to the given
/*
 * cpu_to_core_id - look up the hardware id of @cpu in the device tree.
 * @cpu: logical CPU number
 *
 * Fetches the CPU's device node with of_get_cpu_node() and reads hardware
 * id entry 0 from it with of_get_cpu_hwid().  Exported to GPL modules.
 */
1360 int cpu_to_core_id(int cpu
)
1362 struct device_node
*np
;
1365 np
= of_get_cpu_node(cpu
, NULL
);
1369 id
= of_get_cpu_hwid(np
, 0);
1374 EXPORT_SYMBOL_GPL(cpu_to_core_id
);
1376 /* Helper routines for cpu to core mapping */
/*
 * cpu_core_index_of_thread - map a thread (logical CPU) number to its core.
 * @cpu: logical CPU number
 *
 * Returns @cpu shifted right by threads_shift.  Exported to GPL modules.
 */
1377 int cpu_core_index_of_thread(int cpu
)
1379 return cpu
>> threads_shift
;
1381 EXPORT_SYMBOL_GPL(cpu_core_index_of_thread
);
/*
 * cpu_first_thread_of_core - inverse of cpu_core_index_of_thread().
 * @core: core index
 *
 * Returns @core shifted left by threads_shift, i.e. the lowest logical CPU
 * number that maps back to @core.  Exported to GPL modules.
 */
1383 int cpu_first_thread_of_core(int core
)
1385 return core
<< threads_shift
;
1387 EXPORT_SYMBOL_GPL(cpu_first_thread_of_core
);
1389 /* Must be called when no change can occur to cpu_present_mask,
1390 * i.e. during cpu online or offline.
/*
 * cpu_to_l2cache - find the device node of the cache behind @cpu.
 * @cpu: logical CPU number; checked with cpu_present() first
 *
 * Looks up the CPU's device node and asks of_find_next_cache_node() for
 * the next cache level below it.  update_mask_by_l2() treats a NULL result
 * as "no L2 node" and releases the node it holds with of_node_put().
 */
1392 static struct device_node
*cpu_to_l2cache(int cpu
)
1394 struct device_node
*np
;
1395 struct device_node
*cache
;
1397 if (!cpu_present(cpu
))
1400 np
= of_get_cpu_node(cpu
, NULL
);
1404 cache
= of_find_next_cache_node(np
);
/*
 * update_mask_by_l2 - build the L2-cache sibling mask for @cpu.
 * @cpu:  CPU whose cpu_l2_cache_mask() is being filled in
 * @mask: caller-provided scratch cpumask; if *@mask is NULL only the core
 *        siblings are recorded as sharing the cache
 *
 * submask_fn names the mask of CPUs known to share L1 with a given CPU
 * (cpu_sibling_mask by default, cpu_smallcore_mask as the alternative).
 *
 * When thread_group_shares_l2 is set, the L2 mask is derived from the
 * per-cpu thread_group_l2_cache_map, and a one-time warning is printed if
 * the L1 siblings are not contained in the resulting L2 mask.
 *
 * Otherwise the CPU's L2 device node is looked up.  The scratch mask starts
 * as the online CPUs within cpu_cpu_mask(@cpu); @cpu's own submask is
 * merged into its L2 mask, and then each remaining candidate is compared by
 * L2 node.  A match merges that CPU's submask into the L2 mask; either way
 * the CPUs just accounted for are removed from the scratch mask so they are
 * not examined again.  The L2 node is released with of_node_put().
 */
1411 static bool update_mask_by_l2(int cpu
, cpumask_var_t
*mask
)
1413 struct cpumask
*(*submask_fn
)(int) = cpu_sibling_mask
;
1414 struct device_node
*l2_cache
, *np
;
1418 submask_fn
= cpu_smallcore_mask
;
1421 * If the threads in a thread-group share L2 cache, then the
1422 * L2-mask can be obtained from thread_group_l2_cache_map.
1424 if (thread_group_shares_l2
) {
1425 cpumask_set_cpu(cpu
, cpu_l2_cache_mask(cpu
));
1427 for_each_cpu(i
, per_cpu(thread_group_l2_cache_map
, cpu
)) {
1429 set_cpus_related(i
, cpu
, cpu_l2_cache_mask
);
1432 /* Verify that L1-cache siblings are a subset of L2 cache-siblings */
1433 if (!cpumask_equal(submask_fn(cpu
), cpu_l2_cache_mask(cpu
)) &&
1434 !cpumask_subset(submask_fn(cpu
), cpu_l2_cache_mask(cpu
))) {
1435 pr_warn_once("CPU %d : Inconsistent L1 and L2 cache siblings\n",
1442 l2_cache
= cpu_to_l2cache(cpu
);
1443 if (!l2_cache
|| !*mask
) {
1444 /* Assume only core siblings share cache with this CPU */
1445 for_each_cpu(i
, cpu_sibling_mask(cpu
))
1446 set_cpus_related(cpu
, i
, cpu_l2_cache_mask
);
1451 cpumask_and(*mask
, cpu_online_mask
, cpu_cpu_mask(cpu
));
1453 /* Update l2-cache mask with all the CPUs that are part of submask */
1454 or_cpumasks_related(cpu
, cpu
, submask_fn
, cpu_l2_cache_mask
);
1456 /* Skip all CPUs already part of current CPU l2-cache mask */
1457 cpumask_andnot(*mask
, *mask
, cpu_l2_cache_mask(cpu
));
1459 for_each_cpu(i
, *mask
) {
1461 * when updating the marks the current CPU has not been marked
1462 * online, but we need to update the cache masks
1464 np
= cpu_to_l2cache(i
);
1466 /* Skip all CPUs already part of current CPU l2-cache */
1467 if (np
== l2_cache
) {
1468 or_cpumasks_related(cpu
, i
, submask_fn
, cpu_l2_cache_mask
);
1469 cpumask_andnot(*mask
, *mask
, submask_fn(i
));
1471 cpumask_andnot(*mask
, *mask
, cpu_l2_cache_mask(i
));
1476 of_node_put(l2_cache
);
1481 #ifdef CONFIG_HOTPLUG_CPU
/*
 * remove_cpu_from_masks - undo add_cpu_to_masks() for a CPU going offline.
 * @cpu: CPU being removed (called from __cpu_disable())
 *
 * Detaches @cpu from its NUMA node, then walks mask_fn(@cpu) - the sibling
 * mask by default, the L2-cache mask as the alternative - and breaks the
 * pairwise relation between @cpu and every CPU found there in the L2-cache,
 * sibling and small-core masks.  The core mask and, when coregroups are
 * supported, the coregroup mask are then cleared the same way using their
 * own membership lists.
 */
1482 static void remove_cpu_from_masks(int cpu
)
1484 struct cpumask
*(*mask_fn
)(int) = cpu_sibling_mask
;
1487 unmap_cpu_from_node(cpu
);
1490 mask_fn
= cpu_l2_cache_mask
;
1492 for_each_cpu(i
, mask_fn(cpu
)) {
1493 set_cpus_unrelated(cpu
, i
, cpu_l2_cache_mask
);
1494 set_cpus_unrelated(cpu
, i
, cpu_sibling_mask
);
1496 set_cpus_unrelated(cpu
, i
, cpu_smallcore_mask
);
1499 for_each_cpu(i
, cpu_core_mask(cpu
))
1500 set_cpus_unrelated(cpu
, i
, cpu_core_mask
);
1502 if (has_coregroup_support()) {
1503 for_each_cpu(i
, cpu_coregroup_mask(cpu
))
1504 set_cpus_unrelated(cpu
, i
, cpu_coregroup_mask
);
/*
 * add_cpu_to_smallcore_masks - populate the small-core mask for @cpu.
 * @cpu: CPU being brought up
 *
 * Sets @cpu in its own cpu_smallcore_mask() and relates it, in that mask,
 * to the CPUs listed in its per-cpu thread_group_l1_cache_map.
 */
1509 static inline void add_cpu_to_smallcore_masks(int cpu
)
1516 cpumask_set_cpu(cpu
, cpu_smallcore_mask(cpu
));
1518 for_each_cpu(i
, per_cpu(thread_group_l1_cache_map
, cpu
)) {
1520 set_cpus_related(i
, cpu
, cpu_smallcore_mask
);
/*
 * update_coregroup_mask - build the coregroup mask for @cpu.
 * @cpu:  CPU whose cpu_coregroup_mask() is being filled in
 * @mask: caller-provided scratch cpumask used to track CPUs still to visit
 *
 * Mirrors update_mask_by_l2(), but groups CPUs by cpu_to_coregroup_id()
 * instead of by L2 device node.  submask_fn is the sibling mask by default
 * and the L2-cache mask as the alternative.  There is a simple path that
 * relates only the CPUs in submask_fn(@cpu).  The full path starts from the
 * online CPUs in cpu_cpu_mask(@cpu), merges the submask of every CPU with
 * the same coregroup id, and prunes the CPUs it has handled from the
 * scratch mask as it goes.
 */
1524 static void update_coregroup_mask(int cpu
, cpumask_var_t
*mask
)
1526 struct cpumask
*(*submask_fn
)(int) = cpu_sibling_mask
;
1527 int coregroup_id
= cpu_to_coregroup_id(cpu
);
1531 submask_fn
= cpu_l2_cache_mask
;
1534 /* Assume only siblings are part of this CPU's coregroup */
1535 for_each_cpu(i
, submask_fn(cpu
))
1536 set_cpus_related(cpu
, i
, cpu_coregroup_mask
);
1541 cpumask_and(*mask
, cpu_online_mask
, cpu_cpu_mask(cpu
));
1543 /* Update coregroup mask with all the CPUs that are part of submask */
1544 or_cpumasks_related(cpu
, cpu
, submask_fn
, cpu_coregroup_mask
);
1546 /* Skip all CPUs already part of coregroup mask */
1547 cpumask_andnot(*mask
, *mask
, cpu_coregroup_mask(cpu
));
1549 for_each_cpu(i
, *mask
) {
1550 /* Skip all CPUs not part of this coregroup */
1551 if (coregroup_id
== cpu_to_coregroup_id(i
)) {
1552 or_cpumasks_related(cpu
, i
, submask_fn
, cpu_coregroup_mask
);
1553 cpumask_andnot(*mask
, *mask
, submask_fn(i
));
1555 cpumask_andnot(*mask
, *mask
, cpu_coregroup_mask(i
));
/*
 * add_cpu_to_masks - add @cpu to every topology cpumask it belongs to.
 * @cpu: CPU being brought up; it is not yet in cpu_online_mask
 *
 * Steps, in order:
 *   - map @cpu to its NUMA node and set it in its own sibling/core masks;
 *   - relate it to the threads numbered first_thread up to
 *     first_thread + threads_per_core - 1 in the sibling mask;
 *   - fill in the small-core, L2-cache and (if supported) coregroup masks;
 *   - grow the core mask from submask_fn (sibling mask by default, L2-cache
 *     mask as the alternative) and then from other online CPUs whose
 *     cpu_to_chip_id() equals this CPU's chip_id.
 *
 * A scratch cpumask is allocated on @cpu's node with GFP_ATOMIC for the
 * L2/coregroup/core passes and freed at the end.  chip_id is only looked up
 * when chip_id_lookup_table exists and that allocation succeeded.
 */
1560 static void add_cpu_to_masks(int cpu
)
1562 struct cpumask
*(*submask_fn
)(int) = cpu_sibling_mask
;
1563 int first_thread
= cpu_first_thread_sibling(cpu
);
1570 * This CPU will not be in the online mask yet so we need to manually
1571 * add it to its own thread sibling mask.
1573 map_cpu_to_node(cpu
, cpu_to_node(cpu
));
1574 cpumask_set_cpu(cpu
, cpu_sibling_mask(cpu
));
1575 cpumask_set_cpu(cpu
, cpu_core_mask(cpu
));
1577 for (i
= first_thread
; i
< first_thread
+ threads_per_core
; i
++)
1579 set_cpus_related(i
, cpu
, cpu_sibling_mask
);
1581 add_cpu_to_smallcore_masks(cpu
);
1583 /* In CPU-hotplug path, hence use GFP_ATOMIC */
1584 ret
= alloc_cpumask_var_node(&mask
, GFP_ATOMIC
, cpu_to_node(cpu
));
1585 update_mask_by_l2(cpu
, &mask
);
1587 if (has_coregroup_support())
1588 update_coregroup_mask(cpu
, &mask
);
1590 if (chip_id_lookup_table
&& ret
)
1591 chip_id
= cpu_to_chip_id(cpu
);
1594 submask_fn
= cpu_l2_cache_mask
;
1596 /* Update core_mask with all the CPUs that are part of submask */
1597 or_cpumasks_related(cpu
, cpu
, submask_fn
, cpu_core_mask
);
1599 /* Skip all CPUs already part of current CPU core mask */
1600 cpumask_andnot(mask
, cpu_online_mask
, cpu_core_mask(cpu
));
1602 /* If chip_id is -1; limit the cpu_core_mask to within PKG */
1604 cpumask_and(mask
, mask
, cpu_cpu_mask(cpu
));
1606 for_each_cpu(i
, mask
) {
1607 if (chip_id
== cpu_to_chip_id(i
)) {
1608 or_cpumasks_related(cpu
, i
, submask_fn
, cpu_core_mask
);
1609 cpumask_andnot(mask
, mask
, submask_fn(i
));
1611 cpumask_andnot(mask
, mask
, cpu_core_mask(i
));
1615 free_cpumask_var(mask
);
1618 /* Activate a secondary processor. */
/*
 * start_secondary - C entry point executed by a secondary CPU coming up.
 * @unused: not referenced in the function body shown here
 *
 * In order, the new CPU:
 *   - takes init_mm as its active (lazy-TLB) mm and adds itself to
 *     init_mm's cpumask and active-CPU count;
 *   - stores its CPU info, calls set_dec(tb_ticks_per_jiffy), reports to
 *     RCU, and sets cpu_callin_map[cpu];
 *   - runs the optional platform setup_cpu() and take_timebase() hooks and
 *     initialises its timekeeping;
 *   - sets its NUMA node / memory node and joins the topology masks;
 *   - sets shared_caches if its L2 mask is wider than its sibling mask;
 *   - notifies the hotplug core, marks itself online, initialises the
 *     stack canary, re-enables ftrace and enters cpu_startup_entry().
 *
 * Marked __no_stack_protector; note that boot_init_stack_canary() is
 * called from inside this function.
 */
1619 __no_stack_protector
1620 void start_secondary(void *unused
)
1622 unsigned int cpu
= raw_smp_processor_id();
1624 /* PPC64 calls setup_kup() in early_setup_secondary() */
1625 if (IS_ENABLED(CONFIG_PPC32
))
1628 mmgrab_lazy_tlb(&init_mm
);
1629 current
->active_mm
= &init_mm
;
1630 VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm
)));
1631 cpumask_set_cpu(cpu
, mm_cpumask(&init_mm
));
1632 inc_mm_active_cpus(&init_mm
);
1634 smp_store_cpu_info(cpu
);
1635 set_dec(tb_ticks_per_jiffy
);
1636 rcutree_report_cpu_starting(cpu
);
/* __cpu_up() on the initiating CPU polls this word to see we are alive. */
1637 cpu_callin_map
[cpu
] = 1;
1639 if (smp_ops
->setup_cpu
)
1640 smp_ops
->setup_cpu(cpu
);
1641 if (smp_ops
->take_timebase
)
1642 smp_ops
->take_timebase();
1644 secondary_cpu_time_init();
1646 #ifdef CONFIG_PPC64_PROC_SYSTEMCFG
1647 if (system_state
== SYSTEM_RUNNING
)
1648 systemcfg
->processorCount
++;
1654 set_numa_node(numa_cpu_lookup_table
[cpu
]);
1655 set_numa_mem(local_memory_node(numa_cpu_lookup_table
[cpu
]));
1657 /* Update topology CPU masks */
1658 add_cpu_to_masks(cpu
);
1661 * Check for any shared caches. Note that this must be done on a
1662 * per-core basis because one core in the pair might be disabled.
1664 if (!shared_caches
) {
1665 struct cpumask
*(*sibling_mask
)(int) = cpu_sibling_mask
;
1666 struct cpumask
*mask
= cpu_l2_cache_mask(cpu
);
1669 sibling_mask
= cpu_smallcore_mask
;
1671 if (cpumask_weight(mask
) > cpumask_weight(sibling_mask(cpu
)))
1672 shared_caches
= true;
1676 notify_cpu_starting(cpu
);
1677 set_cpu_online(cpu
, true);
1679 boot_init_stack_canary();
1683 /* We can enable ftrace for secondary cpus now */
1684 this_cpu_enable_ftrace();
1686 cpu_startup_entry(CPUHP_AP_ONLINE_IDLE
);
/*
 * Scheduler topology table handed to set_sched_topology().  It is filled
 * in by build_sched_topology(), which must leave one trailing NULL entry.
 */
1691 static struct sched_domain_topology_level powerpc_topology
[6];
/*
 * build_sched_topology - assemble powerpc_topology[] and register it.
 *
 * Enables the splpar_asym_pack static branch on shared-processor systems
 * with big cores.  Levels are then appended from the innermost outwards:
 *   SMT   - smallcore_smt_mask when has_big_cores, cpu_smt_mask otherwise
 *           (CONFIG_SCHED_SMT only);
 *   CACHE - when shared_caches is set;
 *   MC    - when coregroups are supported;
 *   PKG   - cpu_cpu_mask.
 * The BUG_ON() ensures a trailing NULL entry remains before the table is
 * passed to set_sched_topology().
 */
1693 static void __init
build_sched_topology(void)
1697 if (is_shared_processor() && has_big_cores
)
1698 static_branch_enable(&splpar_asym_pack
);
1700 #ifdef CONFIG_SCHED_SMT
1701 if (has_big_cores
) {
1702 pr_info("Big cores detected but using small core scheduling\n");
1703 powerpc_topology
[i
++] = (struct sched_domain_topology_level
){
1704 smallcore_smt_mask
, powerpc_smt_flags
, SD_INIT_NAME(SMT
)
1707 powerpc_topology
[i
++] = (struct sched_domain_topology_level
){
1708 cpu_smt_mask
, powerpc_smt_flags
, SD_INIT_NAME(SMT
)
1712 if (shared_caches
) {
1713 powerpc_topology
[i
++] = (struct sched_domain_topology_level
){
1714 shared_cache_mask
, powerpc_shared_cache_flags
, SD_INIT_NAME(CACHE
)
1717 if (has_coregroup_support()) {
1718 powerpc_topology
[i
++] = (struct sched_domain_topology_level
){
1719 cpu_mc_mask
, powerpc_shared_proc_flags
, SD_INIT_NAME(MC
)
1722 powerpc_topology
[i
++] = (struct sched_domain_topology_level
){
1723 cpu_cpu_mask
, powerpc_shared_proc_flags
, SD_INIT_NAME(PKG
)
1726 /* There must be one trailing NULL entry left. */
1727 BUG_ON(i
>= ARRAY_SIZE(powerpc_topology
) - 1);
1729 set_sched_topology(powerpc_topology
);
/*
 * smp_cpus_done - finish SMP bring-up from the boot CPU.
 * @max_cpus: not referenced in the function body shown here
 *
 * Runs the platform setup_cpu() hook for boot_cpuid and the bringup_done()
 * hook when smp_ops provides them, then dumps the NUMA/CPU topology and
 * builds the scheduler topology table.
 */
1732 void __init
smp_cpus_done(unsigned int max_cpus
)
1735 * We are running pinned to the boot CPU, see rest_init().
1737 if (smp_ops
&& smp_ops
->setup_cpu
)
1738 smp_ops
->setup_cpu(boot_cpuid
);
1740 if (smp_ops
&& smp_ops
->bringup_done
)
1741 smp_ops
->bringup_done();
1743 dump_numa_cpu_topology();
1744 build_sched_topology();
1748 * For asym packing, by default lower numbered CPU has higher priority.
1749 * On shared processors, pack to lower numbered core. However avoid moving
1750 * between thread_groups within the same core.
/*
 * arch_asym_cpu_priority - scheduler priority of @cpu for asym packing.
 * @cpu: logical CPU number
 *
 * When the splpar_asym_pack static branch is enabled the priority is
 * -@cpu / threads_per_core, so all threads of one core share a priority
 * and lower-numbered cores rank higher.
 */
1752 int arch_asym_cpu_priority(int cpu
)
1754 if (static_branch_unlikely(&splpar_asym_pack
))
1755 return -cpu
/ threads_per_core
;
1760 #ifdef CONFIG_HOTPLUG_CPU
/*
 * __cpu_disable - take the calling CPU out of service.
 *
 * Requires the platform to provide smp_ops->cpu_disable().  Disables
 * ftrace on this CPU, invokes the platform hook and stores its result in
 * err, then removes the CPU from the topology masks.
 */
1761 int __cpu_disable(void)
1763 int cpu
= smp_processor_id();
1766 if (!smp_ops
->cpu_disable
)
1769 this_cpu_disable_ftrace();
1771 err
= smp_ops
->cpu_disable();
1775 /* Update sibling maps */
1776 remove_cpu_from_masks(cpu
);
/*
 * __cpu_die - release per-CPU bookkeeping for @cpu.
 * @cpu: logical CPU number being torn down
 *
 * Reverses the init_mm accounting done in start_secondary(): warns once if
 * @cpu is not in init_mm's cpumask, decrements the active-CPU count and
 * clears the bit.  Then calls the platform cpu_die() hook if one exists.
 */
1781 void __cpu_die(unsigned int cpu
)
1784 * This could perhaps be a generic call in idlea_task_dead(), but
1785 * that requires testing from all archs, so first put it here to
1787 VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu
, mm_cpumask(&init_mm
)));
1788 dec_mm_active_cpus(&init_mm
);
1789 cpumask_clear_cpu(cpu
, mm_cpumask(&init_mm
));
1791 if (smp_ops
->cpu_die
)
1792 smp_ops
->cpu_die(cpu
);
/*
 * arch_cpu_idle_dead - final step of the calling CPU going offline.
 *
 * Disables ftrace on this CPU, then calls the platform cpu_offline_self()
 * hook if one exists.  Should that hook return, execution continues into
 * start_secondary_resume(); the function is declared __noreturn.
 */
1795 void __noreturn
arch_cpu_idle_dead(void)
1798 * Disable on the down path. This will be re-enabled by
1799 * start_secondary() via start_secondary_resume() below
1801 this_cpu_disable_ftrace();
1803 if (smp_ops
->cpu_offline_self
)
1804 smp_ops
->cpu_offline_self();
1806 /* If we return, we re-enter start_secondary */
1807 start_secondary_resume();