1 // SPDX-License-Identifier: GPL-2.0
3 * Arch specific cpu topology information
5 * Copyright (C) 2016, ARM Ltd.
6 * Written by: Juri Lelli, ARM Ltd.
9 #include <linux/acpi.h>
10 #include <linux/cpu.h>
11 #include <linux/cpufreq.h>
12 #include <linux/device.h>
14 #include <linux/slab.h>
15 #include <linux/string.h>
16 #include <linux/sched/topology.h>
17 #include <linux/cpuset.h>
18 #include <linux/cpumask.h>
19 #include <linux/init.h>
20 #include <linux/percpu.h>
21 #include <linux/sched.h>
22 #include <linux/smp.h>
24 bool topology_scale_freq_invariant(void)
26 return cpufreq_supports_freq_invariance() ||
27 arch_freq_counters_available(cpu_online_mask
);
/*
 * Weak definition: the __weak attribute allows another object file to
 * supply its own arch_freq_counters_available() that takes precedence.
 * @cpus: CPU mask the query applies to.
 *
 * NOTE(review): the function body is absent from this listing (the embedded
 * line numbers jump from 30 to 34), so the value returned by this default
 * cannot be confirmed from here.
 */
30 __weak
bool arch_freq_counters_available(const struct cpumask
*cpus
)
34 DEFINE_PER_CPU(unsigned long, freq_scale
) = SCHED_CAPACITY_SCALE
;
36 void topology_set_freq_scale(const struct cpumask
*cpus
, unsigned long cur_freq
,
37 unsigned long max_freq
)
42 if (WARN_ON_ONCE(!cur_freq
|| !max_freq
))
46 * If the use of counters for FIE is enabled, just return as we don't
47 * want to update the scale factor with information from CPUFREQ.
48 * Instead the scale factor will be updated from arch_scale_freq_tick.
50 if (arch_freq_counters_available(cpus
))
53 scale
= (cur_freq
<< SCHED_CAPACITY_SHIFT
) / max_freq
;
56 per_cpu(freq_scale
, i
) = scale
;
59 DEFINE_PER_CPU(unsigned long, cpu_scale
) = SCHED_CAPACITY_SCALE
;
61 void topology_set_cpu_scale(unsigned int cpu
, unsigned long capacity
)
63 per_cpu(cpu_scale
, cpu
) = capacity
;
66 DEFINE_PER_CPU(unsigned long, thermal_pressure
);
68 void topology_set_thermal_pressure(const struct cpumask
*cpus
,
69 unsigned long th_pressure
)
73 for_each_cpu(cpu
, cpus
)
74 WRITE_ONCE(per_cpu(thermal_pressure
, cpu
), th_pressure
);
77 static ssize_t
cpu_capacity_show(struct device
*dev
,
78 struct device_attribute
*attr
,
81 struct cpu
*cpu
= container_of(dev
, struct cpu
, dev
);
83 return sysfs_emit(buf
, "%lu\n", topology_get_cpu_scale(cpu
->dev
.id
));
/*
 * Work item that runs update_topology_flags_workfn(); the handler is
 * defined further down, hence the forward declaration.
 */
static void update_topology_flags_workfn(struct work_struct *work);
static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);

/* Read-only "cpu_capacity" device attribute (dev_attr_cpu_capacity). */
static DEVICE_ATTR_RO(cpu_capacity);
/*
 * Initcall (registered with subsys_initcall below) that walks every
 * possible CPU, looks up its device with get_cpu_device() and creates the
 * cpu_capacity attribute file on it.
 *
 * NOTE(review): this listing is missing source lines 92-95, 98, 100-102 and
 * 104-107 (local declarations, the condition guarding the pr_err(), the
 * pr_err() arguments and the return statement). The error message suggests
 * the pr_err() covers a failed device lookup - confirm against the tree.
 * NOTE(review): the return value of device_create_file() is not used in the
 * visible code.
 */
91 static int register_cpu_capacity_sysctl(void)
96 for_each_possible_cpu(i
) {
97 cpu
= get_cpu_device(i
);
99 pr_err("%s: too early to get CPU%d device!\n",
103 device_create_file(cpu
, &dev_attr_cpu_capacity
);
108 subsys_initcall(register_cpu_capacity_sysctl
);
/* File-local flag exposed read-only through topology_update_cpu_topology(). */
static int update_topology;

/* Return the current value of the update_topology flag. */
int topology_update_cpu_topology(void)
{
	return update_topology;
}
118 * Updating the sched_domains can't be done directly from cpufreq callbacks
119 * due to locking, so queue the work for later.
/*
 * Work handler behind update_topology_flags_work: rebuilds the scheduler
 * domains and emits a debug message once that is done.
 * @work: the work item being run (not referenced in the visible lines).
 *
 * NOTE(review): source lines 122-123 and 126-127 are missing from this
 * listing, so statements may exist before rebuild_sched_domains() and after
 * the pr_debug() that cannot be seen here.
 */
121 static void update_topology_flags_workfn(struct work_struct
*work
)
124 rebuild_sched_domains();
125 pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
129 static DEFINE_PER_CPU(u32
, freq_factor
) = 1;
130 static u32
*raw_capacity
;
/*
 * NOTE(review): only the signature of free_raw_capacity() survives in this
 * listing (source lines 133-139 are missing). The name suggests it releases
 * the raw_capacity array; it takes no arguments, returns int and is also
 * registered with core_initcall() further down. Confirm the body against
 * the tree.
 */
132 static int free_raw_capacity(void)
140 void topology_normalize_cpu_scale(void)
150 for_each_possible_cpu(cpu
) {
151 capacity
= raw_capacity
[cpu
] * per_cpu(freq_factor
, cpu
);
152 capacity_scale
= max(capacity
, capacity_scale
);
155 pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale
);
156 for_each_possible_cpu(cpu
) {
157 capacity
= raw_capacity
[cpu
] * per_cpu(freq_factor
, cpu
);
158 capacity
= div64_u64(capacity
<< SCHED_CAPACITY_SHIFT
,
160 topology_set_cpu_scale(cpu
, capacity
);
161 pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
162 cpu
, topology_get_cpu_scale(cpu
));
/*
 * topology_parse_cpu_capacity() - read the raw capacity of one CPU from DT.
 * @cpu_node: device tree node of the CPU.
 * @cpu:      index under which the value is stored in raw_capacity[].
 *
 * Reads the "capacity-dmips-mhz" property of @cpu_node, allocates
 * raw_capacity on demand and stores the value in raw_capacity[cpu]. It
 * then looks up the CPU clock and, when that lookup yields a valid clock,
 * sets freq_factor for @cpu to the clock rate divided by 1000. The static
 * flag cap_parsing_failed is tested at the top and set on the visible
 * failure paths.
 *
 * NOTE(review): many source lines are missing from this listing (167-168,
 * 170-172, 174-175, 177-179, 182-183, 185-187, 202-205, 207, 209 and
 * everything after 210): declarations, the conditions selecting each
 * branch, the return statements and any clock release. The exact control
 * flow and the return value must be confirmed against the tree.
 * NOTE(review): raw_capacity is allocated with num_possible_cpus() elements
 * but indexed with @cpu; this is only in bounds if possible CPU ids are
 * dense - confirm.
 */
166 bool __init
topology_parse_cpu_capacity(struct device_node
*cpu_node
, int cpu
)
169 static bool cap_parsing_failed
;
173 if (cap_parsing_failed
)
176 ret
= of_property_read_u32(cpu_node
, "capacity-dmips-mhz",
180 raw_capacity
= kcalloc(num_possible_cpus(),
181 sizeof(*raw_capacity
),
184 cap_parsing_failed
= true;
188 raw_capacity
[cpu
] = cpu_capacity
;
189 pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
190 cpu_node
, raw_capacity
[cpu
]);
193 * Update freq_factor for calculating early boot cpu capacities.
194 * For non-clk CPU DVFS mechanism, there's no way to get the
195 * frequency value now, assuming they are running at the same
196 * frequency (by keeping the initial freq_factor value).
198 cpu_clk
= of_clk_get(cpu_node
, 0);
199 if (!PTR_ERR_OR_ZERO(cpu_clk
)) {
200 per_cpu(freq_factor
, cpu
) =
201 clk_get_rate(cpu_clk
) / 1000;
206 pr_err("cpu_capacity: missing %pOF raw capacity\n",
208 pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
210 cap_parsing_failed
= true;
217 #ifdef CONFIG_CPU_FREQ
218 static cpumask_var_t cpus_to_visit
;
219 static void parsing_done_workfn(struct work_struct
*work
);
220 static DECLARE_WORK(parsing_done_work
, parsing_done_workfn
);
/*
 * cpufreq policy notifier callback (installed through
 * init_cpu_capacity_notifier).
 * @nb: the notifier block (not referenced in the visible lines).
 *
 * Events are compared against CPUFREQ_CREATE_POLICY. The policy's
 * related_cpus are removed from cpus_to_visit and freq_factor for each of
 * them is set to policy->cpuinfo.max_freq / 1000. Once cpus_to_visit is
 * empty, capacities are re-normalised and both update_topology_flags_work
 * and parsing_done_work are scheduled.
 *
 * NOTE(review): source lines 221-222, 224-226, 228-232, 234-235, 248 and
 * everything after 250 are missing from this listing: the return type, the
 * remaining parameters, local declarations, the statement taken when the
 * event does not match, one statement between the two schedule_work()
 * calls and the return value. Confirm against the tree.
 */
223 init_cpu_capacity_callback(struct notifier_block
*nb
,
227 struct cpufreq_policy
*policy
= data
;
233 if (val
!= CPUFREQ_CREATE_POLICY
)
236 pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
237 cpumask_pr_args(policy
->related_cpus
),
238 cpumask_pr_args(cpus_to_visit
));
240 cpumask_andnot(cpus_to_visit
, cpus_to_visit
, policy
->related_cpus
);
242 for_each_cpu(cpu
, policy
->related_cpus
)
243 per_cpu(freq_factor
, cpu
) = policy
->cpuinfo
.max_freq
/ 1000;
245 if (cpumask_empty(cpus_to_visit
)) {
246 topology_normalize_cpu_scale();
247 schedule_work(&update_topology_flags_work
);
249 pr_debug("cpu_capacity: parsing done\n");
250 schedule_work(&parsing_done_work
);
256 static struct notifier_block init_cpu_capacity_notifier
= {
257 .notifier_call
= init_cpu_capacity_callback
,
/*
 * Initcall (registered with core_initcall below) that installs
 * init_cpu_capacity_notifier as a CPUFREQ_POLICY_NOTIFIER.
 *
 * Registration is skipped when ACPI is enabled or when no raw capacities
 * were parsed. Otherwise cpus_to_visit is allocated, seeded with
 * cpu_possible_mask, and the notifier is registered; the visible
 * free_cpumask_var() call releases the mask again.
 *
 * NOTE(review): source lines 261-264, 268, 270-271, 273-274, 279-280 and
 * 282-284 are missing from this listing: the declaration of ret, the
 * statements taken on each early-exit condition, the condition guarding
 * free_cpumask_var() and the final return. The returned error codes cannot
 * be confirmed from here.
 */
260 static int __init
register_cpufreq_notifier(void)
265 * on ACPI-based systems we need to use the default cpu capacity
266 * until we have the necessary code to parse the cpu capacity, so
267 * skip registering cpufreq notifier.
269 if (!acpi_disabled
|| !raw_capacity
)
272 if (!alloc_cpumask_var(&cpus_to_visit
, GFP_KERNEL
))
275 cpumask_copy(cpus_to_visit
, cpu_possible_mask
);
277 ret
= cpufreq_register_notifier(&init_cpu_capacity_notifier
,
278 CPUFREQ_POLICY_NOTIFIER
);
281 free_cpumask_var(cpus_to_visit
);
285 core_initcall(register_cpufreq_notifier
);
287 static void parsing_done_workfn(struct work_struct
*work
)
289 cpufreq_unregister_notifier(&init_cpu_capacity_notifier
,
290 CPUFREQ_POLICY_NOTIFIER
);
291 free_cpumask_var(cpus_to_visit
);
/*
 * Run free_raw_capacity() as a core initcall.
 * NOTE(review): source lines 292-294 and 296-297 are missing from this
 * listing; presumably this registration sits in the #else branch of the
 * CONFIG_CPU_FREQ conditional opened above - confirm against the tree.
 */
295 core_initcall(free_raw_capacity
);
298 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
300 * This function returns the logic cpu number of the node.
301 * There are basically three kinds of return values:
302 * (1) logic cpu number which is > 0.
303 * (2) -ENODEV when the device tree(DT) node is valid and found in the DT but
304 * there is no possible logical CPU in the kernel to match. This happens
305 * when CONFIG_NR_CPUS is configure to be smaller than the number of
306 * CPU nodes in DT. We need to just ignore this case.
307 * (3) -1 if the node does not exist in the device tree
309 static int __init
get_cpu_for_node(struct device_node
*node
)
311 struct device_node
*cpu_node
;
314 cpu_node
= of_parse_phandle(node
, "cpu", 0);
318 cpu
= of_cpu_node_to_id(cpu_node
);
320 topology_parse_cpu_capacity(cpu_node
, cpu
);
322 pr_info("CPU node for %pOF exist but the possible cpu range is :%*pbl\n",
323 cpu_node
, cpumask_pr_args(cpu_possible_mask
));
325 of_node_put(cpu_node
);
/*
 * parse_core() - record topology ids for one "core" node of the cpu-map.
 * @core:       the core node.
 * @package_id: package id stored for every CPU found under @core.
 *
 * Children named "thread<i>" are looked up; each one is mapped to a logical
 * CPU with get_cpu_for_node() and its package_id, core_id and thread_id
 * (the index i) are stored in cpu_topology[]. The core node itself is then
 * mapped as well, in which case only package_id and core_id are stored.
 * Failures other than -ENODEV are reported with pr_err(), as is a core that
 * has both threads and a CPU of its own.
 *
 * NOTE(review): a large part of this function is missing from the listing
 * (source lines 330-335, 337-338, 341-342, 344, 350-357, 359-360, 362-365
 * and everything after 369): the core_id parameter line, local
 * declarations, the loop construct, the conditions selecting each branch,
 * node reference handling and all return statements. Confirm control flow
 * and return values against the tree.
 */
329 static int __init
parse_core(struct device_node
*core
, int package_id
,
336 struct device_node
*t
;
339 snprintf(name
, sizeof(name
), "thread%d", i
);
340 t
= of_get_child_by_name(core
, name
);
343 cpu
= get_cpu_for_node(t
);
345 cpu_topology
[cpu
].package_id
= package_id
;
346 cpu_topology
[cpu
].core_id
= core_id
;
347 cpu_topology
[cpu
].thread_id
= i
;
348 } else if (cpu
!= -ENODEV
) {
349 pr_err("%pOF: Can't get CPU for thread\n", t
);
358 cpu
= get_cpu_for_node(core
);
361 pr_err("%pOF: Core has both threads and CPU\n",
366 cpu_topology
[cpu
].package_id
= package_id
;
367 cpu_topology
[cpu
].core_id
= core_id
;
368 } else if (leaf
&& cpu
!= -ENODEV
) {
369 pr_err("%pOF: Can't get CPU for leaf core\n", core
);
/*
 * parse_cluster() - walk one cluster node of the cpu-map.
 * @cluster: the cluster node.
 * @depth:   nesting level; the function recurses with depth + 1.
 *
 * Children named "cluster<i>" are handled by recursion; children named
 * "core<i>" are handed to parse_core() together with the function-static
 * package_id and a core_id that is post-incremented per core. Misplaced
 * cores are reported with pr_err() and a leaf cluster without cores
 * triggers a pr_warn().
 *
 * NOTE(review): a large part of this function is missing from the listing
 * (source lines 377-379, 383-386, 390-392, 395-396, 398-404, 406-407,
 * 410-413, 415-420, 422, 424-434 and everything after 436): local
 * declarations, both loop constructs, the conditions guarding each
 * pr_err(), node reference handling, any update of package_id and all
 * return statements. Confirm against the tree.
 */
376 static int __init
parse_cluster(struct device_node
*cluster
, int depth
)
380 bool has_cores
= false;
381 struct device_node
*c
;
382 static int package_id __initdata
;
387 * First check for child clusters; we currently ignore any
388 * information about the nesting of clusters and present the
389 * scheduler with a flat list of them.
393 snprintf(name
, sizeof(name
), "cluster%d", i
);
394 c
= of_get_child_by_name(cluster
, name
);
397 ret
= parse_cluster(c
, depth
+ 1);
405 /* Now check for cores */
408 snprintf(name
, sizeof(name
), "core%d", i
);
409 c
= of_get_child_by_name(cluster
, name
);
414 pr_err("%pOF: cpu-map children should be clusters\n",
421 ret
= parse_core(c
, package_id
, core_id
++);
423 pr_err("%pOF: Non-leaf cluster with core %s\n",
435 if (leaf
&& !has_cores
)
436 pr_warn("%pOF: empty cluster\n", cluster
);
/*
 * parse_dt_topology() - build cpu_topology[] from the device tree.
 *
 * Looks up the "/cpus" node and its "cpu-map" child, parses the map as a
 * root cluster at depth 0, normalises the CPU capacities and then checks
 * every possible CPU for a package_id that is still -1.
 *
 * NOTE(review): source lines 445, 447-449, 451, 453-455, 461-463, 465-467,
 * 469 and everything after 475 are missing from this listing: remaining
 * declarations, the conditions guarding the early exits, the statement
 * executed for an unpopulated CPU, node reference release and the return.
 * Confirm against the tree.
 */
444 static int __init
parse_dt_topology(void)
446 struct device_node
*cn
, *map
;
450 cn
= of_find_node_by_path("/cpus");
452 pr_err("No CPU information found in DT\n");
457 * When topology is provided cpu-map is essentially a root
458 * cluster with restricted subnodes.
460 map
= of_get_child_by_name(cn
, "cpu-map");
464 ret
= parse_cluster(map
, 0);
468 topology_normalize_cpu_scale();
471 * Check that all cores are in the topology; the SMP code will
472 * only mark cores described in the DT as possible.
474 for_each_possible_cpu(cpu
)
475 if (cpu_topology
[cpu
].package_id
== -1)
489 struct cpu_topology cpu_topology
[NR_CPUS
];
490 EXPORT_SYMBOL_GPL(cpu_topology
);
492 const struct cpumask
*cpu_coregroup_mask(int cpu
)
494 const cpumask_t
*core_mask
= cpumask_of_node(cpu_to_node(cpu
));
496 /* Find the smaller of NUMA, core or LLC siblings */
497 if (cpumask_subset(&cpu_topology
[cpu
].core_sibling
, core_mask
)) {
498 /* not numa in package, lets use the package siblings */
499 core_mask
= &cpu_topology
[cpu
].core_sibling
;
501 if (cpu_topology
[cpu
].llc_id
!= -1) {
502 if (cpumask_subset(&cpu_topology
[cpu
].llc_sibling
, core_mask
))
503 core_mask
= &cpu_topology
[cpu
].llc_sibling
;
509 void update_siblings_masks(unsigned int cpuid
)
511 struct cpu_topology
*cpu_topo
, *cpuid_topo
= &cpu_topology
[cpuid
];
514 /* update core and thread sibling masks */
515 for_each_online_cpu(cpu
) {
516 cpu_topo
= &cpu_topology
[cpu
];
518 if (cpuid_topo
->llc_id
== cpu_topo
->llc_id
) {
519 cpumask_set_cpu(cpu
, &cpuid_topo
->llc_sibling
);
520 cpumask_set_cpu(cpuid
, &cpu_topo
->llc_sibling
);
523 if (cpuid_topo
->package_id
!= cpu_topo
->package_id
)
526 cpumask_set_cpu(cpuid
, &cpu_topo
->core_sibling
);
527 cpumask_set_cpu(cpu
, &cpuid_topo
->core_sibling
);
529 if (cpuid_topo
->core_id
!= cpu_topo
->core_id
)
532 cpumask_set_cpu(cpuid
, &cpu_topo
->thread_sibling
);
533 cpumask_set_cpu(cpu
, &cpuid_topo
->thread_sibling
);
537 static void clear_cpu_topology(int cpu
)
539 struct cpu_topology
*cpu_topo
= &cpu_topology
[cpu
];
541 cpumask_clear(&cpu_topo
->llc_sibling
);
542 cpumask_set_cpu(cpu
, &cpu_topo
->llc_sibling
);
544 cpumask_clear(&cpu_topo
->core_sibling
);
545 cpumask_set_cpu(cpu
, &cpu_topo
->core_sibling
);
546 cpumask_clear(&cpu_topo
->thread_sibling
);
547 cpumask_set_cpu(cpu
, &cpu_topo
->thread_sibling
);
550 void __init
reset_cpu_topology(void)
554 for_each_possible_cpu(cpu
) {
555 struct cpu_topology
*cpu_topo
= &cpu_topology
[cpu
];
557 cpu_topo
->thread_id
= -1;
558 cpu_topo
->core_id
= -1;
559 cpu_topo
->package_id
= -1;
560 cpu_topo
->llc_id
= -1;
562 clear_cpu_topology(cpu
);
566 void remove_cpu_topology(unsigned int cpu
)
570 for_each_cpu(sibling
, topology_core_cpumask(cpu
))
571 cpumask_clear_cpu(cpu
, topology_core_cpumask(sibling
));
572 for_each_cpu(sibling
, topology_sibling_cpumask(cpu
))
573 cpumask_clear_cpu(cpu
, topology_sibling_cpumask(sibling
));
574 for_each_cpu(sibling
, topology_llc_cpumask(cpu
))
575 cpumask_clear_cpu(cpu
, topology_llc_cpumask(sibling
));
577 clear_cpu_topology(cpu
);
/*
 * Weak definition: the __weak attribute allows another object file to
 * supply its own parse_acpi_topology() that takes precedence. Its caller,
 * init_cpu_topology(), treats a non-zero result as a parse error.
 *
 * NOTE(review): the function body is absent from this listing (source
 * lines 581-584 are missing), so the value returned by this default cannot
 * be confirmed from here.
 */
580 __weak
int __init
parse_acpi_topology(void)
585 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
586 void __init
init_cpu_topology(void)
588 reset_cpu_topology();
591 * Discard anything that was parsed if we hit an error so we
592 * don't use partial information.
594 if (parse_acpi_topology())
595 reset_cpu_topology();
596 else if (of_have_populated_dt() && parse_dt_topology())
597 reset_cpu_topology();