2 * arch/arm/kernel/topology.c
4 * Copyright (C) 2011 Linaro Limited.
5 * Written by: Vincent Guittot
7 * based on arch/sh/kernel/topology.c
9 * This file is subject to the terms and conditions of the GNU General Public
10 * License. See the file "COPYING" in the main directory of this archive
14 #include <linux/cpu.h>
15 #include <linux/cpumask.h>
16 #include <linux/init.h>
17 #include <linux/percpu.h>
18 #include <linux/node.h>
19 #include <linux/nodemask.h>
20 #include <linux/sched.h>
21 #include <linux/cpumask.h>
22 #include <linux/cpuset.h>
24 #ifdef CONFIG_CPU_FREQ
25 #include <linux/cpufreq.h>
28 #ifdef CONFIG_DEBUG_FS
29 #include <linux/debugfs.h>
30 #include <linux/uaccess.h> /* for copy_from_user */
33 #include <asm/cputype.h>
34 #include <asm/topology.h>
/*
 * MPIDR field selectors used by store_cpu_topology().
 *
 * store_cpu_topology() treats (mpidr & MPIDR_SMP_BITMASK) == MPIDR_SMP_VALUE
 * as "multiprocessor system" and then tests MPIDR_MT_BITMASK to choose between
 * the thread/core/socket and the core/socket decoding.
 *
 * The constants are unsigned: shifting a signed int so that the result does
 * not fit (0x3 << 30, 0x2 << 30) is undefined behaviour in C.
 */
#define MPIDR_SMP_BITMASK (0x3U << 30)
#define MPIDR_SMP_VALUE (0x2U << 30)

#define MPIDR_MT_BITMASK (0x1U << 24)

/*
 * These masks reflect the current use of the affinity levels.
 * The affinity level can be up to 16 bits according to ARM ARM
 */
#define MPIDR_LEVEL0_MASK 0x3
#define MPIDR_LEVEL0_SHIFT 0

#define MPIDR_LEVEL1_MASK 0xF
#define MPIDR_LEVEL1_SHIFT 8

#define MPIDR_LEVEL2_MASK 0xFF
#define MPIDR_LEVEL2_SHIFT 16
/* One cputopo_arm record per cpu slot (NR_CPUS entries), indexed by cpu number. */
55 struct cputopo_arm cpu_topology
[NR_CPUS
];
58 * cpu power scale management
62 * a per cpu data structure should be better because each cpu is mainly
63 * using its own cpu_power, even if that is not always true because of
/*
 * Per-cpu cpu_power value. Written by set_cpufreq_scale(), set_power_scale()
 * and init_cpu_topology(); returned as-is by arch_scale_freq_power().
 */
67 static DEFINE_PER_CPU(unsigned int, cpu_scale
);
70 * cpu topology mask management
/*
 * Flag tested first by arch_update_cpu_topology(); the statement taken when
 * it is zero is not visible in this extract.
 */
73 unsigned int advanced_topology
= 1;
/*
 * Currently selected mask-building routine. update_cpu_topology_policy()
 * re-selects it and arch_update_cpu_topology() calls through it.
 */
75 static void normal_cpu_topology_mask(void);
76 static void (*set_cpu_topology_mask
)(void) = normal_cpu_topology_mask
;
78 #ifdef CONFIG_CPU_FREQ
80 * This struct describes parameters to compute cpu_power
/*
 * Parameters of one cpu_power lookup table.
 *
 * set_cpufreq_scale() computes an index as freq / step and clamps it to
 * max - 1, so 'max' is used as the number of entries in 'table'.
 *
 * NOTE(review): the embedded numbering jumps from 82 to 84, so one line of
 * this struct (possibly a member) is missing from this extract, as is the
 * closing brace. Confirm against the upstream file.
 */
82 struct cputopo_power
{
84 int max
; /* max idx in the table */
85 unsigned int step
; /* frequency step for the table */
86 unsigned int *table
; /* table of cpu_power */
89 /* default table with one default cpu_power value */
/* NOTE(review): the single initialiser value (original line 91) is missing from this extract. */
90 unsigned int table_default_power
[1] = {
/*
 * Descriptor installed for every possible cpu by topo_cpufreq_init().
 * NOTE(review): only the .table initialiser is visible; original lines 95-96
 * (presumably .max and .step) are missing. set_cpufreq_scale() divides by
 * .step, so confirm it is non-zero.
 */
94 static struct cputopo_power default_cpu_power
= {
97 .table
= table_default_power
,
100 /* CA-9 table with cpufreq modifying cpu_power */
/* Number of entries in table_ca9_power[] */
101 #define CPU_MAX_FREQ 10
102 /* we use a 200MHz step for scaling cpu power */
/* presumably expressed in kHz to match the cpufreq frequency values; confirm */
103 #define CPU_TOPO_FREQ_STEP 200000
104 /* This table sets the cpu_power scale of a cpu according to 2 inputs which are
105 * the frequency and the sched_mc mode. The content of this table could be SoC
106 * specific so we should add a method to overwrite this default table.
107 * TODO: Study how to use DT for setting this table
/*
 * cpu_power value per frequency bucket for Cortex-A9: the three lowest
 * buckets map to 4096, every other bucket to 1024. Referenced by
 * CA9_cpu_power.table.
 * NOTE(review): the closing brace (original line 112) is missing from this extract.
 */
109 unsigned int table_ca9_power
[CPU_MAX_FREQ
] = {
110 /* freq< 200 400 600 800 1000 1200 1400 1600 1800 other*/
111 4096, 4096, 4096, 1024, 1024, 1024, 1024, 1024, 1024, 1024, /* Power save mode CA9 MP */
/*
 * Descriptor for the Cortex-A9 table: CPU_TOPO_FREQ_STEP wide buckets over
 * table_ca9_power[].
 * NOTE(review): original line 115 (presumably the .max initialiser) and the
 * closing brace are missing from this extract.
 */
114 static struct cputopo_power CA9_cpu_power
= {
116 .step
= CPU_TOPO_FREQ_STEP
,
117 .table
= table_ca9_power
,
/* Indices into table_config[], passed as 'idx' to set_power_scale(). */
120 #define ARM_CORTEX_A9_DEFAULT_SCALE 0
121 #define ARM_CORTEX_A9_POWER_SCALE 1
122 /* This table lists all possible cpu power configurations */
/* NOTE(review): the two initialiser entries (original lines 124-125) are missing from this extract. */
123 struct cputopo_power
*table_config
[2] = {
/*
 * Per-cpu scaling state kept in cpu_power[].
 * NOTE(review): only the 'power' member is visible. Other code in this file
 * also accesses '.id' and '.freq', whose declarations (original lines
 * 129-130) are missing from this extract, so their types cannot be confirmed.
 */
128 struct cputopo_scale
{
131 struct cputopo_power
*power
;
135 * The table will be mostly used by one cpu which will update the
136 * configuration for all cpu on a cpufreq notification
137 * or a sched_mc level change
/* Scaling state for each cpu slot, indexed by cpu number. */
139 static struct cputopo_scale cpu_power
[NR_CPUS
];
/*
 * set_cpufreq_scale - recompute the cpu_scale of @cpuid for frequency @freq
 *
 * Stores @freq in cpu_power[cpuid].freq, derives a table index as
 * freq / power->step, clamps it to power->max - 1 and copies the selected
 * table entry into the per-cpu cpu_scale.
 *
 * NOTE(review): the embedded numbering jumps from 141 to 145, so the opening
 * brace and the declaration of idx are missing from this extract; the type
 * of idx cannot be confirmed. power->step is a divisor: verify that no
 * descriptor is installed with step == 0.
 */
141 static void set_cpufreq_scale(unsigned int cpuid
, unsigned int freq
)
145 cpu_power
[cpuid
].freq
= freq
;
147 idx
= freq
/ cpu_power
[cpuid
].power
->step
;
148 if (idx
>= cpu_power
[cpuid
].power
->max
)
149 idx
= cpu_power
[cpuid
].power
->max
- 1;
151 per_cpu(cpu_scale
, cpuid
) = cpu_power
[cpuid
].power
->table
[idx
];
155 static void set_power_scale(unsigned int cpu
, unsigned int idx
)
157 cpu_power
[cpu
].id
= idx
;
158 cpu_power
[cpu
].power
= table_config
[idx
];
160 set_cpufreq_scale(cpu
, cpu_power
[cpu
].freq
);
/*
 * topo_cpufreq_transition - cpufreq notifier callback (see topo_cpufreq_nb)
 *
 * @data is used as a struct cpufreq_freqs. For CPUFREQ_POSTCHANGE and
 * CPUFREQ_RESUMECHANGE the cpu_scale of freqs->cpu is recomputed for
 * freqs->new; other states are ignored by the visible code.
 *
 * NOTE(review): the return statement (after original line 169) is missing
 * from this extract.
 */
163 static int topo_cpufreq_transition(struct notifier_block
*nb
,
164 unsigned long state
, void *data
)
166 struct cpufreq_freqs
*freqs
= data
;
168 if (state
== CPUFREQ_POSTCHANGE
|| state
== CPUFREQ_RESUMECHANGE
)
169 set_cpufreq_scale(freqs
->cpu
, freqs
->new);
/* Notifier block registered by topo_cpufreq_init() for cpufreq transitions. */
174 static struct notifier_block topo_cpufreq_nb
= {
175 .notifier_call
= topo_cpufreq_transition
,
/*
 * topo_cpufreq_init - reset scaling state and hook into cpufreq
 *
 * Sets freq to 0 and the descriptor to default_cpu_power for every possible
 * cpu, then registers topo_cpufreq_nb as a CPUFREQ_TRANSITION_NOTIFIER.
 *
 * Returns the result of cpufreq_register_notifier().
 *
 * NOTE(review): the declaration of 'cpu' and original lines 183-184 are
 * missing from this extract.
 */
178 static int topo_cpufreq_init(void)
182 /* TODO set initial value according to current freq */
185 for_each_possible_cpu(cpu
) {
186 cpu_power
[cpu
].freq
= 0;
187 cpu_power
[cpu
].power
= &default_cpu_power
;
190 return cpufreq_register_notifier(&topo_cpufreq_nb
,
191 CPUFREQ_TRANSITION_NOTIFIER
);
/*
 * Second definition of the scale indices and table_config[]: here both
 * indices are 0 and the table holds plain unsigned int values.
 * NOTE(review): presumably the branch built without CONFIG_CPU_FREQ; the
 * preprocessor line that separates it from the first definition is missing
 * from this extract, as is the table's initialiser value.
 */
194 #define ARM_CORTEX_A9_DEFAULT_SCALE 0
195 #define ARM_CORTEX_A9_POWER_SCALE 0
196 /* This table lists all possible cpu power configurations */
197 unsigned int table_config
[1] = {
201 static void set_power_scale(unsigned int cpu
, unsigned int idx
)
203 per_cpu(cpu_scale
, cpu
) = table_config
[idx
];
/* Stub variant: there is nothing to register, so it always reports success. */
static inline int topo_cpufreq_init(void)
{
	return 0;
}
/*
 * init_cpu_power_scale - core_initcall that enables the advanced topology
 *
 * Sets advanced_topology to 1 and calls rebuild_sched_domains() to force a
 * cpu topology update.
 *
 * NOTE(review): the statement that followed the "register cpufreq notifier"
 * comment (original line 212) and the return statement are missing from
 * this extract.
 */
209 static int init_cpu_power_scale(void)
211 /* register cpufreq notifier */
214 /* Do we need to change default config */
215 advanced_topology
= 1;
217 /* Force a cpu topology update */
218 rebuild_sched_domains();
223 core_initcall(init_cpu_power_scale
);
226 * Update the cpu power
229 unsigned long arch_scale_freq_power(struct sched_domain
*sd
, int cpu
)
231 return per_cpu(cpu_scale
, cpu
);
235 * sched_domain flag configuration
237 /* TODO add a config flag for this function */
/*
 * arch_sd_sibling_asym_packing - sched_domain flag for sibling packing
 *
 * Returns SD_ASYM_PACKING when either sched_smt_power_savings or
 * sched_mc_power_savings is non-zero.
 *
 * NOTE(review): the fall-through return (original line 243) is missing from
 * this extract.
 */
238 int arch_sd_sibling_asym_packing(void)
240 if (sched_smt_power_savings
|| sched_mc_power_savings
)
241 return SD_ASYM_PACKING
;
246 * default topology function
249 const struct cpumask
*cpu_coregroup_mask(int cpu
)
251 return &(cpu_topology
[cpu
].core_sibling
);
255 * clear cpu topology masks
/*
 * clear_cpu_topology_mask - empty the core and thread sibling masks of every
 * possible cpu. Called by arch_update_cpu_topology() before the masks are
 * rebuilt.
 *
 * NOTE(review): the declaration of 'cpuid' and whatever followed the loop
 * (original lines 264-266) are missing from this extract.
 */
257 static void clear_cpu_topology_mask(void)
260 for_each_possible_cpu(cpuid
) {
261 struct cputopo_arm
*cpuid_topo
= &(cpu_topology
[cpuid
]);
262 cpumask_clear(&cpuid_topo
->core_sibling
);
263 cpumask_clear(&cpuid_topo
->thread_sibling
);
269 * default_cpu_topology_mask sets the core and thread masks as described in the
/*
 * default_cpu_topology_mask - link @cpuid with its core and thread siblings
 *
 * Walks every possible cpu. When a cpu has the same socket_id as @cpuid,
 * @cpuid is added to that cpu's core_sibling mask; when it also has the same
 * core_id, @cpuid is added to that cpu's thread_sibling mask.
 *
 * NOTE(review): several lines are missing from this extract (the declaration
 * of 'cpu' and original lines 282-283 and 289-290). The surviving
 * "&cpuid_topo->..." arguments suggest the reverse links are set as well,
 * but the calls themselves are not visible; confirm against upstream.
 */
272 static inline void default_cpu_topology_mask(unsigned int cpuid
)
274 struct cputopo_arm
*cpuid_topo
= &cpu_topology
[cpuid
];
277 for_each_possible_cpu(cpu
) {
278 struct cputopo_arm
*cpu_topo
= &cpu_topology
[cpu
];
280 if (cpuid_topo
->socket_id
== cpu_topo
->socket_id
) {
281 cpumask_set_cpu(cpuid
, &cpu_topo
->core_sibling
);
284 &cpuid_topo
->core_sibling
);
286 if (cpuid_topo
->core_id
== cpu_topo
->core_id
) {
287 cpumask_set_cpu(cpuid
,
288 &cpu_topo
->thread_sibling
);
291 &cpuid_topo
->thread_sibling
);
/*
 * normal_cpu_topology_mask - default policy installed in set_cpu_topology_mask
 *
 * For every possible cpu: build the sibling masks with
 * default_cpu_topology_mask() and select ARM_CORTEX_A9_DEFAULT_SCALE.
 *
 * NOTE(review): the declaration of 'cpuid' and whatever followed the loop
 * (original lines 305-307) are missing from this extract.
 */
298 static void normal_cpu_topology_mask(void)
302 for_each_possible_cpu(cpuid
) {
303 default_cpu_topology_mask(cpuid
);
304 set_power_scale(cpuid
, ARM_CORTEX_A9_DEFAULT_SCALE
);
310 * For Cortex-A9 MPcore, we emulate a multi-package topology in power mode.
311 * The goal is to gather tasks on 1 virtual package
/*
 * power_cpu_topology_mask_CA9 - power-saving mask policy for Cortex-A9
 *
 * For every pair of possible cpus with the same socket_id AND the same
 * parity of cpu number (bit 0), cpuid is added to the other cpu's
 * core_sibling mask. Thread siblings are linked when core_id matches, and
 * each cpuid is switched to ARM_CORTEX_A9_POWER_SCALE.
 *
 * NOTE(review): original lines 326-327 and 333-334 are missing from this
 * extract, as are all closing braces, so the nesting of the core_id test and
 * the exact position of set_power_scale() cannot be confirmed here.
 */
313 static void power_cpu_topology_mask_CA9(void)
315 unsigned int cpuid
, cpu
;
317 for_each_possible_cpu(cpuid
) {
318 struct cputopo_arm
*cpuid_topo
= &cpu_topology
[cpuid
];
320 for_each_possible_cpu(cpu
) {
321 struct cputopo_arm
*cpu_topo
= &cpu_topology
[cpu
];
323 if ((cpuid_topo
->socket_id
== cpu_topo
->socket_id
)
324 && ((cpuid
& 0x1) == (cpu
& 0x1))) {
325 cpumask_set_cpu(cpuid
, &cpu_topo
->core_sibling
);
328 &cpuid_topo
->core_sibling
);
330 if (cpuid_topo
->core_id
== cpu_topo
->core_id
) {
331 cpumask_set_cpu(cpuid
,
332 &cpu_topo
->thread_sibling
);
335 &cpuid_topo
->thread_sibling
);
339 set_power_scale(cpuid
, ARM_CORTEX_A9_POWER_SCALE
);
/*
 * update_cpu_topology_policy() masks read_cpuid_id() with ARM_FAMILY_MASK and
 * matches the result against ARM_CORTEX_A9_FAMILY.
 */
344 #define ARM_FAMILY_MASK 0xFF0FFFF0
345 #define ARM_CORTEX_A9_FAMILY 0x410FC090
347 /* update_cpu_topology_policy selects a cpu topology policy according to the
349 * TODO: The current version assumes that all cores are exactly the same, which
350 * might not be true. We need to update it to take into account various
351 * configurations, including systems with different kinds of cores.
/*
 * update_cpu_topology_policy - choose the routine behind set_cpu_topology_mask
 *
 * With sched_mc_power_savings == POWERSAVINGS_BALANCE_NONE the normal policy
 * is selected. The visible remainder reads the cpu id, masks it with
 * ARM_FAMILY_MASK and selects power_cpu_topology_mask_CA9 for
 * ARM_CORTEX_A9_FAMILY, or normal_cpu_topology_mask otherwise.
 *
 * NOTE(review): the local declaration, the switch statement, the default
 * label and all return statements are missing from this extract.
 */
353 static int update_cpu_topology_policy(void)
357 if (sched_mc_power_savings
== POWERSAVINGS_BALANCE_NONE
) {
358 set_cpu_topology_mask
= normal_cpu_topology_mask
;
362 cpuid
= read_cpuid_id();
363 cpuid
&= ARM_FAMILY_MASK
;
366 case ARM_CORTEX_A9_FAMILY
:
367 set_cpu_topology_mask
= power_cpu_topology_mask_CA9
;
370 set_cpu_topology_mask
= normal_cpu_topology_mask
;
378 * store_cpu_topology is called at boot when only one cpu is running
379 * and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
380 * which prevents simultaneous write access to cpu_topology array
/*
 * store_cpu_topology - fill cpu_topology[cpuid] from the MPIDR register
 *
 * Does nothing further when core_id is already set (!= -1). Otherwise it
 * reads MPIDR and decodes it in one of three ways:
 *  - SMP format with the MT bit set: thread_id, core_id and socket_id come
 *    from affinity levels 0, 1 and 2;
 *  - SMP format without the MT bit: thread_id is -1, core_id and socket_id
 *    come from affinity levels 0 and 1;
 *  - otherwise: thread_id -1, core_id 0, socket_id -1.
 * Finally the sibling masks are built with default_cpu_topology_mask() and
 * the result is logged.
 *
 * NOTE(review): the declaration of 'mpidr', the early return and the 'else'
 * lines separating the three cases are missing from this extract.
 */
382 void store_cpu_topology(unsigned int cpuid
)
384 struct cputopo_arm
*cpuid_topo
= &(cpu_topology
[cpuid
]);
387 /* If the cpu topology has been already set, just return */
388 if (cpuid_topo
->core_id
!= -1)
391 mpidr
= read_cpuid_mpidr();
393 /* create cpu topology mapping */
394 if ((mpidr
& MPIDR_SMP_BITMASK
) == MPIDR_SMP_VALUE
) {
396 * This is a multiprocessor system
397 * multiprocessor format & multiprocessor mode field are set
400 if (mpidr
& MPIDR_MT_BITMASK
) {
401 /* core performance interdependency */
402 cpuid_topo
->thread_id
= ((mpidr
>> MPIDR_LEVEL0_SHIFT
)
403 & MPIDR_LEVEL0_MASK
);
404 cpuid_topo
->core_id
= ((mpidr
>> MPIDR_LEVEL1_SHIFT
)
405 & MPIDR_LEVEL1_MASK
);
406 cpuid_topo
->socket_id
= ((mpidr
>> MPIDR_LEVEL2_SHIFT
)
407 & MPIDR_LEVEL2_MASK
);
409 /* largely independent cores */
410 cpuid_topo
->thread_id
= -1;
411 cpuid_topo
->core_id
= ((mpidr
>> MPIDR_LEVEL0_SHIFT
)
412 & MPIDR_LEVEL0_MASK
);
413 cpuid_topo
->socket_id
= ((mpidr
>> MPIDR_LEVEL1_SHIFT
)
414 & MPIDR_LEVEL1_MASK
);
418 * This is an uniprocessor system
419 * we are in multiprocessor format but uniprocessor system
420 * or in the old uniprocessor format
423 cpuid_topo
->thread_id
= -1;
424 cpuid_topo
->core_id
= 0;
425 cpuid_topo
->socket_id
= -1;
429 * The core and thread sibling masks can also be updated during the
430 * call of arch_update_cpu_topology
432 default_cpu_topology_mask(cpuid
);
434 printk(KERN_INFO
"CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
435 cpuid
, cpu_topology
[cpuid
].thread_id
,
436 cpu_topology
[cpuid
].core_id
,
437 cpu_topology
[cpuid
].socket_id
, mpidr
);
441 * arch_update_cpu_topology is called by the scheduler before building
442 * a new sched_domain hierarchy.
/*
 * arch_update_cpu_topology - rebuild sibling masks and cpu_power settings
 *
 * When advanced_topology is set: clears all sibling masks, re-selects the
 * policy with update_cpu_topology_policy() and runs the selected
 * set_cpu_topology_mask routine.
 *
 * NOTE(review): both return statements (after original lines 446 and 456)
 * are missing from this extract, so the returned values cannot be documented.
 */
444 int arch_update_cpu_topology(void)
446 if (!advanced_topology
)
449 /* clear core threads mask */
450 clear_cpu_topology_mask();
452 /* set topology policy */
453 update_cpu_topology_policy();
455 /* set topology mask and power */
456 (*set_cpu_topology_mask
)();
462 * init_cpu_topology is called at boot when only one cpu is running
463 * which prevents simultaneous write access to cpu_topology array
/*
 * init_cpu_topology - reset the topology of every possible cpu
 *
 * Sets thread_id, core_id and socket_id to -1 (the "not yet set" value that
 * store_cpu_topology() checks for on core_id), clears both sibling masks and
 * sets the per-cpu cpu_scale to SCHED_POWER_SCALE.
 *
 * NOTE(review): the declaration of 'cpu' and whatever followed the loop
 * (original lines 480-482) are missing from this extract.
 */
465 void init_cpu_topology(void)
470 for_each_possible_cpu(cpu
) {
471 struct cputopo_arm
*cpu_topo
= &(cpu_topology
[cpu
]);
473 cpu_topo
->thread_id
= -1;
474 cpu_topo
->core_id
= -1;
475 cpu_topo
->socket_id
= -1;
476 cpumask_clear(&cpu_topo
->core_sibling
);
477 cpumask_clear(&cpu_topo
->thread_sibling
);
479 per_cpu(cpu_scale
, cpu
) = SCHED_POWER_SCALE
;
485 * debugfs interface for scaling cpu power
488 #ifdef CONFIG_DEBUG_FS
/* The "cpu_topo" debugfs directory created by topo_debugfs_init(). */
489 static struct dentry
*topo_debugfs_root
;
/*
 * dbg_write - debugfs write handler shared by the files that
 * topo_debugfs_register() creates
 *
 * The target is the unsigned int stored in the inode's i_private. Input is
 * accepted only when @size is smaller than sizeof(cdata) - 1; it is copied
 * from user space and parsed as a base-10 number with strict_strtoul(). With
 * CONFIG_CPU_FREQ, every online cpu then has set_power_scale() re-applied
 * with its current cpu_power[cpu].id.
 *
 * NOTE(review): the declarations of cdata, tmp and cpu, the NUL termination
 * of cdata, the store into *value and every return statement are missing
 * from this extract. No range check on the parsed value is visible, yet for
 * the "scale" file it ends up as the index passed to set_power_scale().
 */
491 static ssize_t
dbg_write(struct file
*file
, const char __user
*buf
,
492 size_t size
, loff_t
*off
)
494 unsigned int *value
= file
->f_dentry
->d_inode
->i_private
;
499 if (size
< (sizeof(cdata
)-1)) {
500 if (copy_from_user(cdata
, buf
, size
))
503 if (!strict_strtoul(cdata
, 10, &tmp
)) {
506 #ifdef CONFIG_CPU_FREQ
507 for_each_online_cpu(cpu
)
508 set_power_scale(cpu
, cpu_power
[cpu
].id
);
/*
 * dbg_read - debugfs read handler shared by the files that
 * topo_debugfs_register() creates
 *
 * Formats the unsigned int stored in the inode's i_private as "%u\n" into
 * cdata and hands it to simple_read_from_buffer(), whose result is returned.
 *
 * NOTE(review): the declarations of cdata and len are missing from this
 * extract, so the buffer size that sprintf() writes into cannot be checked.
 */
516 static ssize_t
dbg_read(struct file
*file
, char __user
*buf
,
517 size_t size
, loff_t
*off
)
519 unsigned int *value
= file
->f_dentry
->d_inode
->i_private
;
523 len
= sprintf(cdata
, "%u\n", *value
);
524 return simple_read_from_buffer(buf
, size
, off
, cdata
, len
);
/*
 * File operations passed to every debugfs_create_file() call in
 * topo_debugfs_register().
 * NOTE(review): the member initialisers (original lines 528-530, presumably
 * wiring dbg_read and dbg_write) are missing from this extract.
 */
527 static const struct file_operations debugfs_fops
= {
/*
 * topo_debugfs_register - create the debugfs entries of one cpu
 * @cpu: cpu number, used for the directory name "cpu<N>"
 * @parent: directory in which the per-cpu directory is created
 *
 * Creates "cpu<N>/cpu_power" backed by the per-cpu cpu_scale and, with
 * CONFIG_CPU_FREQ, "scale" backed by cpu_power[cpu].id and "freq" backed by
 * cpu_power[cpu].freq. debugfs_remove_recursive(cpu_d) is the visible
 * clean-up call.
 *
 * NOTE(review): "cpu_power" and "scale" are created S_IRUGO | S_IWUGO, i.e.
 * writable by every user; consider limiting write access to the owner.
 * "freq" is created read-only but shares debugfs_fops with the writable files.
 * The declaration of cpu_name, the error checks, labels and return
 * statements are missing from this extract.
 */
532 static struct dentry
*topo_debugfs_register(unsigned int cpu
,
533 struct dentry
*parent
)
535 struct dentry
*cpu_d
, *d
;
538 sprintf(cpu_name
, "cpu%u", cpu
);
540 cpu_d
= debugfs_create_dir(cpu_name
, parent
);
544 d
= debugfs_create_file("cpu_power", S_IRUGO
| S_IWUGO
,
545 cpu_d
, &per_cpu(cpu_scale
, cpu
), &debugfs_fops
);
549 #ifdef CONFIG_CPU_FREQ
550 d
= debugfs_create_file("scale", S_IRUGO
| S_IWUGO
,
551 cpu_d
, &cpu_power
[cpu
].id
, &debugfs_fops
);
555 d
= debugfs_create_file("freq", S_IRUGO
,
556 cpu_d
, &cpu_power
[cpu
].freq
, &debugfs_fops
);
563 debugfs_remove_recursive(cpu_d
);
/*
 * topo_debugfs_init - late_initcall that builds the "cpu_topo" debugfs tree
 *
 * Creates the top-level "cpu_topo" directory, stores it in
 * topo_debugfs_root and registers one sub-directory for each possible cpu
 * through topo_debugfs_register(). debugfs_remove_recursive() on the root is
 * the visible clean-up call.
 *
 * NOTE(review): the local declarations, error checks, labels and return
 * statements are missing from this extract.
 */
567 static int __init
topo_debugfs_init(void)
572 d
= debugfs_create_dir("cpu_topo", NULL
);
575 topo_debugfs_root
= d
;
577 for_each_possible_cpu(cpu
) {
578 d
= topo_debugfs_register(cpu
, topo_debugfs_root
);
585 debugfs_remove_recursive(topo_debugfs_root
);
589 late_initcall(topo_debugfs_init
);