1 /* cpumap.c: used for optimizing CPU assignment
3 * Copyright (C) 2009 Hong H. Pham <hong.pham@windriver.com>
6 #include <linux/export.h>
7 #include <linux/slab.h>
8 #include <linux/kernel.h>
9 #include <linux/cpumask.h>
10 #include <linux/spinlock.h>
11 #include <asm/cpudata.h>
25 /* Increment rover every time level is visited */
26 ROVER_INC_ON_VISIT
= 1 << 0,
27 /* Increment parent's rover every time rover wraps around */
28 ROVER_INC_PARENT_ON_LOOP
= 1 << 1,
34 int num_cpus
; /* Number of CPUs in this hierarchy */
36 int child_start
; /* Array index of the first child node */
37 int child_end
; /* Array index of the last child node */
38 int rover
; /* Child node iterator */
41 struct cpuinfo_level
{
42 int start_index
; /* Index of first node of a level in a cpuinfo tree */
43 int end_index
; /* Index of last node of a level in a cpuinfo tree */
44 int num_nodes
; /* Number of nodes in a level in a cpuinfo tree */
50 /* Offsets into nodes[] for each level of the tree */
51 struct cpuinfo_level level
[CPUINFO_LVL_MAX
];
52 struct cpuinfo_node nodes
[0];
56 static struct cpuinfo_tree
*cpuinfo_tree
;
58 static u16 cpu_distribution_map
[NR_CPUS
];
59 static DEFINE_SPINLOCK(cpu_map_lock
);
62 /* Niagara optimized cpuinfo tree traversal. */
63 static const int niagara_iterate_method
[] = {
64 [CPUINFO_LVL_ROOT
] = ROVER_NO_OP
,
66 /* Strands (or virtual CPUs) within a core may not run concurrently
67 * on the Niagara, as instruction pipeline(s) are shared. Distribute
68 * work to strands in different cores first for better concurrency.
69 * Go to next NUMA node when all cores are used.
71 [CPUINFO_LVL_NODE
] = ROVER_INC_ON_VISIT
|ROVER_INC_PARENT_ON_LOOP
,
73 /* Strands are grouped together by proc_id in cpuinfo_sparc, i.e.
74 * a proc_id represents an instruction pipeline. Distribute work to
75 * strands in different proc_id groups if the core has multiple
76 * instruction pipelines (e.g. the Niagara 2/2+ has two).
78 [CPUINFO_LVL_CORE
] = ROVER_INC_ON_VISIT
,
80 /* Pick the next strand in the proc_id group. */
81 [CPUINFO_LVL_PROC
] = ROVER_INC_ON_VISIT
,
84 /* Generic cpuinfo tree traversal. Distribute work round robin across NUMA
87 static const int generic_iterate_method
[] = {
88 [CPUINFO_LVL_ROOT
] = ROVER_INC_ON_VISIT
,
89 [CPUINFO_LVL_NODE
] = ROVER_NO_OP
,
90 [CPUINFO_LVL_CORE
] = ROVER_INC_PARENT_ON_LOOP
,
91 [CPUINFO_LVL_PROC
] = ROVER_INC_ON_VISIT
|ROVER_INC_PARENT_ON_LOOP
,
95 static int cpuinfo_id(int cpu
, int level
)
100 case CPUINFO_LVL_ROOT
:
103 case CPUINFO_LVL_NODE
:
104 id
= cpu_to_node(cpu
);
106 case CPUINFO_LVL_CORE
:
107 id
= cpu_data(cpu
).core_id
;
109 case CPUINFO_LVL_PROC
:
110 id
= cpu_data(cpu
).proc_id
;
119 * Enumerate the CPU information in __cpu_data to determine the start index,
120 * end index, and number of nodes for each level in the cpuinfo tree. The
121 * total number of cpuinfo nodes required to build the tree is returned.
123 static int enumerate_cpuinfo_nodes(struct cpuinfo_level
*tree_level
)
125 int prev_id
[CPUINFO_LVL_MAX
];
128 for (i
= CPUINFO_LVL_ROOT
; i
< CPUINFO_LVL_MAX
; i
++) {
129 struct cpuinfo_level
*lv
= &tree_level
[i
];
132 lv
->start_index
= lv
->end_index
= lv
->num_nodes
= 0;
135 num_nodes
= 1; /* Include the root node */
137 for (i
= 0; i
< num_possible_cpus(); i
++) {
141 n
= cpuinfo_id(i
, CPUINFO_LVL_NODE
);
142 if (n
> prev_id
[CPUINFO_LVL_NODE
]) {
143 tree_level
[CPUINFO_LVL_NODE
].num_nodes
++;
144 prev_id
[CPUINFO_LVL_NODE
] = n
;
147 n
= cpuinfo_id(i
, CPUINFO_LVL_CORE
);
148 if (n
> prev_id
[CPUINFO_LVL_CORE
]) {
149 tree_level
[CPUINFO_LVL_CORE
].num_nodes
++;
150 prev_id
[CPUINFO_LVL_CORE
] = n
;
153 n
= cpuinfo_id(i
, CPUINFO_LVL_PROC
);
154 if (n
> prev_id
[CPUINFO_LVL_PROC
]) {
155 tree_level
[CPUINFO_LVL_PROC
].num_nodes
++;
156 prev_id
[CPUINFO_LVL_PROC
] = n
;
161 tree_level
[CPUINFO_LVL_ROOT
].num_nodes
= 1;
163 n
= tree_level
[CPUINFO_LVL_NODE
].num_nodes
;
164 tree_level
[CPUINFO_LVL_NODE
].start_index
= 1;
165 tree_level
[CPUINFO_LVL_NODE
].end_index
= n
;
168 tree_level
[CPUINFO_LVL_CORE
].start_index
= n
;
169 n
+= tree_level
[CPUINFO_LVL_CORE
].num_nodes
;
170 tree_level
[CPUINFO_LVL_CORE
].end_index
= n
- 1;
172 tree_level
[CPUINFO_LVL_PROC
].start_index
= n
;
173 n
+= tree_level
[CPUINFO_LVL_PROC
].num_nodes
;
174 tree_level
[CPUINFO_LVL_PROC
].end_index
= n
- 1;
179 /* Build a tree representation of the CPU hierarchy using the per CPU
180 * information in __cpu_data. Entries in __cpu_data[0..NR_CPUS] are
181 * assumed to be sorted in ascending order based on node, core_id, and
182 * proc_id (in order of significance).
184 static struct cpuinfo_tree
*build_cpuinfo_tree(void)
186 struct cpuinfo_tree
*new_tree
;
187 struct cpuinfo_node
*node
;
188 struct cpuinfo_level tmp_level
[CPUINFO_LVL_MAX
];
189 int num_cpus
[CPUINFO_LVL_MAX
];
190 int level_rover
[CPUINFO_LVL_MAX
];
191 int prev_id
[CPUINFO_LVL_MAX
];
192 int n
, id
, cpu
, prev_cpu
, last_cpu
, level
;
194 n
= enumerate_cpuinfo_nodes(tmp_level
);
196 new_tree
= kzalloc(sizeof(struct cpuinfo_tree
) +
197 (sizeof(struct cpuinfo_node
) * n
), GFP_ATOMIC
);
201 new_tree
->total_nodes
= n
;
202 memcpy(&new_tree
->level
, tmp_level
, sizeof(tmp_level
));
204 prev_cpu
= cpu
= cpumask_first(cpu_online_mask
);
206 /* Initialize all levels in the tree with the first CPU */
207 for (level
= CPUINFO_LVL_PROC
; level
>= CPUINFO_LVL_ROOT
; level
--) {
208 n
= new_tree
->level
[level
].start_index
;
210 level_rover
[level
] = n
;
211 node
= &new_tree
->nodes
[n
];
213 id
= cpuinfo_id(cpu
, level
);
214 if (unlikely(id
< 0)) {
222 node
->parent_index
= (level
> CPUINFO_LVL_ROOT
)
223 ? new_tree
->level
[level
- 1].start_index
: -1;
225 node
->child_start
= node
->child_end
= node
->rover
=
226 (level
== CPUINFO_LVL_PROC
)
227 ? cpu
: new_tree
->level
[level
+ 1].start_index
;
229 prev_id
[level
] = node
->id
;
233 for (last_cpu
= (num_possible_cpus() - 1); last_cpu
>= 0; last_cpu
--) {
234 if (cpu_online(last_cpu
))
238 while (++cpu
<= last_cpu
) {
239 if (!cpu_online(cpu
))
242 for (level
= CPUINFO_LVL_PROC
; level
>= CPUINFO_LVL_ROOT
;
244 id
= cpuinfo_id(cpu
, level
);
245 if (unlikely(id
< 0)) {
250 if ((id
!= prev_id
[level
]) || (cpu
== last_cpu
)) {
252 node
= &new_tree
->nodes
[level_rover
[level
]];
253 node
->num_cpus
= num_cpus
[level
];
259 /* Connect tree node to parent */
260 if (level
== CPUINFO_LVL_ROOT
)
261 node
->parent_index
= -1;
264 level_rover
[level
- 1];
266 if (level
== CPUINFO_LVL_PROC
) {
268 (cpu
== last_cpu
) ? cpu
: prev_cpu
;
271 level_rover
[level
+ 1] - 1;
274 /* Initialize the next node in the same level */
275 n
= ++level_rover
[level
];
276 if (n
<= new_tree
->level
[level
].end_index
) {
277 node
= &new_tree
->nodes
[n
];
281 /* Connect node to child */
282 node
->child_start
= node
->child_end
=
284 (level
== CPUINFO_LVL_PROC
)
285 ? cpu
: level_rover
[level
+ 1];
296 static void increment_rover(struct cpuinfo_tree
*t
, int node_index
,
297 int root_index
, const int *rover_inc_table
)
299 struct cpuinfo_node
*node
= &t
->nodes
[node_index
];
300 int top_level
, level
;
302 top_level
= t
->nodes
[root_index
].level
;
303 for (level
= node
->level
; level
>= top_level
; level
--) {
305 if (node
->rover
<= node
->child_end
)
308 node
->rover
= node
->child_start
;
309 /* If parent's rover does not need to be adjusted, stop here. */
310 if ((level
== top_level
) ||
311 !(rover_inc_table
[level
] & ROVER_INC_PARENT_ON_LOOP
))
314 node
= &t
->nodes
[node
->parent_index
];
318 static int iterate_cpu(struct cpuinfo_tree
*t
, unsigned int root_index
)
320 const int *rover_inc_table
;
321 int level
, new_index
, index
= root_index
;
323 switch (sun4v_chip_type
) {
324 case SUN4V_CHIP_NIAGARA1
:
325 case SUN4V_CHIP_NIAGARA2
:
326 case SUN4V_CHIP_NIAGARA3
:
327 case SUN4V_CHIP_NIAGARA4
:
328 case SUN4V_CHIP_NIAGARA5
:
329 case SUN4V_CHIP_SPARC_M6
:
330 case SUN4V_CHIP_SPARC_M7
:
331 case SUN4V_CHIP_SPARC_M8
:
332 case SUN4V_CHIP_SPARC_SN
:
333 case SUN4V_CHIP_SPARC64X
:
334 rover_inc_table
= niagara_iterate_method
;
337 rover_inc_table
= generic_iterate_method
;
340 for (level
= t
->nodes
[root_index
].level
; level
< CPUINFO_LVL_MAX
;
342 new_index
= t
->nodes
[index
].rover
;
343 if (rover_inc_table
[level
] & ROVER_INC_ON_VISIT
)
344 increment_rover(t
, index
, root_index
, rover_inc_table
);
351 static void _cpu_map_rebuild(void)
360 cpuinfo_tree
= build_cpuinfo_tree();
364 /* Build CPU distribution map that spans all online CPUs. No need
365 * to check if the CPU is online, as that is done when the cpuinfo
366 * tree is being built.
368 for (i
= 0; i
< cpuinfo_tree
->nodes
[0].num_cpus
; i
++)
369 cpu_distribution_map
[i
] = iterate_cpu(cpuinfo_tree
, 0);
372 /* Fallback if the cpuinfo tree could not be built. CPU mapping is linear
375 static int simple_map_to_cpu(unsigned int index
)
377 int i
, end
, cpu_rover
;
380 end
= index
% num_online_cpus();
381 for (i
= 0; i
< num_possible_cpus(); i
++) {
382 if (cpu_online(cpu_rover
)) {
383 if (cpu_rover
>= end
)
390 /* Impossible, since num_online_cpus() <= num_possible_cpus() */
391 return cpumask_first(cpu_online_mask
);
394 static int _map_to_cpu(unsigned int index
)
396 struct cpuinfo_node
*root_node
;
398 if (unlikely(!cpuinfo_tree
)) {
401 return simple_map_to_cpu(index
);
404 root_node
= &cpuinfo_tree
->nodes
[0];
405 #ifdef CONFIG_HOTPLUG_CPU
406 if (unlikely(root_node
->num_cpus
!= num_online_cpus())) {
409 return simple_map_to_cpu(index
);
412 return cpu_distribution_map
[index
% root_node
->num_cpus
];
415 int map_to_cpu(unsigned int index
)
420 spin_lock_irqsave(&cpu_map_lock
, flag
);
421 mapped_cpu
= _map_to_cpu(index
);
423 #ifdef CONFIG_HOTPLUG_CPU
424 while (unlikely(!cpu_online(mapped_cpu
)))
425 mapped_cpu
= _map_to_cpu(index
);
427 spin_unlock_irqrestore(&cpu_map_lock
, flag
);
430 EXPORT_SYMBOL(map_to_cpu
);
432 void cpu_map_rebuild(void)
436 spin_lock_irqsave(&cpu_map_lock
, flag
);
438 spin_unlock_irqrestore(&cpu_map_lock
, flag
);