1 /* cpumap.c: used for optimizing CPU assignment
3 * Copyright (C) 2009 Hong H. Pham <hong.pham@windriver.com>
6 #include <linux/export.h>
7 #include <linux/slab.h>
8 #include <linux/kernel.h>
9 #include <linux/init.h>
10 #include <linux/cpumask.h>
11 #include <linux/spinlock.h>
12 #include <asm/cpudata.h>
26 /* Increment rover every time level is visited */
27 ROVER_INC_ON_VISIT
= 1 << 0,
28 /* Increment parent's rover every time rover wraps around */
29 ROVER_INC_PARENT_ON_LOOP
= 1 << 1,
35 int num_cpus
; /* Number of CPUs in this hierarchy */
37 int child_start
; /* Array index of the first child node */
38 int child_end
; /* Array index of the last child node */
39 int rover
; /* Child node iterator */
42 struct cpuinfo_level
{
43 int start_index
; /* Index of first node of a level in a cpuinfo tree */
44 int end_index
; /* Index of last node of a level in a cpuinfo tree */
45 int num_nodes
; /* Number of nodes in a level in a cpuinfo tree */
51 /* Offsets into nodes[] for each level of the tree */
52 struct cpuinfo_level level
[CPUINFO_LVL_MAX
];
53 struct cpuinfo_node nodes
[0];
57 static struct cpuinfo_tree
*cpuinfo_tree
;
59 static u16 cpu_distribution_map
[NR_CPUS
];
60 static DEFINE_SPINLOCK(cpu_map_lock
);
63 /* Niagara optimized cpuinfo tree traversal. */
64 static const int niagara_iterate_method
[] = {
65 [CPUINFO_LVL_ROOT
] = ROVER_NO_OP
,
67 /* Strands (or virtual CPUs) within a core may not run concurrently
68 * on the Niagara, as instruction pipeline(s) are shared. Distribute
69 * work to strands in different cores first for better concurrency.
70 * Go to next NUMA node when all cores are used.
72 [CPUINFO_LVL_NODE
] = ROVER_INC_ON_VISIT
|ROVER_INC_PARENT_ON_LOOP
,
74 /* Strands are grouped together by proc_id in cpuinfo_sparc, i.e.
75 * a proc_id represents an instruction pipeline. Distribute work to
76 * strands in different proc_id groups if the core has multiple
77 * instruction pipelines (e.g. the Niagara 2/2+ has two).
79 [CPUINFO_LVL_CORE
] = ROVER_INC_ON_VISIT
,
81 /* Pick the next strand in the proc_id group. */
82 [CPUINFO_LVL_PROC
] = ROVER_INC_ON_VISIT
,
85 /* Generic cpuinfo tree traversal. Distribute work round robin across NUMA
88 static const int generic_iterate_method
[] = {
89 [CPUINFO_LVL_ROOT
] = ROVER_INC_ON_VISIT
,
90 [CPUINFO_LVL_NODE
] = ROVER_NO_OP
,
91 [CPUINFO_LVL_CORE
] = ROVER_INC_PARENT_ON_LOOP
,
92 [CPUINFO_LVL_PROC
] = ROVER_INC_ON_VISIT
|ROVER_INC_PARENT_ON_LOOP
,
96 static int cpuinfo_id(int cpu
, int level
)
101 case CPUINFO_LVL_ROOT
:
104 case CPUINFO_LVL_NODE
:
105 id
= cpu_to_node(cpu
);
107 case CPUINFO_LVL_CORE
:
108 id
= cpu_data(cpu
).core_id
;
110 case CPUINFO_LVL_PROC
:
111 id
= cpu_data(cpu
).proc_id
;
120 * Enumerate the CPU information in __cpu_data to determine the start index,
121 * end index, and number of nodes for each level in the cpuinfo tree. The
122 * total number of cpuinfo nodes required to build the tree is returned.
124 static int enumerate_cpuinfo_nodes(struct cpuinfo_level
*tree_level
)
126 int prev_id
[CPUINFO_LVL_MAX
];
129 for (i
= CPUINFO_LVL_ROOT
; i
< CPUINFO_LVL_MAX
; i
++) {
130 struct cpuinfo_level
*lv
= &tree_level
[i
];
133 lv
->start_index
= lv
->end_index
= lv
->num_nodes
= 0;
136 num_nodes
= 1; /* Include the root node */
138 for (i
= 0; i
< num_possible_cpus(); i
++) {
142 n
= cpuinfo_id(i
, CPUINFO_LVL_NODE
);
143 if (n
> prev_id
[CPUINFO_LVL_NODE
]) {
144 tree_level
[CPUINFO_LVL_NODE
].num_nodes
++;
145 prev_id
[CPUINFO_LVL_NODE
] = n
;
148 n
= cpuinfo_id(i
, CPUINFO_LVL_CORE
);
149 if (n
> prev_id
[CPUINFO_LVL_CORE
]) {
150 tree_level
[CPUINFO_LVL_CORE
].num_nodes
++;
151 prev_id
[CPUINFO_LVL_CORE
] = n
;
154 n
= cpuinfo_id(i
, CPUINFO_LVL_PROC
);
155 if (n
> prev_id
[CPUINFO_LVL_PROC
]) {
156 tree_level
[CPUINFO_LVL_PROC
].num_nodes
++;
157 prev_id
[CPUINFO_LVL_PROC
] = n
;
162 tree_level
[CPUINFO_LVL_ROOT
].num_nodes
= 1;
164 n
= tree_level
[CPUINFO_LVL_NODE
].num_nodes
;
165 tree_level
[CPUINFO_LVL_NODE
].start_index
= 1;
166 tree_level
[CPUINFO_LVL_NODE
].end_index
= n
;
169 tree_level
[CPUINFO_LVL_CORE
].start_index
= n
;
170 n
+= tree_level
[CPUINFO_LVL_CORE
].num_nodes
;
171 tree_level
[CPUINFO_LVL_CORE
].end_index
= n
- 1;
173 tree_level
[CPUINFO_LVL_PROC
].start_index
= n
;
174 n
+= tree_level
[CPUINFO_LVL_PROC
].num_nodes
;
175 tree_level
[CPUINFO_LVL_PROC
].end_index
= n
- 1;
180 /* Build a tree representation of the CPU hierarchy using the per CPU
181 * information in __cpu_data. Entries in __cpu_data[0..NR_CPUS] are
182 * assumed to be sorted in ascending order based on node, core_id, and
183 * proc_id (in order of significance).
185 static struct cpuinfo_tree
*build_cpuinfo_tree(void)
187 struct cpuinfo_tree
*new_tree
;
188 struct cpuinfo_node
*node
;
189 struct cpuinfo_level tmp_level
[CPUINFO_LVL_MAX
];
190 int num_cpus
[CPUINFO_LVL_MAX
];
191 int level_rover
[CPUINFO_LVL_MAX
];
192 int prev_id
[CPUINFO_LVL_MAX
];
193 int n
, id
, cpu
, prev_cpu
, last_cpu
, level
;
195 n
= enumerate_cpuinfo_nodes(tmp_level
);
197 new_tree
= kzalloc(sizeof(struct cpuinfo_tree
) +
198 (sizeof(struct cpuinfo_node
) * n
), GFP_ATOMIC
);
202 new_tree
->total_nodes
= n
;
203 memcpy(&new_tree
->level
, tmp_level
, sizeof(tmp_level
));
205 prev_cpu
= cpu
= cpumask_first(cpu_online_mask
);
207 /* Initialize all levels in the tree with the first CPU */
208 for (level
= CPUINFO_LVL_PROC
; level
>= CPUINFO_LVL_ROOT
; level
--) {
209 n
= new_tree
->level
[level
].start_index
;
211 level_rover
[level
] = n
;
212 node
= &new_tree
->nodes
[n
];
214 id
= cpuinfo_id(cpu
, level
);
215 if (unlikely(id
< 0)) {
223 node
->parent_index
= (level
> CPUINFO_LVL_ROOT
)
224 ? new_tree
->level
[level
- 1].start_index
: -1;
226 node
->child_start
= node
->child_end
= node
->rover
=
227 (level
== CPUINFO_LVL_PROC
)
228 ? cpu
: new_tree
->level
[level
+ 1].start_index
;
230 prev_id
[level
] = node
->id
;
234 for (last_cpu
= (num_possible_cpus() - 1); last_cpu
>= 0; last_cpu
--) {
235 if (cpu_online(last_cpu
))
239 while (++cpu
<= last_cpu
) {
240 if (!cpu_online(cpu
))
243 for (level
= CPUINFO_LVL_PROC
; level
>= CPUINFO_LVL_ROOT
;
245 id
= cpuinfo_id(cpu
, level
);
246 if (unlikely(id
< 0)) {
251 if ((id
!= prev_id
[level
]) || (cpu
== last_cpu
)) {
253 node
= &new_tree
->nodes
[level_rover
[level
]];
254 node
->num_cpus
= num_cpus
[level
];
260 /* Connect tree node to parent */
261 if (level
== CPUINFO_LVL_ROOT
)
262 node
->parent_index
= -1;
265 level_rover
[level
- 1];
267 if (level
== CPUINFO_LVL_PROC
) {
269 (cpu
== last_cpu
) ? cpu
: prev_cpu
;
272 level_rover
[level
+ 1] - 1;
275 /* Initialize the next node in the same level */
276 n
= ++level_rover
[level
];
277 if (n
<= new_tree
->level
[level
].end_index
) {
278 node
= &new_tree
->nodes
[n
];
282 /* Connect node to child */
283 node
->child_start
= node
->child_end
=
285 (level
== CPUINFO_LVL_PROC
)
286 ? cpu
: level_rover
[level
+ 1];
297 static void increment_rover(struct cpuinfo_tree
*t
, int node_index
,
298 int root_index
, const int *rover_inc_table
)
300 struct cpuinfo_node
*node
= &t
->nodes
[node_index
];
301 int top_level
, level
;
303 top_level
= t
->nodes
[root_index
].level
;
304 for (level
= node
->level
; level
>= top_level
; level
--) {
306 if (node
->rover
<= node
->child_end
)
309 node
->rover
= node
->child_start
;
310 /* If parent's rover does not need to be adjusted, stop here. */
311 if ((level
== top_level
) ||
312 !(rover_inc_table
[level
] & ROVER_INC_PARENT_ON_LOOP
))
315 node
= &t
->nodes
[node
->parent_index
];
319 static int iterate_cpu(struct cpuinfo_tree
*t
, unsigned int root_index
)
321 const int *rover_inc_table
;
322 int level
, new_index
, index
= root_index
;
324 switch (sun4v_chip_type
) {
325 case SUN4V_CHIP_NIAGARA1
:
326 case SUN4V_CHIP_NIAGARA2
:
327 case SUN4V_CHIP_NIAGARA3
:
328 case SUN4V_CHIP_NIAGARA4
:
329 case SUN4V_CHIP_NIAGARA5
:
330 case SUN4V_CHIP_SPARC64X
:
331 rover_inc_table
= niagara_iterate_method
;
334 rover_inc_table
= generic_iterate_method
;
337 for (level
= t
->nodes
[root_index
].level
; level
< CPUINFO_LVL_MAX
;
339 new_index
= t
->nodes
[index
].rover
;
340 if (rover_inc_table
[level
] & ROVER_INC_ON_VISIT
)
341 increment_rover(t
, index
, root_index
, rover_inc_table
);
348 static void _cpu_map_rebuild(void)
357 cpuinfo_tree
= build_cpuinfo_tree();
361 /* Build CPU distribution map that spans all online CPUs. No need
362 * to check if the CPU is online, as that is done when the cpuinfo
363 * tree is being built.
365 for (i
= 0; i
< cpuinfo_tree
->nodes
[0].num_cpus
; i
++)
366 cpu_distribution_map
[i
] = iterate_cpu(cpuinfo_tree
, 0);
369 /* Fallback if the cpuinfo tree could not be built. CPU mapping is linear
372 static int simple_map_to_cpu(unsigned int index
)
374 int i
, end
, cpu_rover
;
377 end
= index
% num_online_cpus();
378 for (i
= 0; i
< num_possible_cpus(); i
++) {
379 if (cpu_online(cpu_rover
)) {
380 if (cpu_rover
>= end
)
387 /* Impossible, since num_online_cpus() <= num_possible_cpus() */
388 return cpumask_first(cpu_online_mask
);
391 static int _map_to_cpu(unsigned int index
)
393 struct cpuinfo_node
*root_node
;
395 if (unlikely(!cpuinfo_tree
)) {
398 return simple_map_to_cpu(index
);
401 root_node
= &cpuinfo_tree
->nodes
[0];
402 #ifdef CONFIG_HOTPLUG_CPU
403 if (unlikely(root_node
->num_cpus
!= num_online_cpus())) {
406 return simple_map_to_cpu(index
);
409 return cpu_distribution_map
[index
% root_node
->num_cpus
];
412 int map_to_cpu(unsigned int index
)
417 spin_lock_irqsave(&cpu_map_lock
, flag
);
418 mapped_cpu
= _map_to_cpu(index
);
420 #ifdef CONFIG_HOTPLUG_CPU
421 while (unlikely(!cpu_online(mapped_cpu
)))
422 mapped_cpu
= _map_to_cpu(index
);
424 spin_unlock_irqrestore(&cpu_map_lock
, flag
);
427 EXPORT_SYMBOL(map_to_cpu
);
429 void cpu_map_rebuild(void)
433 spin_lock_irqsave(&cpu_map_lock
, flag
);
435 spin_unlock_irqrestore(&cpu_map_lock
, flag
);