/*
 * arch/ia64/kernel/domain.c
 * Architecture specific sched-domains builder.
 *
 * Copyright (C) 2004 Jesse Barnes
 * Copyright (C) 2004 Silicon Graphics, Inc.
 */

#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/topology.h>
#include <linux/nodemask.h>

#define SD_NODES_PER_DOMAIN 6
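
/*
 * SD_NODES_PER_DOMAIN bounds how many nodes a single node-level
 * sched_domain spans: sched_domain_node_span() pulls in the target node
 * plus its (SD_NODES_PER_DOMAIN - 1) nearest neighbours, and
 * arch_init_sched_domains() only adds the extra "allnodes" top level
 * once there are more online cpus than SD_NODES_PER_DOMAIN nodes'
 * worth of cpus.
 */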

/**
 * find_next_best_node - find the next node to include in a sched_domain
 * @node: node whose sched_domain we're building
 * @used_nodes: nodes already in the sched_domain
 *
 * Find the next node to include in a given scheduling domain.  Simply
 * finds the closest node not already in the @used_nodes map.
 *
 * Should use nodemask_t.
 */
static int __devinit find_next_best_node(int node, unsigned long *used_nodes)
{
	int i, n, val, min_val, best_node = 0;

	min_val = INT_MAX;

	for (i = 0; i < MAX_NUMNODES; i++) {
		/* Start at @node and wrap around MAX_NUMNODES */
		n = (node + i) % MAX_NUMNODES;

		/* Skip already used nodes */
		if (test_bit(n, used_nodes))
			continue;

		/* Simple min distance search */
		val = node_distance(node, n);
		if (val < min_val) {
			min_val = val;
			best_node = n;
		}
	}

	set_bit(best_node, used_nodes);
	return best_node;
}
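
/*
 * Note that each call marks the node it returns in @used_nodes, so
 * calling find_next_best_node() repeatedly with the same bitmap walks
 * the remaining nodes in order of increasing node_distance() from
 * @node; sched_domain_node_span() below relies on this to grow a span
 * outward from the local node.
 */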

/**
 * sched_domain_node_span - get a cpumask for a node's sched_domain
 * @node: node whose cpumask we're constructing
 *
 * Given a node, construct a good cpumask for its sched_domain to span.  It
 * should be one that prevents unnecessary balancing, but also spreads tasks
 * out optimally.
 */
static cpumask_t __devinit sched_domain_node_span(int node)
{
	int i;
	cpumask_t span, nodemask;
	DECLARE_BITMAP(used_nodes, MAX_NUMNODES);

	cpus_clear(span);
	bitmap_zero(used_nodes, MAX_NUMNODES);

	/* Always include the cpus of @node itself */
	nodemask = node_to_cpumask(node);
	cpus_or(span, span, nodemask);
	set_bit(node, used_nodes);

	/* Then add the cpus of the closest SD_NODES_PER_DOMAIN - 1 nodes */
	for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
		int next_node = find_next_best_node(node, used_nodes);

		nodemask = node_to_cpumask(next_node);
		cpus_or(span, span, nodemask);
	}

	return span;
}
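
/*
 * Illustration (hypothetical topology): on a 16-node machine with
 * SD_NODES_PER_DOMAIN == 6, the span returned for node 0 covers the
 * cpus of node 0 plus those of its five nearest neighbours as ranked by
 * node_distance(); more distant nodes are only reachable through the
 * "allnodes" level built in arch_init_sched_domains() below.
 */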

/*
 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
 * can switch it on easily if needed.
 */
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static struct sched_group sched_group_cpus[NR_CPUS];

static int __devinit cpu_to_cpu_group(int cpu)
{
	/* Each cpu is its own sibling-level group */
	return cpu;
}
#endif

static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static struct sched_group sched_group_phys[NR_CPUS];

static int __devinit cpu_to_phys_group(int cpu)
{
#ifdef CONFIG_SCHED_SMT
	return first_cpu(cpu_sibling_map[cpu]);
#else
	return cpu;
#endif
}
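
/*
 * With CONFIG_SCHED_SMT, all hardware threads in a physical package
 * share cpu_sibling_map[], so using the first cpu in that map as the
 * group index collapses every sibling onto one entry of
 * sched_group_phys[]; without SMT each cpu is its own physical group.
 */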

/*
 * The init_sched_build_groups() helper can't handle what we want to do
 * with node groups, so roll our own.  Now each node has its own list of
 * groups which gets dynamically allocated.
 */
static DEFINE_PER_CPU(struct sched_domain, node_domains);
static struct sched_group *sched_group_nodes[MAX_NUMNODES];

static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
static struct sched_group sched_group_allnodes[MAX_NUMNODES];

static int __devinit cpu_to_allnodes_group(int cpu)
{
	return cpu_to_node(cpu);
}
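
/*
 * Taken together, arch_init_sched_domains() below builds up to four
 * levels per cpu: an optional machine-wide "allnodes" domain, a NUMA
 * node-span domain, a physical-package domain, and (if CONFIG_SCHED_SMT
 * is ever enabled) an SMT sibling domain.  Each level gets its own
 * per-cpu sched_domain and its own sched_group array or list.
 */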

/*
 * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
 */
void __devinit arch_init_sched_domains(void)
{
	int i;
	cpumask_t cpu_default_map;

	/*
	 * Setup mask for cpus without special case scheduling requirements.
	 * For now this just excludes isolated cpus, but could be used to
	 * exclude other special cases in the future.
	 */
	cpus_complement(cpu_default_map, cpu_isolated_map);
	cpus_and(cpu_default_map, cpu_default_map, cpu_online_map);

	/*
	 * Set up domains.  Isolated domains just stay on the dummy domain.
	 */
	for_each_cpu_mask(i, cpu_default_map) {
		int group;
		struct sched_domain *sd = NULL, *p;
		cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));

		cpus_and(nodemask, nodemask, cpu_default_map);

		/*
		 * If the machine has more cpus than one node-span domain
		 * can cover, add a top-level domain spanning all cpus.
		 */
		if (num_online_cpus()
				> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
			sd = &per_cpu(allnodes_domains, i);
			*sd = SD_ALLNODES_INIT;
			sd->span = cpu_default_map;
			group = cpu_to_allnodes_group(i);
			sd->groups = &sched_group_allnodes[group];
			p = sd;
		} else
			p = NULL;

		sd = &per_cpu(node_domains, i);
		*sd = SD_NODE_INIT;
		sd->span = sched_domain_node_span(cpu_to_node(i));
		sd->parent = p;
		cpus_and(sd->span, sd->span, cpu_default_map);

		p = sd;
		sd = &per_cpu(phys_domains, i);
		group = cpu_to_phys_group(i);
		*sd = SD_CPU_INIT;
		sd->span = nodemask;
		sd->parent = p;
		sd->groups = &sched_group_phys[group];

#ifdef CONFIG_SCHED_SMT
		p = sd;
		sd = &per_cpu(cpu_domains, i);
		group = cpu_to_cpu_group(i);
		*sd = SD_SIBLING_INIT;
		sd->span = cpu_sibling_map[i];
		cpus_and(sd->span, sd->span, cpu_default_map);
		sd->parent = p;
		sd->groups = &sched_group_cpus[group];
#endif
	}
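
	/*
	 * At this point every non-isolated online cpu has its domain chain
	 * built, child to parent: [SMT sibling ->] physical package ->
	 * NUMA node [-> allnodes on large machines].  Only the lowest
	 * level is attached to the cpu at the end of this function.
	 */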

#ifdef CONFIG_SCHED_SMT
	/* Set up CPU (sibling) groups */
	for_each_cpu_mask(i, cpu_default_map) {
		cpumask_t this_sibling_map = cpu_sibling_map[i];

		cpus_and(this_sibling_map, this_sibling_map, cpu_default_map);
		if (i != first_cpu(this_sibling_map))
			continue;

		init_sched_build_groups(sched_group_cpus, this_sibling_map,
						&cpu_to_cpu_group);
	}
#endif

	/* Set up physical groups */
	for (i = 0; i < MAX_NUMNODES; i++) {
		cpumask_t nodemask = node_to_cpumask(i);

		cpus_and(nodemask, nodemask, cpu_default_map);
		if (cpus_empty(nodemask))
			continue;

		init_sched_build_groups(sched_group_phys, nodemask,
						&cpu_to_phys_group);
	}

	/* Set up the machine-wide "allnodes" groups */
	init_sched_build_groups(sched_group_allnodes, cpu_default_map,
				&cpu_to_allnodes_group);
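
	/*
	 * As used here, init_sched_build_groups() partitions the given
	 * cpumask with the supplied cpu-to-group-index function and links
	 * the resulting entries of the group array into a circular list.
	 * The node-level groups below cannot be expressed that way, which
	 * is why they are built by hand.
	 */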

	/* Set up node groups: one hand-built circular list per node */
	for (i = 0; i < MAX_NUMNODES; i++) {
		struct sched_group *sg, *prev;
		cpumask_t nodemask = node_to_cpumask(i);
		cpumask_t domainspan;
		cpumask_t covered = CPU_MASK_NONE;
		int j;

		cpus_and(nodemask, nodemask, cpu_default_map);
		if (cpus_empty(nodemask))
			continue;

		domainspan = sched_domain_node_span(i);
		cpus_and(domainspan, domainspan, cpu_default_map);

		sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
		sched_group_nodes[i] = sg;
		for_each_cpu_mask(j, nodemask) {
			struct sched_domain *sd;

			sd = &per_cpu(node_domains, j);
			sd->groups = sg;
			if (sd->groups == NULL) {
				/* Turn off balancing if we have no groups */
				sd->flags = 0;
			}
		}
		if (!sg) {
			printk(KERN_WARNING
			       "Can not alloc domain group for node %d\n", i);
			continue;
		}
		sg->cpu_power = 0;
		sg->cpumask = nodemask;
		cpus_or(covered, covered, nodemask);
		prev = sg;

		for (j = 0; j < MAX_NUMNODES; j++) {
			cpumask_t tmp, notcovered;
			int n = (i + j) % MAX_NUMNODES;

			cpus_complement(notcovered, covered);
			cpus_and(tmp, notcovered, cpu_default_map);
			cpus_and(tmp, tmp, domainspan);
			if (cpus_empty(tmp))
				break;

			nodemask = node_to_cpumask(n);
			cpus_and(tmp, tmp, nodemask);
			if (cpus_empty(tmp))
				continue;

			sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
			if (!sg) {
				printk(KERN_WARNING
				       "Can not alloc domain group for node %d\n", j);
				break;
			}
			sg->cpu_power = 0;
			sg->cpumask = tmp;
			cpus_or(covered, covered, tmp);
			prev->next = sg;
			prev = sg;
		}
		/* Close the ring back to the node's first group */
		prev->next = sched_group_nodes[i];
	}
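
	/*
	 * Each sched_group_nodes[i] list is circular: the last group built
	 * for node i points back at the first.  The power fixup below and
	 * arch_destroy_sched_domains() both rely on hitting the head again
	 * to know they have walked the whole ring.
	 */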

	/* Calculate CPU power for physical packages and nodes */
	for_each_cpu_mask(i, cpu_default_map) {
		int power;
		struct sched_domain *sd;

#ifdef CONFIG_SCHED_SMT
		sd = &per_cpu(cpu_domains, i);
		power = SCHED_LOAD_SCALE;
		sd->groups->cpu_power = power;
#endif

		sd = &per_cpu(phys_domains, i);
		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;
		sd->groups->cpu_power = power;

		/* The allnodes level only exists on big machines */
		sd = &per_cpu(allnodes_domains, i);
		if (sd->groups) {
			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;
			sd->groups->cpu_power = power;
		}
	}
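
	/*
	 * The formula above gives a group SCHED_LOAD_SCALE for its first
	 * cpu plus roughly 10% of SCHED_LOAD_SCALE per additional cpu;
	 * a 4-cpu physical package, for example, ends up with
	 * 1.3 * SCHED_LOAD_SCALE.  The intent is that a package looks only
	 * slightly stronger than a single cpu, so load is spread across
	 * packages and nodes rather than stacked onto one.
	 */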

	/* Add each physical package's power to its node-level groups */
	for (i = 0; i < MAX_NUMNODES; i++) {
		struct sched_group *sg = sched_group_nodes[i];
		int j;

		if (sg == NULL)
			continue;
next_sg:
		for_each_cpu_mask(j, sg->cpumask) {
			struct sched_domain *sd;
			int power;

			sd = &per_cpu(phys_domains, j);
			if (j != first_cpu(sd->groups->cpumask)) {
				/*
				 * Only add "power" once for each
				 * physical package.
				 */
				continue;
			}
			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;

			sg->cpu_power += power;
		}
		sg = sg->next;
		if (sg != sched_group_nodes[i])
			goto next_sg;
	}
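
	/*
	 * After this pass a node group's cpu_power is the sum of the
	 * physical-package powers inside it, with each package counted
	 * once (via its first cpu) rather than once per cpu, so node-level
	 * balancing sees package capacity rather than raw cpu counts.
	 */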

	/* Attach the domains */
	for_each_online_cpu(i) {
		struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
		sd = &per_cpu(cpu_domains, i);
#else
		sd = &per_cpu(phys_domains, i);
#endif
		cpu_attach_domain(sd, i);
	}
}

void __devinit arch_destroy_sched_domains(void)
{
	int i;

	for (i = 0; i < MAX_NUMNODES; i++) {
		struct sched_group *oldsg, *sg = sched_group_nodes[i];

		if (sg == NULL)
			continue;
		sg = sg->next;
next_sg:
		oldsg = sg;
		sg = sg->next;
		kfree(oldsg);
		if (oldsg != sched_group_nodes[i])
			goto next_sg;
		sched_group_nodes[i] = NULL;
	}
}