/*
 *  acpi_numa.c - ACPI NUMA support
 *
 *  Copyright (C) 2002 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 */

#define pr_fmt(fmt) "ACPI: " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/acpi.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
#include <linux/numa.h>
#include <linux/nodemask.h>
#include <linux/topology.h>

36 static nodemask_t nodes_found_map
= NODE_MASK_NONE
;
38 /* maps to convert between proximity domain and logical node ID */
39 static int pxm_to_node_map
[MAX_PXM_DOMAINS
]
40 = { [0 ... MAX_PXM_DOMAINS
- 1] = NUMA_NO_NODE
};
41 static int node_to_pxm_map
[MAX_NUMNODES
]
42 = { [0 ... MAX_NUMNODES
- 1] = PXM_INVAL
};
44 unsigned char acpi_srat_revision __initdata
;
45 int acpi_numa __initdata
;
47 int pxm_to_node(int pxm
)
51 return pxm_to_node_map
[pxm
];
54 int node_to_pxm(int node
)
58 return node_to_pxm_map
[node
];
61 static void __acpi_map_pxm_to_node(int pxm
, int node
)
63 if (pxm_to_node_map
[pxm
] == NUMA_NO_NODE
|| node
< pxm_to_node_map
[pxm
])
64 pxm_to_node_map
[pxm
] = node
;
65 if (node_to_pxm_map
[node
] == PXM_INVAL
|| pxm
< node_to_pxm_map
[node
])
66 node_to_pxm_map
[node
] = pxm
;
69 int acpi_map_pxm_to_node(int pxm
)
73 if (pxm
< 0 || pxm
>= MAX_PXM_DOMAINS
|| numa_off
)
76 node
= pxm_to_node_map
[pxm
];
78 if (node
== NUMA_NO_NODE
) {
79 if (nodes_weight(nodes_found_map
) >= MAX_NUMNODES
)
81 node
= first_unset_node(nodes_found_map
);
82 __acpi_map_pxm_to_node(pxm
, node
);
83 node_set(node
, nodes_found_map
);
90 * acpi_map_pxm_to_online_node - Map proximity ID to online node
91 * @pxm: ACPI proximity ID
93 * This is similar to acpi_map_pxm_to_node(), but always returns an online
94 * node. When the mapped node from a given proximity ID is offline, it
95 * looks up the node distance table and returns the nearest online node.
97 * ACPI device drivers, which are called after the NUMA initialization has
98 * completed in the kernel, can call this interface to obtain their device
99 * NUMA topology from ACPI tables. Such drivers do not have to deal with
100 * offline nodes. A node may be offline when a device proximity ID is
101 * unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
104 int acpi_map_pxm_to_online_node(int pxm
)
108 node
= acpi_map_pxm_to_node(pxm
);
110 if (node
== NUMA_NO_NODE
)
114 if (!node_online(node
)) {
115 int min_dist
= INT_MAX
, dist
, n
;
117 for_each_online_node(n
) {
118 dist
= node_distance(node
, n
);
119 if (dist
< min_dist
) {
128 EXPORT_SYMBOL(acpi_map_pxm_to_online_node
);
131 acpi_table_print_srat_entry(struct acpi_subtable_header
*header
)
133 switch (header
->type
) {
134 case ACPI_SRAT_TYPE_CPU_AFFINITY
:
136 struct acpi_srat_cpu_affinity
*p
=
137 (struct acpi_srat_cpu_affinity
*)header
;
138 pr_debug("SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n",
139 p
->apic_id
, p
->local_sapic_eid
,
140 p
->proximity_domain_lo
,
141 (p
->flags
& ACPI_SRAT_CPU_ENABLED
) ?
142 "enabled" : "disabled");
146 case ACPI_SRAT_TYPE_MEMORY_AFFINITY
:
148 struct acpi_srat_mem_affinity
*p
=
149 (struct acpi_srat_mem_affinity
*)header
;
150 pr_debug("SRAT Memory (0x%lx length 0x%lx) in proximity domain %d %s%s%s\n",
151 (unsigned long)p
->base_address
,
152 (unsigned long)p
->length
,
154 (p
->flags
& ACPI_SRAT_MEM_ENABLED
) ?
155 "enabled" : "disabled",
156 (p
->flags
& ACPI_SRAT_MEM_HOT_PLUGGABLE
) ?
157 " hot-pluggable" : "",
158 (p
->flags
& ACPI_SRAT_MEM_NON_VOLATILE
) ?
159 " non-volatile" : "");
163 case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY
:
165 struct acpi_srat_x2apic_cpu_affinity
*p
=
166 (struct acpi_srat_x2apic_cpu_affinity
*)header
;
167 pr_debug("SRAT Processor (x2apicid[0x%08x]) in proximity domain %d %s\n",
170 (p
->flags
& ACPI_SRAT_CPU_ENABLED
) ?
171 "enabled" : "disabled");
175 case ACPI_SRAT_TYPE_GICC_AFFINITY
:
177 struct acpi_srat_gicc_affinity
*p
=
178 (struct acpi_srat_gicc_affinity
*)header
;
179 pr_debug("SRAT Processor (acpi id[0x%04x]) in proximity domain %d %s\n",
180 p
->acpi_processor_uid
,
182 (p
->flags
& ACPI_SRAT_GICC_ENABLED
) ?
183 "enabled" : "disabled");
188 pr_warn("Found unsupported SRAT entry (type = 0x%x)\n",
195 * A lot of BIOS fill in 10 (= no distance) everywhere. This messes
196 * up the NUMA heuristics which wants the local node to have a smaller
197 * distance than the others.
198 * Do some quick checks here and only use the SLIT if it passes.
200 static int __init
slit_valid(struct acpi_table_slit
*slit
)
203 int d
= slit
->locality_count
;
204 for (i
= 0; i
< d
; i
++) {
205 for (j
= 0; j
< d
; j
++) {
206 u8 val
= slit
->entry
[d
*i
+ j
];
208 if (val
!= LOCAL_DISTANCE
)
210 } else if (val
<= LOCAL_DISTANCE
)
217 void __init
bad_srat(void)
219 pr_err("SRAT: SRAT not used.\n");
223 int __init
srat_disabled(void)
225 return acpi_numa
< 0;
#if defined(CONFIG_X86) || defined(CONFIG_ARM64)
/*
 * Callback for SLIT parsing.  pxm_to_node() returns NUMA_NO_NODE for
 * I/O localities since SRAT does not list them.  I/O localities are
 * not supported at this point.
 *
 * For every pair of localities that both map to a node, the matching
 * SLIT matrix entry is passed to numa_set_distance().
 */
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
	int i, j;

	for (i = 0; i < slit->locality_count; i++) {
		const int from_node = pxm_to_node(i);

		if (from_node == NUMA_NO_NODE)
			continue;

		for (j = 0; j < slit->locality_count; j++) {
			const int to_node = pxm_to_node(j);

			if (to_node == NUMA_NO_NODE)
				continue;

			numa_set_distance(from_node, to_node,
				slit->entry[slit->locality_count * i + j]);
		}
	}
}

/*
 * Default callback for parsing of the Proximity Domain <-> Memory
 * Area mappings
 *
 * Registers the range [base_address, base_address + length) described by
 * @ma with the node mapped from its proximity domain.
 *
 * Returns 0 on success and -EINVAL otherwise.  Entries that are skipped
 * (SRAT already disabled, entry not enabled, hot-pluggable memory without
 * CONFIG_MEMORY_HOTPLUG) fail quietly; a short header, a failed node
 * mapping or a numa_add_memblk() error additionally call bad_srat().
 */
int __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
	u64 start, end;
	u32 hotpluggable;
	int node, pxm;

	if (srat_disabled())
		goto out_err;
	if (ma->header.length < sizeof(struct acpi_srat_mem_affinity)) {
		pr_err("SRAT: Unexpected header length: %d\n",
		       ma->header.length);
		goto out_err_bad_srat;
	}
	if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
		goto out_err;
	hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
	if (hotpluggable && !IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
		goto out_err;

	start = ma->base_address;
	end = start + ma->length;
	pxm = ma->proximity_domain;
	/* For SRAT revision 1 and earlier only the low byte is used. */
	if (acpi_srat_revision <= 1)
		pxm &= 0xff;

	node = acpi_map_pxm_to_node(pxm);
	if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
		pr_err("SRAT: Too many proximity domains.\n");
		goto out_err_bad_srat;
	}

	if (numa_add_memblk(node, start, end) < 0) {
		pr_err("SRAT: Failed to add memblk to node %u [mem %#010Lx-%#010Lx]\n",
		       node, (unsigned long long) start,
		       (unsigned long long) end - 1);
		goto out_err_bad_srat;
	}

	node_set(node, numa_nodes_parsed);

	pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n",
		node, pxm,
		(unsigned long long) start, (unsigned long long) end - 1,
		hotpluggable ? " hotplug" : "",
		ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : "");

	/* Mark hotplug range in memblock. */
	if (hotpluggable && memblock_mark_hotplug(start, ma->length))
		pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
			(unsigned long long)start, (unsigned long long)end - 1);

	max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));

	return 0;
out_err_bad_srat:
	bad_srat();
out_err:
	return -EINVAL;
}
#endif /* defined(CONFIG_X86) || defined (CONFIG_ARM64) */

322 static int __init
acpi_parse_slit(struct acpi_table_header
*table
)
324 struct acpi_table_slit
*slit
= (struct acpi_table_slit
*)table
;
326 if (!slit_valid(slit
)) {
327 pr_info("SLIT table looks invalid. Not used.\n");
330 acpi_numa_slit_init(slit
);
336 acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity
*pa
)
338 pr_warn("Found unsupported x2apic [0x%08x] SRAT entry\n", pa
->apic_id
);
342 acpi_parse_x2apic_affinity(struct acpi_subtable_header
*header
,
343 const unsigned long end
)
345 struct acpi_srat_x2apic_cpu_affinity
*processor_affinity
;
347 processor_affinity
= (struct acpi_srat_x2apic_cpu_affinity
*)header
;
348 if (!processor_affinity
)
351 acpi_table_print_srat_entry(header
);
353 /* let architecture-dependent part to do it */
354 acpi_numa_x2apic_affinity_init(processor_affinity
);
360 acpi_parse_processor_affinity(struct acpi_subtable_header
*header
,
361 const unsigned long end
)
363 struct acpi_srat_cpu_affinity
*processor_affinity
;
365 processor_affinity
= (struct acpi_srat_cpu_affinity
*)header
;
366 if (!processor_affinity
)
369 acpi_table_print_srat_entry(header
);
371 /* let architecture-dependent part to do it */
372 acpi_numa_processor_affinity_init(processor_affinity
);
378 acpi_parse_gicc_affinity(struct acpi_subtable_header
*header
,
379 const unsigned long end
)
381 struct acpi_srat_gicc_affinity
*processor_affinity
;
383 processor_affinity
= (struct acpi_srat_gicc_affinity
*)header
;
384 if (!processor_affinity
)
387 acpi_table_print_srat_entry(header
);
389 /* let architecture-dependent part to do it */
390 acpi_numa_gicc_affinity_init(processor_affinity
);
395 static int __initdata parsed_numa_memblks
;
398 acpi_parse_memory_affinity(struct acpi_subtable_header
* header
,
399 const unsigned long end
)
401 struct acpi_srat_mem_affinity
*memory_affinity
;
403 memory_affinity
= (struct acpi_srat_mem_affinity
*)header
;
404 if (!memory_affinity
)
407 acpi_table_print_srat_entry(header
);
409 /* let architecture-dependent part to do it */
410 if (!acpi_numa_memory_affinity_init(memory_affinity
))
411 parsed_numa_memblks
++;
415 static int __init
acpi_parse_srat(struct acpi_table_header
*table
)
417 struct acpi_table_srat
*srat
= (struct acpi_table_srat
*)table
;
419 acpi_srat_revision
= srat
->header
.revision
;
421 /* Real work done in acpi_table_parse_srat below. */
427 acpi_table_parse_srat(enum acpi_srat_type id
,
428 acpi_tbl_entry_handler handler
, unsigned int max_entries
)
430 return acpi_table_parse_entries(ACPI_SIG_SRAT
,
431 sizeof(struct acpi_table_srat
), id
,
432 handler
, max_entries
);
435 int __init
acpi_numa_init(void)
443 * Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
444 * SRAT cpu entries could have different order with that in MADT.
445 * So go over all cpu entries in SRAT to get apicid to node mapping.
448 /* SRAT: System Resource Affinity Table */
449 if (!acpi_table_parse(ACPI_SIG_SRAT
, acpi_parse_srat
)) {
450 struct acpi_subtable_proc srat_proc
[3];
452 memset(srat_proc
, 0, sizeof(srat_proc
));
453 srat_proc
[0].id
= ACPI_SRAT_TYPE_CPU_AFFINITY
;
454 srat_proc
[0].handler
= acpi_parse_processor_affinity
;
455 srat_proc
[1].id
= ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY
;
456 srat_proc
[1].handler
= acpi_parse_x2apic_affinity
;
457 srat_proc
[2].id
= ACPI_SRAT_TYPE_GICC_AFFINITY
;
458 srat_proc
[2].handler
= acpi_parse_gicc_affinity
;
460 acpi_table_parse_entries_array(ACPI_SIG_SRAT
,
461 sizeof(struct acpi_table_srat
),
462 srat_proc
, ARRAY_SIZE(srat_proc
), 0);
464 cnt
= acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY
,
465 acpi_parse_memory_affinity
, 0);
468 /* SLIT: System Locality Information Table */
469 acpi_table_parse(ACPI_SIG_SLIT
, acpi_parse_slit
);
473 else if (!parsed_numa_memblks
)
478 static int acpi_get_pxm(acpi_handle h
)
480 unsigned long long pxm
;
483 acpi_handle phandle
= h
;
487 status
= acpi_evaluate_integer(handle
, "_PXM", NULL
, &pxm
);
488 if (ACPI_SUCCESS(status
))
490 status
= acpi_get_parent(handle
, &phandle
);
491 } while (ACPI_SUCCESS(status
));
495 int acpi_get_node(acpi_handle handle
)
499 pxm
= acpi_get_pxm(handle
);
501 return acpi_map_pxm_to_node(pxm
);
503 EXPORT_SYMBOL(acpi_get_node
);