/*
 *  acpi_numa.c - ACPI NUMA support
 *
 *  Copyright (C) 2002 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 */

#define pr_fmt(fmt) "ACPI: " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/acpi.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
#include <linux/numa.h>
#include <linux/nodemask.h>
#include <linux/topology.h>

36 static nodemask_t nodes_found_map
= NODE_MASK_NONE
;
38 /* maps to convert between proximity domain and logical node ID */
39 static int pxm_to_node_map
[MAX_PXM_DOMAINS
]
40 = { [0 ... MAX_PXM_DOMAINS
- 1] = NUMA_NO_NODE
};
41 static int node_to_pxm_map
[MAX_NUMNODES
]
42 = { [0 ... MAX_NUMNODES
- 1] = PXM_INVAL
};
44 unsigned char acpi_srat_revision __initdata
;
45 int acpi_numa __initdata
;
47 int pxm_to_node(int pxm
)
51 return pxm_to_node_map
[pxm
];
54 int node_to_pxm(int node
)
58 return node_to_pxm_map
[node
];
61 static void __acpi_map_pxm_to_node(int pxm
, int node
)
63 if (pxm_to_node_map
[pxm
] == NUMA_NO_NODE
|| node
< pxm_to_node_map
[pxm
])
64 pxm_to_node_map
[pxm
] = node
;
65 if (node_to_pxm_map
[node
] == PXM_INVAL
|| pxm
< node_to_pxm_map
[node
])
66 node_to_pxm_map
[node
] = pxm
;
69 int acpi_map_pxm_to_node(int pxm
)
73 if (pxm
< 0 || pxm
>= MAX_PXM_DOMAINS
|| numa_off
)
76 node
= pxm_to_node_map
[pxm
];
78 if (node
== NUMA_NO_NODE
) {
79 if (nodes_weight(nodes_found_map
) >= MAX_NUMNODES
)
81 node
= first_unset_node(nodes_found_map
);
82 __acpi_map_pxm_to_node(pxm
, node
);
83 node_set(node
, nodes_found_map
);
90 * acpi_map_pxm_to_online_node - Map proximity ID to online node
91 * @pxm: ACPI proximity ID
93 * This is similar to acpi_map_pxm_to_node(), but always returns an online
94 * node. When the mapped node from a given proximity ID is offline, it
95 * looks up the node distance table and returns the nearest online node.
97 * ACPI device drivers, which are called after the NUMA initialization has
98 * completed in the kernel, can call this interface to obtain their device
99 * NUMA topology from ACPI tables. Such drivers do not have to deal with
100 * offline nodes. A node may be offline when a device proximity ID is
101 * unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
104 int acpi_map_pxm_to_online_node(int pxm
)
106 int node
, n
, dist
, min_dist
;
108 node
= acpi_map_pxm_to_node(pxm
);
110 if (node
== NUMA_NO_NODE
)
113 if (!node_online(node
)) {
115 for_each_online_node(n
) {
116 dist
= node_distance(node
, n
);
117 if (dist
< min_dist
) {
126 EXPORT_SYMBOL(acpi_map_pxm_to_online_node
);
129 acpi_table_print_srat_entry(struct acpi_subtable_header
*header
)
131 switch (header
->type
) {
132 case ACPI_SRAT_TYPE_CPU_AFFINITY
:
134 struct acpi_srat_cpu_affinity
*p
=
135 (struct acpi_srat_cpu_affinity
*)header
;
136 pr_debug("SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n",
137 p
->apic_id
, p
->local_sapic_eid
,
138 p
->proximity_domain_lo
,
139 (p
->flags
& ACPI_SRAT_CPU_ENABLED
) ?
140 "enabled" : "disabled");
144 case ACPI_SRAT_TYPE_MEMORY_AFFINITY
:
146 struct acpi_srat_mem_affinity
*p
=
147 (struct acpi_srat_mem_affinity
*)header
;
148 pr_debug("SRAT Memory (0x%lx length 0x%lx) in proximity domain %d %s%s%s\n",
149 (unsigned long)p
->base_address
,
150 (unsigned long)p
->length
,
152 (p
->flags
& ACPI_SRAT_MEM_ENABLED
) ?
153 "enabled" : "disabled",
154 (p
->flags
& ACPI_SRAT_MEM_HOT_PLUGGABLE
) ?
155 " hot-pluggable" : "",
156 (p
->flags
& ACPI_SRAT_MEM_NON_VOLATILE
) ?
157 " non-volatile" : "");
161 case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY
:
163 struct acpi_srat_x2apic_cpu_affinity
*p
=
164 (struct acpi_srat_x2apic_cpu_affinity
*)header
;
165 pr_debug("SRAT Processor (x2apicid[0x%08x]) in proximity domain %d %s\n",
168 (p
->flags
& ACPI_SRAT_CPU_ENABLED
) ?
169 "enabled" : "disabled");
173 case ACPI_SRAT_TYPE_GICC_AFFINITY
:
175 struct acpi_srat_gicc_affinity
*p
=
176 (struct acpi_srat_gicc_affinity
*)header
;
177 pr_debug("SRAT Processor (acpi id[0x%04x]) in proximity domain %d %s\n",
178 p
->acpi_processor_uid
,
180 (p
->flags
& ACPI_SRAT_GICC_ENABLED
) ?
181 "enabled" : "disabled");
186 pr_warn("Found unsupported SRAT entry (type = 0x%x)\n",
193 * A lot of BIOS fill in 10 (= no distance) everywhere. This messes
194 * up the NUMA heuristics which wants the local node to have a smaller
195 * distance than the others.
196 * Do some quick checks here and only use the SLIT if it passes.
198 static int __init
slit_valid(struct acpi_table_slit
*slit
)
201 int d
= slit
->locality_count
;
202 for (i
= 0; i
< d
; i
++) {
203 for (j
= 0; j
< d
; j
++) {
204 u8 val
= slit
->entry
[d
*i
+ j
];
206 if (val
!= LOCAL_DISTANCE
)
208 } else if (val
<= LOCAL_DISTANCE
)
215 void __init
bad_srat(void)
217 pr_err("SRAT: SRAT not used.\n");
221 int __init
srat_disabled(void)
223 return acpi_numa
< 0;
#if defined(CONFIG_X86) || defined(CONFIG_ARM64)
/*
 * Callback for SLIT parsing.  pxm_to_node() returns NUMA_NO_NODE for
 * I/O localities since SRAT does not list them.  I/O localities are
 * not supported at this point.
 *
 * Every (i, j) locality pair whose two ends both map to a node has its
 * SLIT entry passed to numa_set_distance(); other pairs are skipped.
 */
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
	int i, j;

	for (i = 0; i < slit->locality_count; i++) {
		const int from_node = pxm_to_node(i);

		if (from_node == NUMA_NO_NODE)
			continue;

		for (j = 0; j < slit->locality_count; j++) {
			const int to_node = pxm_to_node(j);

			if (to_node == NUMA_NO_NODE)
				continue;

			numa_set_distance(from_node, to_node,
				slit->entry[slit->locality_count * i + j]);
		}
	}
}

/*
 * Default callback for parsing of the Proximity Domain <-> Memory
 * Area mappings
 *
 * @ma: SRAT memory affinity entry
 *
 * Maps the entry's proximity domain to a node, registers the range
 * [base_address, base_address + length) with numa_add_memblk(), marks the
 * node in numa_nodes_parsed and raises max_possible_pfn if needed.
 *
 * Returns 0 on success and -EINVAL otherwise.  Entries that are disabled,
 * or hot-pluggable without CONFIG_MEMORY_HOTPLUG, are rejected quietly.  A
 * short header, a failed node mapping or a numa_add_memblk() failure also
 * calls bad_srat().
 */
int __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
	u64 start, end;
	u32 hotpluggable;
	int node, pxm;

	if (srat_disabled())
		goto out_err;
	if (ma->header.length < sizeof(struct acpi_srat_mem_affinity)) {
		pr_err("SRAT: Unexpected header length: %d\n",
			ma->header.length);
		goto out_err_bad_srat;
	}
	if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
		goto out_err;
	hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
	if (hotpluggable && !IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
		goto out_err;

	start = ma->base_address;
	end = start + ma->length;
	pxm = ma->proximity_domain;
	/* For SRAT revision 1 and below only the low byte is used. */
	if (acpi_srat_revision <= 1)
		pxm &= 0xff;

	node = acpi_map_pxm_to_node(pxm);
	if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
		pr_err("SRAT: Too many proximity domains.\n");
		goto out_err_bad_srat;
	}

	if (numa_add_memblk(node, start, end) < 0) {
		pr_err("SRAT: Failed to add memblk to node %u [mem %#010Lx-%#010Lx]\n",
		       node, (unsigned long long) start,
		       (unsigned long long) end - 1);
		goto out_err_bad_srat;
	}

	node_set(node, numa_nodes_parsed);

	pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n",
		node, pxm,
		(unsigned long long) start, (unsigned long long) end - 1,
		hotpluggable ? " hotplug" : "",
		ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : "");

	/* Mark hotplug range in memblock. */
	if (hotpluggable && memblock_mark_hotplug(start, ma->length))
		pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
			(unsigned long long)start, (unsigned long long)end - 1);

	max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));

	return 0;
out_err_bad_srat:
	bad_srat();
out_err:
	return -EINVAL;
}
#endif /* defined(CONFIG_X86) || defined (CONFIG_ARM64) */

320 static int __init
acpi_parse_slit(struct acpi_table_header
*table
)
322 struct acpi_table_slit
*slit
= (struct acpi_table_slit
*)table
;
324 if (!slit_valid(slit
)) {
325 pr_info("SLIT table looks invalid. Not used.\n");
328 acpi_numa_slit_init(slit
);
334 acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity
*pa
)
336 pr_warn("Found unsupported x2apic [0x%08x] SRAT entry\n", pa
->apic_id
);
340 acpi_parse_x2apic_affinity(struct acpi_subtable_header
*header
,
341 const unsigned long end
)
343 struct acpi_srat_x2apic_cpu_affinity
*processor_affinity
;
345 processor_affinity
= (struct acpi_srat_x2apic_cpu_affinity
*)header
;
346 if (!processor_affinity
)
349 acpi_table_print_srat_entry(header
);
351 /* let architecture-dependent part to do it */
352 acpi_numa_x2apic_affinity_init(processor_affinity
);
358 acpi_parse_processor_affinity(struct acpi_subtable_header
*header
,
359 const unsigned long end
)
361 struct acpi_srat_cpu_affinity
*processor_affinity
;
363 processor_affinity
= (struct acpi_srat_cpu_affinity
*)header
;
364 if (!processor_affinity
)
367 acpi_table_print_srat_entry(header
);
369 /* let architecture-dependent part to do it */
370 acpi_numa_processor_affinity_init(processor_affinity
);
376 acpi_parse_gicc_affinity(struct acpi_subtable_header
*header
,
377 const unsigned long end
)
379 struct acpi_srat_gicc_affinity
*processor_affinity
;
381 processor_affinity
= (struct acpi_srat_gicc_affinity
*)header
;
382 if (!processor_affinity
)
385 acpi_table_print_srat_entry(header
);
387 /* let architecture-dependent part to do it */
388 acpi_numa_gicc_affinity_init(processor_affinity
);
393 static int __initdata parsed_numa_memblks
;
396 acpi_parse_memory_affinity(struct acpi_subtable_header
* header
,
397 const unsigned long end
)
399 struct acpi_srat_mem_affinity
*memory_affinity
;
401 memory_affinity
= (struct acpi_srat_mem_affinity
*)header
;
402 if (!memory_affinity
)
405 acpi_table_print_srat_entry(header
);
407 /* let architecture-dependent part to do it */
408 if (!acpi_numa_memory_affinity_init(memory_affinity
))
409 parsed_numa_memblks
++;
413 static int __init
acpi_parse_srat(struct acpi_table_header
*table
)
415 struct acpi_table_srat
*srat
= (struct acpi_table_srat
*)table
;
417 acpi_srat_revision
= srat
->header
.revision
;
419 /* Real work done in acpi_table_parse_srat below. */
425 acpi_table_parse_srat(enum acpi_srat_type id
,
426 acpi_tbl_entry_handler handler
, unsigned int max_entries
)
428 return acpi_table_parse_entries(ACPI_SIG_SRAT
,
429 sizeof(struct acpi_table_srat
), id
,
430 handler
, max_entries
);
433 int __init
acpi_numa_init(void)
441 * Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
442 * SRAT cpu entries could have different order with that in MADT.
443 * So go over all cpu entries in SRAT to get apicid to node mapping.
446 /* SRAT: System Resource Affinity Table */
447 if (!acpi_table_parse(ACPI_SIG_SRAT
, acpi_parse_srat
)) {
448 struct acpi_subtable_proc srat_proc
[3];
450 memset(srat_proc
, 0, sizeof(srat_proc
));
451 srat_proc
[0].id
= ACPI_SRAT_TYPE_CPU_AFFINITY
;
452 srat_proc
[0].handler
= acpi_parse_processor_affinity
;
453 srat_proc
[1].id
= ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY
;
454 srat_proc
[1].handler
= acpi_parse_x2apic_affinity
;
455 srat_proc
[2].id
= ACPI_SRAT_TYPE_GICC_AFFINITY
;
456 srat_proc
[2].handler
= acpi_parse_gicc_affinity
;
458 acpi_table_parse_entries_array(ACPI_SIG_SRAT
,
459 sizeof(struct acpi_table_srat
),
460 srat_proc
, ARRAY_SIZE(srat_proc
), 0);
462 cnt
= acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY
,
463 acpi_parse_memory_affinity
, 0);
466 /* SLIT: System Locality Information Table */
467 acpi_table_parse(ACPI_SIG_SLIT
, acpi_parse_slit
);
471 else if (!parsed_numa_memblks
)
476 static int acpi_get_pxm(acpi_handle h
)
478 unsigned long long pxm
;
481 acpi_handle phandle
= h
;
485 status
= acpi_evaluate_integer(handle
, "_PXM", NULL
, &pxm
);
486 if (ACPI_SUCCESS(status
))
488 status
= acpi_get_parent(handle
, &phandle
);
489 } while (ACPI_SUCCESS(status
));
493 int acpi_get_node(acpi_handle handle
)
497 pxm
= acpi_get_pxm(handle
);
499 return acpi_map_pxm_to_node(pxm
);
501 EXPORT_SYMBOL(acpi_get_node
);