/*
 *  acpi_numa.c - ACPI NUMA support
 *
 *  Copyright (C) 2002 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
/* Message prefix macro ("ACPI: "); defined before the headers are included. */
#define pr_fmt(fmt) "ACPI: " fmt
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/acpi.h>
#include <linux/memblock.h>
#include <linux/numa.h>
#include <linux/nodemask.h>
#include <linux/topology.h>
35 static nodemask_t nodes_found_map
= NODE_MASK_NONE
;
37 /* maps to convert between proximity domain and logical node ID */
38 static int pxm_to_node_map
[MAX_PXM_DOMAINS
]
39 = { [0 ... MAX_PXM_DOMAINS
- 1] = NUMA_NO_NODE
};
40 static int node_to_pxm_map
[MAX_NUMNODES
]
41 = { [0 ... MAX_NUMNODES
- 1] = PXM_INVAL
};
43 unsigned char acpi_srat_revision __initdata
;
44 int acpi_numa __initdata
;
46 int pxm_to_node(int pxm
)
50 return pxm_to_node_map
[pxm
];
53 int node_to_pxm(int node
)
57 return node_to_pxm_map
[node
];
60 static void __acpi_map_pxm_to_node(int pxm
, int node
)
62 if (pxm_to_node_map
[pxm
] == NUMA_NO_NODE
|| node
< pxm_to_node_map
[pxm
])
63 pxm_to_node_map
[pxm
] = node
;
64 if (node_to_pxm_map
[node
] == PXM_INVAL
|| pxm
< node_to_pxm_map
[node
])
65 node_to_pxm_map
[node
] = pxm
;
68 int acpi_map_pxm_to_node(int pxm
)
72 if (pxm
< 0 || pxm
>= MAX_PXM_DOMAINS
|| numa_off
)
75 node
= pxm_to_node_map
[pxm
];
77 if (node
== NUMA_NO_NODE
) {
78 if (nodes_weight(nodes_found_map
) >= MAX_NUMNODES
)
80 node
= first_unset_node(nodes_found_map
);
81 __acpi_map_pxm_to_node(pxm
, node
);
82 node_set(node
, nodes_found_map
);
89 * acpi_map_pxm_to_online_node - Map proximity ID to online node
90 * @pxm: ACPI proximity ID
92 * This is similar to acpi_map_pxm_to_node(), but always returns an online
93 * node. When the mapped node from a given proximity ID is offline, it
94 * looks up the node distance table and returns the nearest online node.
96 * ACPI device drivers, which are called after the NUMA initialization has
97 * completed in the kernel, can call this interface to obtain their device
98 * NUMA topology from ACPI tables. Such drivers do not have to deal with
99 * offline nodes. A node may be offline when a device proximity ID is
100 * unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
103 int acpi_map_pxm_to_online_node(int pxm
)
107 node
= acpi_map_pxm_to_node(pxm
);
109 if (node
== NUMA_NO_NODE
)
113 if (!node_online(node
)) {
114 int min_dist
= INT_MAX
, dist
, n
;
116 for_each_online_node(n
) {
117 dist
= node_distance(node
, n
);
118 if (dist
< min_dist
) {
127 EXPORT_SYMBOL(acpi_map_pxm_to_online_node
);
130 acpi_table_print_srat_entry(struct acpi_subtable_header
*header
)
132 switch (header
->type
) {
133 case ACPI_SRAT_TYPE_CPU_AFFINITY
:
135 struct acpi_srat_cpu_affinity
*p
=
136 (struct acpi_srat_cpu_affinity
*)header
;
137 pr_debug("SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n",
138 p
->apic_id
, p
->local_sapic_eid
,
139 p
->proximity_domain_lo
,
140 (p
->flags
& ACPI_SRAT_CPU_ENABLED
) ?
141 "enabled" : "disabled");
145 case ACPI_SRAT_TYPE_MEMORY_AFFINITY
:
147 struct acpi_srat_mem_affinity
*p
=
148 (struct acpi_srat_mem_affinity
*)header
;
149 pr_debug("SRAT Memory (0x%lx length 0x%lx) in proximity domain %d %s%s%s\n",
150 (unsigned long)p
->base_address
,
151 (unsigned long)p
->length
,
153 (p
->flags
& ACPI_SRAT_MEM_ENABLED
) ?
154 "enabled" : "disabled",
155 (p
->flags
& ACPI_SRAT_MEM_HOT_PLUGGABLE
) ?
156 " hot-pluggable" : "",
157 (p
->flags
& ACPI_SRAT_MEM_NON_VOLATILE
) ?
158 " non-volatile" : "");
162 case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY
:
164 struct acpi_srat_x2apic_cpu_affinity
*p
=
165 (struct acpi_srat_x2apic_cpu_affinity
*)header
;
166 pr_debug("SRAT Processor (x2apicid[0x%08x]) in proximity domain %d %s\n",
169 (p
->flags
& ACPI_SRAT_CPU_ENABLED
) ?
170 "enabled" : "disabled");
174 case ACPI_SRAT_TYPE_GICC_AFFINITY
:
176 struct acpi_srat_gicc_affinity
*p
=
177 (struct acpi_srat_gicc_affinity
*)header
;
178 pr_debug("SRAT Processor (acpi id[0x%04x]) in proximity domain %d %s\n",
179 p
->acpi_processor_uid
,
181 (p
->flags
& ACPI_SRAT_GICC_ENABLED
) ?
182 "enabled" : "disabled");
187 pr_warn("Found unsupported SRAT entry (type = 0x%x)\n",
194 * A lot of BIOS fill in 10 (= no distance) everywhere. This messes
195 * up the NUMA heuristics which wants the local node to have a smaller
196 * distance than the others.
197 * Do some quick checks here and only use the SLIT if it passes.
199 static int __init
slit_valid(struct acpi_table_slit
*slit
)
202 int d
= slit
->locality_count
;
203 for (i
= 0; i
< d
; i
++) {
204 for (j
= 0; j
< d
; j
++) {
205 u8 val
= slit
->entry
[d
*i
+ j
];
207 if (val
!= LOCAL_DISTANCE
)
209 } else if (val
<= LOCAL_DISTANCE
)
216 void __init
bad_srat(void)
218 pr_err("SRAT: SRAT not used.\n");
222 int __init
srat_disabled(void)
224 return acpi_numa
< 0;
227 #if defined(CONFIG_X86) || defined(CONFIG_ARM64)
229 * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for
230 * I/O localities since SRAT does not list them. I/O localities are
231 * not supported at this point.
233 void __init
acpi_numa_slit_init(struct acpi_table_slit
*slit
)
237 for (i
= 0; i
< slit
->locality_count
; i
++) {
238 const int from_node
= pxm_to_node(i
);
240 if (from_node
== NUMA_NO_NODE
)
243 for (j
= 0; j
< slit
->locality_count
; j
++) {
244 const int to_node
= pxm_to_node(j
);
246 if (to_node
== NUMA_NO_NODE
)
249 numa_set_distance(from_node
, to_node
,
250 slit
->entry
[slit
->locality_count
* i
+ j
]);
256 * Default callback for parsing of the Proximity Domain <-> Memory
260 acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity
*ma
)
268 if (ma
->header
.length
< sizeof(struct acpi_srat_mem_affinity
)) {
269 pr_err("SRAT: Unexpected header length: %d\n",
271 goto out_err_bad_srat
;
273 if ((ma
->flags
& ACPI_SRAT_MEM_ENABLED
) == 0)
275 hotpluggable
= ma
->flags
& ACPI_SRAT_MEM_HOT_PLUGGABLE
;
276 if (hotpluggable
&& !IS_ENABLED(CONFIG_MEMORY_HOTPLUG
))
279 start
= ma
->base_address
;
280 end
= start
+ ma
->length
;
281 pxm
= ma
->proximity_domain
;
282 if (acpi_srat_revision
<= 1)
285 node
= acpi_map_pxm_to_node(pxm
);
286 if (node
== NUMA_NO_NODE
|| node
>= MAX_NUMNODES
) {
287 pr_err("SRAT: Too many proximity domains.\n");
288 goto out_err_bad_srat
;
291 if (numa_add_memblk(node
, start
, end
) < 0) {
292 pr_err("SRAT: Failed to add memblk to node %u [mem %#010Lx-%#010Lx]\n",
293 node
, (unsigned long long) start
,
294 (unsigned long long) end
- 1);
295 goto out_err_bad_srat
;
298 node_set(node
, numa_nodes_parsed
);
300 pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n",
302 (unsigned long long) start
, (unsigned long long) end
- 1,
303 hotpluggable
? " hotplug" : "",
304 ma
->flags
& ACPI_SRAT_MEM_NON_VOLATILE
? " non-volatile" : "");
306 /* Mark hotplug range in memblock. */
307 if (hotpluggable
&& memblock_mark_hotplug(start
, ma
->length
))
308 pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
309 (unsigned long long)start
, (unsigned long long)end
- 1);
311 max_possible_pfn
= max(max_possible_pfn
, PFN_UP(end
- 1));
319 #endif /* defined(CONFIG_X86) || defined (CONFIG_ARM64) */
321 static int __init
acpi_parse_slit(struct acpi_table_header
*table
)
323 struct acpi_table_slit
*slit
= (struct acpi_table_slit
*)table
;
325 if (!slit_valid(slit
)) {
326 pr_info("SLIT table looks invalid. Not used.\n");
329 acpi_numa_slit_init(slit
);
335 acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity
*pa
)
337 pr_warn("Found unsupported x2apic [0x%08x] SRAT entry\n", pa
->apic_id
);
341 acpi_parse_x2apic_affinity(struct acpi_subtable_header
*header
,
342 const unsigned long end
)
344 struct acpi_srat_x2apic_cpu_affinity
*processor_affinity
;
346 processor_affinity
= (struct acpi_srat_x2apic_cpu_affinity
*)header
;
347 if (!processor_affinity
)
350 acpi_table_print_srat_entry(header
);
352 /* let architecture-dependent part to do it */
353 acpi_numa_x2apic_affinity_init(processor_affinity
);
359 acpi_parse_processor_affinity(struct acpi_subtable_header
*header
,
360 const unsigned long end
)
362 struct acpi_srat_cpu_affinity
*processor_affinity
;
364 processor_affinity
= (struct acpi_srat_cpu_affinity
*)header
;
365 if (!processor_affinity
)
368 acpi_table_print_srat_entry(header
);
370 /* let architecture-dependent part to do it */
371 acpi_numa_processor_affinity_init(processor_affinity
);
377 acpi_parse_gicc_affinity(struct acpi_subtable_header
*header
,
378 const unsigned long end
)
380 struct acpi_srat_gicc_affinity
*processor_affinity
;
382 processor_affinity
= (struct acpi_srat_gicc_affinity
*)header
;
383 if (!processor_affinity
)
386 acpi_table_print_srat_entry(header
);
388 /* let architecture-dependent part to do it */
389 acpi_numa_gicc_affinity_init(processor_affinity
);
394 static int __initdata parsed_numa_memblks
;
397 acpi_parse_memory_affinity(struct acpi_subtable_header
* header
,
398 const unsigned long end
)
400 struct acpi_srat_mem_affinity
*memory_affinity
;
402 memory_affinity
= (struct acpi_srat_mem_affinity
*)header
;
403 if (!memory_affinity
)
406 acpi_table_print_srat_entry(header
);
408 /* let architecture-dependent part to do it */
409 if (!acpi_numa_memory_affinity_init(memory_affinity
))
410 parsed_numa_memblks
++;
414 static int __init
acpi_parse_srat(struct acpi_table_header
*table
)
416 struct acpi_table_srat
*srat
= (struct acpi_table_srat
*)table
;
418 acpi_srat_revision
= srat
->header
.revision
;
420 /* Real work done in acpi_table_parse_srat below. */
426 acpi_table_parse_srat(enum acpi_srat_type id
,
427 acpi_tbl_entry_handler handler
, unsigned int max_entries
)
429 return acpi_table_parse_entries(ACPI_SIG_SRAT
,
430 sizeof(struct acpi_table_srat
), id
,
431 handler
, max_entries
);
434 int __init
acpi_numa_init(void)
442 * Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
443 * SRAT cpu entries could have different order with that in MADT.
444 * So go over all cpu entries in SRAT to get apicid to node mapping.
447 /* SRAT: System Resource Affinity Table */
448 if (!acpi_table_parse(ACPI_SIG_SRAT
, acpi_parse_srat
)) {
449 struct acpi_subtable_proc srat_proc
[3];
451 memset(srat_proc
, 0, sizeof(srat_proc
));
452 srat_proc
[0].id
= ACPI_SRAT_TYPE_CPU_AFFINITY
;
453 srat_proc
[0].handler
= acpi_parse_processor_affinity
;
454 srat_proc
[1].id
= ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY
;
455 srat_proc
[1].handler
= acpi_parse_x2apic_affinity
;
456 srat_proc
[2].id
= ACPI_SRAT_TYPE_GICC_AFFINITY
;
457 srat_proc
[2].handler
= acpi_parse_gicc_affinity
;
459 acpi_table_parse_entries_array(ACPI_SIG_SRAT
,
460 sizeof(struct acpi_table_srat
),
461 srat_proc
, ARRAY_SIZE(srat_proc
), 0);
463 cnt
= acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY
,
464 acpi_parse_memory_affinity
, 0);
467 /* SLIT: System Locality Information Table */
468 acpi_table_parse(ACPI_SIG_SLIT
, acpi_parse_slit
);
472 else if (!parsed_numa_memblks
)
477 static int acpi_get_pxm(acpi_handle h
)
479 unsigned long long pxm
;
482 acpi_handle phandle
= h
;
486 status
= acpi_evaluate_integer(handle
, "_PXM", NULL
, &pxm
);
487 if (ACPI_SUCCESS(status
))
489 status
= acpi_get_parent(handle
, &phandle
);
490 } while (ACPI_SUCCESS(status
));
494 int acpi_get_node(acpi_handle handle
)
498 pxm
= acpi_get_pxm(handle
);
500 return acpi_map_pxm_to_node(pxm
);
502 EXPORT_SYMBOL(acpi_get_node
);