4 * Copyright (c) 2009 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD$");
35 #include <sys/param.h>
36 #include <sys/systm.h>
40 #include <dev/acpi/acpica.h>
41 #include <dev/acpi/acpivar.h>
42 #include <dev/acpi/acpi_srat.h>
/*
 * The SRAT table this file works on.  It is assigned from the table
 * returned by AcpiGetTable(ACPI_SIG_SRAT, ...) and is asserted to be
 * non-NULL before the subtables are walked.
 */
44 static ACPI_TABLE_SRAT
*srat
;
46 struct acpisrat_node
{
47 acpisrat_nodeid_t nodeid
;
48 uint32_t ncpus
; /* Number of cpus in this node */
49 struct acpisrat_cpu
**cpu
; /* Array of cpus */
50 uint32_t nmems
; /* Number of memory ranges in this node */
51 struct acpisrat_mem
**mem
; /* Array of memory ranges */
/*
 * Flattened results built from the temporary CPU and memory lists.
 * The three arrays are allocated with kmem_zalloc() and sized by the
 * matching counter declared just before each of them.
 */
54 static uint32_t nnodes
; /* Number of NUMA nodes: highest node id seen (CPU or memory) + 1 */
55 static struct acpisrat_node
*node_array
; /* Array of nnodes NUMA nodes; entry i gets nodeid i */
56 static uint32_t ncpus
; /* Number of CPU entries in cpu_array */
57 static struct acpisrat_cpu
*cpu_array
; /* Array of ncpus CPU entries */
58 static uint32_t nmems
; /* Number of memory ranges in mem_array */
59 static struct acpisrat_mem
*mem_array
; /* Array of nmems memory ranges */
63 struct acpisrat_cpu cpu
;
64 TAILQ_ENTRY(cpulist
) entry
;
67 static TAILQ_HEAD(, cpulist
) cpulisthead
;
/*
 * Convenience wrappers around the TAILQ macros for the file-local CPU
 * list (cpulisthead), whose elements are linked through `entry`.
 *
 * CPU_INIT carries no trailing semicolon, matching MEM_INIT, so that
 * `CPU_INIT;` expands to exactly one statement and remains usable as
 * the body of an unbraced if/else.
 */
#define CPU_INIT		TAILQ_INIT(&cpulisthead)
#define CPU_FOREACH(cpu)	TAILQ_FOREACH(cpu, &cpulisthead, entry)
#define CPU_ADD(cpu)		TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry)
#define CPU_REM(cpu)		TAILQ_REMOVE(&cpulisthead, cpu, entry)
#define CPU_FIRST		TAILQ_FIRST(&cpulisthead)
77 struct acpisrat_mem mem
;
78 TAILQ_ENTRY(memlist
) entry
;
81 static TAILQ_HEAD(, memlist
) memlisthead
;
/*
 * Memory-range list helpers.  Each one forwards to the matching TAILQ
 * operation on the file-local memlisthead, using `entry` as the
 * linkage member.
 */
#define MEM_INIT		TAILQ_INIT(&memlisthead)
#define MEM_FIRST		TAILQ_FIRST(&memlisthead)
#define MEM_FOREACH(m)		TAILQ_FOREACH(m, &memlisthead, entry)
#define MEM_ADD(m)		TAILQ_INSERT_TAIL(&memlisthead, m, entry)
#define MEM_ADD_BEFORE(m, ref)	TAILQ_INSERT_BEFORE(ref, m, entry)
#define MEM_REM(m)		TAILQ_REMOVE(&memlisthead, m, entry)
91 static struct cpulist
*
94 return kmem_zalloc(sizeof(struct cpulist
), KM_NOSLEEP
);
98 cpu_free(struct cpulist
*c
)
100 kmem_free(c
, sizeof(struct cpulist
));
104 static struct cpulist
*
105 cpu_get(acpisrat_nodeid_t nodeid
)
110 if (tmp
->cpu
.nodeid
== nodeid
)
118 static struct memlist
*
121 return kmem_zalloc(sizeof(struct memlist
), KM_NOSLEEP
);
125 mem_free(struct memlist
*m
)
127 kmem_free(m
, sizeof(struct memlist
));
130 static struct memlist
*
131 mem_get(acpisrat_nodeid_t nodeid
)
136 if (tmp
->mem
.nodeid
== nodeid
)
147 ACPI_TABLE_HEADER
*table
;
150 rv
= AcpiGetTable(ACPI_SIG_SRAT
, 1, (ACPI_TABLE_HEADER
**)&table
);
151 if (ACPI_FAILURE(rv
))
154 /* Check if header is valid */
158 if (table
->Length
== 0xffffffff)
161 srat
= (ACPI_TABLE_SRAT
*)table
;
169 ACPI_SUBTABLE_HEADER
*subtable
;
170 ACPI_SRAT_CPU_AFFINITY
*srat_cpu
;
171 ACPI_SRAT_MEM_AFFINITY
*srat_mem
;
172 ACPI_SRAT_X2APIC_CPU_AFFINITY
*srat_x2apic
;
174 acpisrat_nodeid_t nodeid
;
175 struct cpulist
*cpuentry
= NULL
;
176 struct memlist
*mementry
;
178 bool ignore_cpu_affinity
= false;
180 KASSERT(srat
!= NULL
);
182 /* Content starts right after the header */
183 srat_pos
= sizeof(ACPI_TABLE_SRAT
);
185 while (srat_pos
< srat
->Header
.Length
) {
186 subtable
= (ACPI_SUBTABLE_HEADER
*)((char *)srat
+ srat_pos
);
187 srat_pos
+= subtable
->Length
;
189 switch (subtable
->Type
) {
190 case ACPI_SRAT_TYPE_CPU_AFFINITY
:
191 if (ignore_cpu_affinity
)
194 srat_cpu
= (ACPI_SRAT_CPU_AFFINITY
*)subtable
;
195 nodeid
= (srat_cpu
->ProximityDomainHi
[2] << 24) |
196 (srat_cpu
->ProximityDomainHi
[1] << 16) |
197 (srat_cpu
->ProximityDomainHi
[0] << 8) |
198 (srat_cpu
->ProximityDomainLo
);
200 cpuentry
= cpu_alloc();
201 if (cpuentry
== NULL
)
205 cpuentry
->cpu
.nodeid
= nodeid
;
206 cpuentry
->cpu
.apicid
= srat_cpu
->ApicId
;
207 cpuentry
->cpu
.sapiceid
= srat_cpu
->LocalSapicEid
;
208 cpuentry
->cpu
.flags
= srat_cpu
->Flags
;
209 cpuentry
->cpu
.clockdomain
= srat_cpu
->ClockDomain
;
212 case ACPI_SRAT_TYPE_MEMORY_AFFINITY
:
213 srat_mem
= (ACPI_SRAT_MEM_AFFINITY
*)subtable
;
214 nodeid
= srat_mem
->ProximityDomain
;
216 mementry
= mem_alloc();
217 if (mementry
== NULL
)
221 mementry
->mem
.nodeid
= nodeid
;
222 mementry
->mem
.baseaddress
= srat_mem
->BaseAddress
;
223 mementry
->mem
.length
= srat_mem
->Length
;
224 mementry
->mem
.flags
= srat_mem
->Flags
;
227 case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY
:
228 srat_x2apic
= (ACPI_SRAT_X2APIC_CPU_AFFINITY
*)subtable
;
229 nodeid
= srat_x2apic
->ProximityDomain
;
231 /* This table entry overrides
232 * ACPI_SRAT_TYPE_CPU_AFFINITY.
234 if (!ignore_cpu_affinity
) {
235 struct cpulist
*citer
;
236 while ((citer
= CPU_FIRST
) != NULL
) {
240 ignore_cpu_affinity
= true;
243 cpuentry
= cpu_alloc();
244 if (cpuentry
== NULL
)
248 cpuentry
->cpu
.nodeid
= nodeid
;
249 cpuentry
->cpu
.apicid
= srat_x2apic
->ApicId
;
250 cpuentry
->cpu
.clockdomain
= srat_x2apic
->ClockDomain
;
251 cpuentry
->cpu
.flags
= srat_x2apic
->Flags
;
254 case ACPI_SRAT_TYPE_RESERVED
:
255 printf("ACPI SRAT subtable reserved, length: 0x%x\n",
265 acpisrat_quirks(void)
267 struct cpulist
*citer
;
268 struct memlist
*mem
, *miter
;
270 /* Some sanity checks. */
272 /* Deal with holes in the memory nodes.
273 * The BIOS doesn't list memory nodes which
274 * don't have any memory modules plugged in.
275 * This behaviour has been observed on AMD machines.
277 * Do that by searching for CPUs whose NUMA node has
278 * no entry in the memory list, and then insert
279 * a zero memory range for the missing node.
282 mem
= mem_get(citer
->cpu
.nodeid
);
288 mem
->mem
.nodeid
= citer
->cpu
.nodeid
;
289 /* all other fields are already zero filled */
292 if (miter
->mem
.nodeid
< citer
->cpu
.nodeid
)
294 MEM_ADD_BEFORE(mem
, miter
);
305 if (!acpisrat_exist())
307 return acpisrat_refresh();
311 acpisrat_refresh(void)
314 struct cpulist
*citer
;
315 struct memlist
*miter
;
316 uint32_t cnodes
= 0, mnodes
= 0;
321 rc
= acpisrat_parse();
325 rc
= acpisrat_quirks();
329 /* cleanup resources */
330 rc
= acpisrat_exit();
337 cnodes
= MAX(citer
->cpu
.nodeid
, cnodes
);
343 mnodes
= MAX(miter
->mem
.nodeid
, mnodes
);
347 nnodes
= MAX(cnodes
, mnodes
) + 1;
349 node_array
= kmem_zalloc(nnodes
* sizeof(struct acpisrat_node
),
351 if (node_array
== NULL
)
354 cpu_array
= kmem_zalloc(ncpus
* sizeof(struct acpisrat_cpu
),
356 if (cpu_array
== NULL
)
359 mem_array
= kmem_zalloc(nmems
* sizeof(struct acpisrat_mem
),
361 if (mem_array
== NULL
)
366 memcpy(&cpu_array
[i
], &citer
->cpu
, sizeof(struct acpisrat_cpu
));
368 node_array
[citer
->cpu
.nodeid
].ncpus
++;
373 memcpy(&mem_array
[i
], &miter
->mem
, sizeof(struct acpisrat_mem
));
375 node_array
[miter
->mem
.nodeid
].nmems
++;
378 for (i
= 0; i
< nnodes
; i
++) {
379 node_array
[i
].nodeid
= i
;
381 node_array
[i
].cpu
= kmem_zalloc(node_array
[i
].ncpus
*
382 sizeof(struct acpisrat_cpu
*), KM_NOSLEEP
);
383 node_array
[i
].mem
= kmem_zalloc(node_array
[i
].nmems
*
384 sizeof(struct acpisrat_mem
*), KM_NOSLEEP
);
387 for (j
= 0; j
< ncpus
; j
++) {
388 if (cpu_array
[j
].nodeid
!= i
)
390 node_array
[i
].cpu
[k
] = &cpu_array
[j
];
395 for (j
= 0; j
< nmems
; j
++) {
396 if (mem_array
[j
].nodeid
!= i
)
398 node_array
[i
].mem
[k
] = &mem_array
[j
];
403 while ((citer
= CPU_FIRST
) != NULL
) {
408 while ((miter
= MEM_FIRST
) != NULL
) {
423 for (i
= 0; i
< nnodes
; i
++) {
424 if (node_array
[i
].cpu
)
425 kmem_free(node_array
[i
].cpu
,
426 node_array
[i
].ncpus
* sizeof(struct acpisrat_cpu
*));
427 if (node_array
[i
].mem
)
428 kmem_free(node_array
[i
].mem
,
429 node_array
[i
].nmems
* sizeof(struct acpisrat_mem
*));
431 kmem_free(node_array
, nnodes
* sizeof(struct acpisrat_node
));
436 kmem_free(cpu_array
, ncpus
* sizeof(struct acpisrat_cpu
));
440 kmem_free(mem_array
, nmems
* sizeof(struct acpisrat_mem
));
454 uint32_t i
, j
, nn
, nc
, nm
;
455 struct acpisrat_cpu c
;
456 struct acpisrat_mem m
;
458 nn
= acpisrat_nodes();
459 aprint_debug("SRAT: %u NUMA nodes\n", nn
);
460 for (i
= 0; i
< nn
; i
++) {
461 nc
= acpisrat_node_cpus(i
);
462 for (j
= 0; j
< nc
; j
++) {
463 acpisrat_cpu(i
, j
, &c
);
464 aprint_debug("SRAT: node %u cpu %u "
465 "(apic %u, sapic %u, flags %u, clockdomain %u)\n",
466 c
.nodeid
, j
, c
.apicid
, c
.sapiceid
, c
.flags
,
470 nm
= acpisrat_node_memoryranges(i
);
471 for (j
= 0; j
< nm
; j
++) {
472 acpisrat_mem(i
, j
, &m
);
473 aprint_debug("SRAT: node %u memory range %u (0x%"
474 PRIx64
" - 0x%"PRIx64
" flags %u)\n",
475 m
.nodeid
, j
, m
.baseaddress
,
476 m
.baseaddress
+ m
.length
, m
.flags
);
488 acpisrat_node_cpus(acpisrat_nodeid_t nodeid
)
490 return node_array
[nodeid
].ncpus
;
494 acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid
)
496 return node_array
[nodeid
].nmems
;
500 acpisrat_cpu(acpisrat_nodeid_t nodeid
, uint32_t cpunum
,
501 struct acpisrat_cpu
*c
)
503 memcpy(c
, node_array
[nodeid
].cpu
[cpunum
],
504 sizeof(struct acpisrat_cpu
));
508 acpisrat_mem(acpisrat_nodeid_t nodeid
, uint32_t memrange
,
509 struct acpisrat_mem
*mem
)
511 memcpy(mem
, node_array
[nodeid
].mem
[memrange
],
512 sizeof(struct acpisrat_mem
));