// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2023 Intel Corporation. All rights reserved. */
#include <linux/acpi.h>
#include <linux/xarray.h>
#include <linux/fw_table.h>
#include <linux/node.h>
#include <linux/overflow.h>
#include "cxlpci.h"
#include "cxlmem.h"
#include "core.h"
#include "cxl.h"
struct dsmas_entry {
        struct range dpa_range;
        u8 handle;
        struct access_coordinate coord[ACCESS_COORDINATE_MAX];
        struct access_coordinate cdat_coord[ACCESS_COORDINATE_MAX];
        int qos_class;
};
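/*
 * Scale a raw DSLBIS/SSLBIS entry by its base unit, returning 0 for invalid
 * or overflowing values. Latency results are divided by 1000 to convert the
 * picosecond-granularity CDAT/HMAT base unit to nanoseconds.
 */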
static u32 cdat_normalize(u16 entry, u64 base, u8 type)
{
        u32 value;

        /*
         * Check for invalid and overflow values
         */
        if (entry == 0xffff || !entry)
                return 0;
        else if (base > (UINT_MAX / (entry)))
                return 0;

        /*
         * CDAT fields follow the format of HMAT fields. See table 5 Device
         * Scoped Latency and Bandwidth Information Structure in Coherent Device
         * Attribute Table (CDAT) Specification v1.01.
         */
        value = entry * base;
        switch (type) {
        case ACPI_HMAT_ACCESS_LATENCY:
        case ACPI_HMAT_READ_LATENCY:
        case ACPI_HMAT_WRITE_LATENCY:
                value = DIV_ROUND_UP(value, 1000);
                break;
        default:
                break;
        }

        return value;
}
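/* Record the DPA range of one DSMAS subtable, keyed by its DSMAD handle. */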
static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg,
                              const unsigned long end)
{
        struct acpi_cdat_header *hdr = &header->cdat;
        struct acpi_cdat_dsmas *dsmas;
        int size = sizeof(*hdr) + sizeof(*dsmas);
        struct xarray *dsmas_xa = arg;
        struct dsmas_entry *dent;
        u16 len;
        int rc;

        len = le16_to_cpu((__force __le16)hdr->length);
        if (len != size || (unsigned long)hdr + len > end) {
                pr_warn("Malformed DSMAS table length: (%u:%u)\n", size, len);
                return -EINVAL;
        }

        /* Skip common header */
        dsmas = (struct acpi_cdat_dsmas *)(hdr + 1);

        dent = kzalloc(sizeof(*dent), GFP_KERNEL);
        if (!dent)
                return -ENOMEM;

        dent->handle = dsmas->dsmad_handle;
        dent->dpa_range.start = le64_to_cpu((__force __le64)dsmas->dpa_base_address);
        dent->dpa_range.end = le64_to_cpu((__force __le64)dsmas->dpa_base_address) +
                              le64_to_cpu((__force __le64)dsmas->dpa_length) - 1;

        rc = xa_insert(dsmas_xa, dent->handle, dent, GFP_KERNEL);
        if (rc) {
                kfree(dent);
                return rc;
        }

        return 0;
}
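/* Apply one HMAT-style latency or bandwidth value to the matching coordinate fields. */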
static void __cxl_access_coordinate_set(struct access_coordinate *coord,
                                        int access, unsigned int val)
{
        switch (access) {
        case ACPI_HMAT_ACCESS_LATENCY:
                coord->read_latency = val;
                coord->write_latency = val;
                break;
        case ACPI_HMAT_READ_LATENCY:
                coord->read_latency = val;
                break;
        case ACPI_HMAT_WRITE_LATENCY:
                coord->write_latency = val;
                break;
        case ACPI_HMAT_ACCESS_BANDWIDTH:
                coord->read_bandwidth = val;
                coord->write_bandwidth = val;
                break;
        case ACPI_HMAT_READ_BANDWIDTH:
                coord->read_bandwidth = val;
                break;
        case ACPI_HMAT_WRITE_BANDWIDTH:
                coord->write_bandwidth = val;
                break;
        }
}

static void cxl_access_coordinate_set(struct access_coordinate *coord,
                                      int access, unsigned int val)
{
        for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
                __cxl_access_coordinate_set(&coord[i], access, val);
}
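/*
 * Fold one DSLBIS subtable into the DSMAS entry it references, recording the
 * normalized latency/bandwidth in that entry's CDAT coordinates.
 */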
static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
                               const unsigned long end)
{
        struct acpi_cdat_header *hdr = &header->cdat;
        struct acpi_cdat_dslbis *dslbis;
        int size = sizeof(*hdr) + sizeof(*dslbis);
        struct xarray *dsmas_xa = arg;
        struct dsmas_entry *dent;
        __le64 le_base;
        __le16 le_val;
        u64 val;
        u16 len;

        len = le16_to_cpu((__force __le16)hdr->length);
        if (len != size || (unsigned long)hdr + len > end) {
                pr_warn("Malformed DSLBIS table length: (%u:%u)\n", size, len);
                return -EINVAL;
        }

        /* Skip common header */
        dslbis = (struct acpi_cdat_dslbis *)(hdr + 1);

        /* Skip unrecognized data type */
        if (dslbis->data_type > ACPI_HMAT_WRITE_BANDWIDTH)
                return 0;

        /* Not a memory type, skip */
        if ((dslbis->flags & ACPI_HMAT_MEMORY_HIERARCHY) != ACPI_HMAT_MEMORY)
                return 0;

        dent = xa_load(dsmas_xa, dslbis->handle);
        if (!dent) {
                pr_warn("No matching DSMAS entry for DSLBIS entry.\n");
                return 0;
        }

        le_base = (__force __le64)dslbis->entry_base_unit;
        le_val = (__force __le16)dslbis->entry[0];
        val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
                             dslbis->data_type);

        cxl_access_coordinate_set(dent->cdat_coord, dslbis->data_type, val);

        return 0;
}
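/* Convert the subtable count returned by cdat_table_parse() into 0 or -errno. */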
static int cdat_table_parse_output(int rc)
{
        if (rc < 0)
                return rc;
        if (rc == 0)
                return -ENOENT;

        return 0;
}
static int cxl_cdat_endpoint_process(struct cxl_port *port,
                                     struct xarray *dsmas_xa)
{
        int rc;

        rc = cdat_table_parse(ACPI_CDAT_TYPE_DSMAS, cdat_dsmas_handler,
                              dsmas_xa, port->cdat.table, port->cdat.length);
        rc = cdat_table_parse_output(rc);
        if (rc)
                return rc;

        rc = cdat_table_parse(ACPI_CDAT_TYPE_DSLBIS, cdat_dslbis_handler,
                              dsmas_xa, port->cdat.table, port->cdat.length);
        return cdat_table_parse_output(rc);
}
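/*
 * Combine each DSMAS entry's CDAT coordinates with the endpoint's path
 * coordinates and ask the platform (via the CXL root ops) for a QoS class.
 */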
static int cxl_port_perf_data_calculate(struct cxl_port *port,
                                        struct xarray *dsmas_xa)
{
        struct access_coordinate ep_c[ACCESS_COORDINATE_MAX];
        struct dsmas_entry *dent;
        int valid_entries = 0;
        unsigned long index;
        int rc;

        rc = cxl_endpoint_get_perf_coordinates(port, ep_c);
        if (rc) {
                dev_dbg(&port->dev, "Failed to retrieve ep perf coordinates.\n");
                return rc;
        }

        struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);

        if (!cxl_root)
                return -ENODEV;

        if (!cxl_root->ops || !cxl_root->ops->qos_class)
                return -EOPNOTSUPP;

        xa_for_each(dsmas_xa, index, dent) {
                int qos_class;

                cxl_coordinates_combine(dent->coord, dent->cdat_coord, ep_c);
                rc = cxl_root->ops->qos_class(cxl_root,
                                              &dent->coord[ACCESS_COORDINATE_CPU],
                                              1, &qos_class);
                if (rc != 1)
                        continue;

                valid_entries++;
                dent->qos_class = qos_class;
        }

        if (!valid_entries)
                return -ENXIO;

        return 0;
}
static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
                              struct cxl_dpa_perf *dpa_perf)
{
        for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
                dpa_perf->coord[i] = dent->coord[i];
                dpa_perf->cdat_coord[i] = dent->cdat_coord[i];
        }
        dpa_perf->dpa_range = dent->dpa_range;
        dpa_perf->qos_class = dent->qos_class;
        dev_dbg(dev,
                "DSMAS: dpa: %pra qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n",
                &dent->dpa_range, dpa_perf->qos_class,
                dent->coord[ACCESS_COORDINATE_CPU].read_bandwidth,
                dent->coord[ACCESS_COORDINATE_CPU].write_bandwidth,
                dent->coord[ACCESS_COORDINATE_CPU].read_latency,
                dent->coord[ACCESS_COORDINATE_CPU].write_latency);
}
static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
                                     struct xarray *dsmas_xa)
{
        struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
        struct device *dev = cxlds->dev;
        struct range pmem_range = {
                .start = cxlds->pmem_res.start,
                .end = cxlds->pmem_res.end,
        };
        struct range ram_range = {
                .start = cxlds->ram_res.start,
                .end = cxlds->ram_res.end,
        };
        struct dsmas_entry *dent;
        unsigned long index;

        xa_for_each(dsmas_xa, index, dent) {
                if (resource_size(&cxlds->ram_res) &&
                    range_contains(&ram_range, &dent->dpa_range))
                        update_perf_entry(dev, dent, &mds->ram_perf);
                else if (resource_size(&cxlds->pmem_res) &&
                         range_contains(&pmem_range, &dent->dpa_range))
                        update_perf_entry(dev, dent, &mds->pmem_perf);
                else
                        dev_dbg(dev, "no partition for dsmas dpa: %pra\n",
                                &dent->dpa_range);
        }
}
static int match_cxlrd_qos_class(struct device *dev, void *data)
{
        int dev_qos_class = *(int *)data;
        struct cxl_root_decoder *cxlrd;

        if (!is_root_decoder(dev))
                return 0;

        cxlrd = to_cxl_root_decoder(dev);
        if (cxlrd->qos_class == CXL_QOS_CLASS_INVALID)
                return 0;

        if (cxlrd->qos_class == dev_qos_class)
                return 1;

        return 0;
}
static void reset_dpa_perf(struct cxl_dpa_perf *dpa_perf)
{
        *dpa_perf = (struct cxl_dpa_perf) {
                .qos_class = CXL_QOS_CLASS_INVALID,
        };
}
static bool cxl_qos_match(struct cxl_port *root_port,
                          struct cxl_dpa_perf *dpa_perf)
{
        if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID)
                return false;

        if (!device_for_each_child(&root_port->dev, &dpa_perf->qos_class,
                                   match_cxlrd_qos_class))
                return false;

        return true;
}
static int match_cxlrd_hb(struct device *dev, void *data)
{
        struct device *host_bridge = data;
        struct cxl_switch_decoder *cxlsd;
        struct cxl_root_decoder *cxlrd;

        if (!is_root_decoder(dev))
                return 0;

        cxlrd = to_cxl_root_decoder(dev);
        cxlsd = &cxlrd->cxlsd;

        guard(rwsem_read)(&cxl_region_rwsem);
        for (int i = 0; i < cxlsd->nr_targets; i++) {
                if (host_bridge == cxlsd->target[i]->dport_dev)
                        return 1;
        }

        return 0;
}
static int cxl_qos_class_verify(struct cxl_memdev *cxlmd)
{
        struct cxl_dev_state *cxlds = cxlmd->cxlds;
        struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
        struct cxl_port *root_port;
        int rc;

        struct cxl_root *cxl_root __free(put_cxl_root) =
                find_cxl_root(cxlmd->endpoint);

        if (!cxl_root)
                return -ENODEV;

        root_port = &cxl_root->port;

        /* Check that the QTG IDs are all sane between end device and root decoders */
        if (!cxl_qos_match(root_port, &mds->ram_perf))
                reset_dpa_perf(&mds->ram_perf);
        if (!cxl_qos_match(root_port, &mds->pmem_perf))
                reset_dpa_perf(&mds->pmem_perf);

        /* Check to make sure that the device's host bridge is under a root decoder */
        rc = device_for_each_child(&root_port->dev,
                                   cxlmd->endpoint->host_bridge, match_cxlrd_hb);
        if (!rc) {
                reset_dpa_perf(&mds->ram_perf);
                reset_dpa_perf(&mds->pmem_perf);
        }

        return rc;
}
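/* Free all dsmas_entry objects accumulated during CDAT parsing. */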
static void discard_dsmas(struct xarray *xa)
{
        unsigned long index;
        void *ent;

        xa_for_each(xa, index, ent) {
                xa_erase(xa, index);
                kfree(ent);
        }
        xa_destroy(xa);
}
DEFINE_FREE(dsmas, struct xarray *, if (_T) discard_dsmas(_T))
void cxl_endpoint_parse_cdat(struct cxl_port *port)
{
        struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
        struct cxl_dev_state *cxlds = cxlmd->cxlds;
        struct xarray __dsmas_xa;
        struct xarray *dsmas_xa __free(dsmas) = &__dsmas_xa;
        int rc;

        xa_init(&__dsmas_xa);
        if (!port->cdat.table)
                return;

        rc = cxl_cdat_endpoint_process(port, dsmas_xa);
        if (rc < 0) {
                dev_dbg(&port->dev, "Failed to parse CDAT: %d\n", rc);
                return;
        }

        rc = cxl_port_perf_data_calculate(port, dsmas_xa);
        if (rc) {
                dev_dbg(&port->dev, "Failed to do perf coord calculations.\n");
                return;
        }

        cxl_memdev_set_qos_class(cxlds, dsmas_xa);
        cxl_qos_class_verify(cxlmd);
        cxl_memdev_update_perf(cxlmd);
}
EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, "CXL");
static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg,
                               const unsigned long end)
{
        struct acpi_cdat_sslbis_table {
                struct acpi_cdat_header header;
                struct acpi_cdat_sslbis sslbis_header;
                struct acpi_cdat_sslbe entries[];
        } *tbl = (struct acpi_cdat_sslbis_table *)header;
        int size = sizeof(header->cdat) + sizeof(tbl->sslbis_header);
        struct acpi_cdat_sslbis *sslbis;
        struct cxl_port *port = arg;
        struct device *dev = &port->dev;
        int remain, entries, i;
        u16 len;

        len = le16_to_cpu((__force __le16)header->cdat.length);
        remain = len - size;
        if (!remain || remain % sizeof(tbl->entries[0]) ||
            (unsigned long)header + len > end) {
                dev_warn(dev, "Malformed SSLBIS table length: (%u)\n", len);
                return -EINVAL;
        }

        sslbis = &tbl->sslbis_header;
        /* Unrecognized data type, we can skip */
        if (sslbis->data_type > ACPI_HMAT_WRITE_BANDWIDTH)
                return 0;

        entries = remain / sizeof(tbl->entries[0]);
        if (struct_size(tbl, entries, entries) != len)
                return -EINVAL;

        for (i = 0; i < entries; i++) {
                u16 x = le16_to_cpu((__force __le16)tbl->entries[i].portx_id);
                u16 y = le16_to_cpu((__force __le16)tbl->entries[i].porty_id);
                struct cxl_dport *dport;
                unsigned long index;
                __le64 le_base;
                __le16 le_val;
                u16 dsp_id;
                u64 val;

                switch (x) {
                case ACPI_CDAT_SSLBIS_US_PORT:
                        dsp_id = y;
                        break;
                case ACPI_CDAT_SSLBIS_ANY_PORT:
                        switch (y) {
                        case ACPI_CDAT_SSLBIS_US_PORT:
                                dsp_id = x;
                                break;
                        case ACPI_CDAT_SSLBIS_ANY_PORT:
                                dsp_id = ACPI_CDAT_SSLBIS_ANY_PORT;
                                break;
                        default:
                                dsp_id = y;
                                break;
                        }
                        break;
                default:
                        dsp_id = x;
                        break;
                }

                le_base = (__force __le64)tbl->sslbis_header.entry_base_unit;
                le_val = (__force __le16)tbl->entries[i].latency_or_bandwidth;
                val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
                                     sslbis->data_type);

                xa_for_each(&port->dports, index, dport) {
                        if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT ||
                            dsp_id == dport->port_id) {
                                cxl_access_coordinate_set(dport->coord,
                                                          sslbis->data_type,
                                                          val);
                        }
                }
        }

        return 0;
}
void cxl_switch_parse_cdat(struct cxl_port *port)
{
        int rc;

        if (!port->cdat.table)
                return;

        rc = cdat_table_parse(ACPI_CDAT_TYPE_SSLBIS, cdat_sslbis_handler,
                              port, port->cdat.table, port->cdat.length);
        rc = cdat_table_parse_output(rc);
        if (rc)
                dev_dbg(&port->dev, "Failed to parse SSLBIS: %d\n", rc);
}
EXPORT_SYMBOL_NS_GPL(cxl_switch_parse_cdat, "CXL");
static void __cxl_coordinates_combine(struct access_coordinate *out,
                                      struct access_coordinate *c1,
                                      struct access_coordinate *c2)
{
        if (c1->write_bandwidth && c2->write_bandwidth)
                out->write_bandwidth = min(c1->write_bandwidth,
                                           c2->write_bandwidth);
        out->write_latency = c1->write_latency + c2->write_latency;

        if (c1->read_bandwidth && c2->read_bandwidth)
                out->read_bandwidth = min(c1->read_bandwidth,
                                          c2->read_bandwidth);
        out->read_latency = c1->read_latency + c2->read_latency;
}
/**
 * cxl_coordinates_combine - Combine the two input coordinates
 *
 * @out: Output coordinate of c1 and c2 combined
 * @c1: input coordinates
 * @c2: input coordinates
 */
void cxl_coordinates_combine(struct access_coordinate *out,
                             struct access_coordinate *c1,
                             struct access_coordinate *c2)
{
        for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
                __cxl_coordinates_combine(&out[i], &c1[i], &c2[i]);
}
MODULE_IMPORT_NS("CXL");
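/* Sum the read/write bandwidths of two coordinate sets; latencies are left untouched. */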
static void cxl_bandwidth_add(struct access_coordinate *coord,
                              struct access_coordinate *c1,
                              struct access_coordinate *c2)
{
        for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
                coord[i].read_bandwidth = c1[i].read_bandwidth +
                                          c2[i].read_bandwidth;
                coord[i].write_bandwidth = c1[i].write_bandwidth +
                                           c2[i].write_bandwidth;
        }
}
static bool dpa_perf_contains(struct cxl_dpa_perf *perf,
                              struct resource *dpa_res)
{
        struct range dpa = {
                .start = dpa_res->start,
                .end = dpa_res->end,
        };

        return range_contains(&perf->dpa_range, &dpa);
}
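/* Look up the ram or pmem perf data that covers the decoder's DPA resource. */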
static struct cxl_dpa_perf *cxled_get_dpa_perf(struct cxl_endpoint_decoder *cxled,
                                               enum cxl_decoder_mode mode)
{
        struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
        struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
        struct cxl_dpa_perf *perf;

        switch (mode) {
        case CXL_DECODER_RAM:
                perf = &mds->ram_perf;
                break;
        case CXL_DECODER_PMEM:
                perf = &mds->pmem_perf;
                break;
        default:
                return ERR_PTR(-EINVAL);
        }

        if (!dpa_perf_contains(perf, cxled->dpa_res))
                return ERR_PTR(-EINVAL);

        return perf;
}
/*
 * Transient context for containing the current calculation of bandwidth when
 * walking the port hierarchy to deal with a shared upstream link.
 */
struct cxl_perf_ctx {
        struct access_coordinate coord[ACCESS_COORDINATE_MAX];
        struct cxl_port *port;
};
/**
 * cxl_endpoint_gather_bandwidth - collect all the endpoint bandwidth in an xarray
 * @cxlr: CXL region for the bandwidth calculation
 * @cxled: endpoint decoder to start on
 * @usp_xa: (output) the xarray that collects all the bandwidth coordinates
 *          indexed by the upstream device with data of 'struct cxl_perf_ctx'.
 * @gp_is_root: (output) bool of whether the grandparent is cxl root.
 *
 * Return: 0 for success or -errno
 *
 * Collects the aggregated endpoint bandwidth and stores it in an xarray
 * indexed by the upstream device of the switch or the RP device. Each
 * endpoint contributes the minimum of the bandwidth from DSLBIS of the
 * endpoint CDAT, the endpoint upstream link bandwidth, and the bandwidth
 * from the SSLBIS of the switch CDAT for the switch upstream port to the
 * downstream port that's associated with the endpoint. If the device is
 * directly connected to an RP, then no SSLBIS is involved.
 */
static int cxl_endpoint_gather_bandwidth(struct cxl_region *cxlr,
                                         struct cxl_endpoint_decoder *cxled,
                                         struct xarray *usp_xa,
                                         bool *gp_is_root)
{
        struct cxl_port *endpoint = to_cxl_port(cxled->cxld.dev.parent);
        struct cxl_port *parent_port = to_cxl_port(endpoint->dev.parent);
        struct cxl_port *gp_port = to_cxl_port(parent_port->dev.parent);
        struct access_coordinate pci_coord[ACCESS_COORDINATE_MAX];
        struct access_coordinate sw_coord[ACCESS_COORDINATE_MAX];
        struct access_coordinate ep_coord[ACCESS_COORDINATE_MAX];
        struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
        struct cxl_dev_state *cxlds = cxlmd->cxlds;
        struct pci_dev *pdev = to_pci_dev(cxlds->dev);
        struct cxl_perf_ctx *perf_ctx;
        struct cxl_dpa_perf *perf;
        unsigned long index;
        void *ptr;
        int rc;

        if (!dev_is_pci(cxlds->dev))
                return -ENODEV;

        perf = cxled_get_dpa_perf(cxled, cxlr->mode);
        if (IS_ERR(perf))
                return PTR_ERR(perf);

        gp_port = to_cxl_port(parent_port->dev.parent);
        *gp_is_root = is_cxl_root(gp_port);

        /*
         * If the grandparent is cxl root, then index is the root port,
         * otherwise it's the parent switch upstream device.
         */
        if (*gp_is_root)
                index = (unsigned long)endpoint->parent_dport->dport_dev;
        else
                index = (unsigned long)parent_port->uport_dev;

        perf_ctx = xa_load(usp_xa, index);
        if (!perf_ctx) {
                struct cxl_perf_ctx *c __free(kfree) =
                        kzalloc(sizeof(*perf_ctx), GFP_KERNEL);

                if (!c)
                        return -ENOMEM;
                ptr = xa_store(usp_xa, index, c, GFP_KERNEL);
                if (xa_is_err(ptr))
                        return xa_err(ptr);
                perf_ctx = no_free_ptr(c);
                perf_ctx->port = parent_port;
        }

        /* Direct upstream link from EP bandwidth */
        rc = cxl_pci_get_bandwidth(pdev, pci_coord);
        if (rc < 0)
                return rc;

        /*
         * Min of upstream link bandwidth and Endpoint CDAT bandwidth from
         * DSLBIS.
         */
        cxl_coordinates_combine(ep_coord, pci_coord, perf->cdat_coord);

        /*
         * If grandparent port is root, then there's no switch involved and
         * the endpoint is connected to a root port.
         */
        if (!*gp_is_root) {
                /*
                 * Retrieve the switch SSLBIS for switch downstream port
                 * associated with the endpoint bandwidth.
                 */
                rc = cxl_port_get_switch_dport_bandwidth(endpoint, sw_coord);
                if (rc)
                        return rc;

                /*
                 * Min of the earlier coordinates with the switch SSLBIS
                 * bandwidth.
                 */
                cxl_coordinates_combine(ep_coord, ep_coord, sw_coord);
        }

        /*
         * Aggregate the computed bandwidth with the current aggregated bandwidth
         * of the endpoints with the same switch upstream device or RP.
         */
        cxl_bandwidth_add(perf_ctx->coord, perf_ctx->coord, ep_coord);

        return 0;
}
static void free_perf_xa(struct xarray *xa)
{
        struct cxl_perf_ctx *ctx;
        unsigned long index;

        if (!xa)
                return;

        xa_for_each(xa, index, ctx)
                kfree(ctx);
        xa_destroy(xa);
        kfree(xa);
}
DEFINE_FREE(free_perf_xa, struct xarray *, if (_T) free_perf_xa(_T))
/**
 * cxl_switch_gather_bandwidth - collect all the bandwidth at switch level in an xarray
 * @cxlr: The region being operated on
 * @input_xa: xarray indexed by upstream device of a switch with data of 'struct
 *            cxl_perf_ctx'
 * @gp_is_root: (output) bool of whether the grandparent is cxl root.
 *
 * Return: a xarray of resulting cxl_perf_ctx per parent switch or root port
 *         device, or ERR_PTR(-errno)
 *
 * Iterate through the xarray. Take the minimum of the downstream calculated
 * bandwidth, the upstream link bandwidth, and the SSLBIS of the upstream
 * switch if it exists. Sum the resulting bandwidth under the switch upstream
 * device or an RP device. The function can be iterated over multiple switch
 * levels if multiple switches are present.
 */
static struct xarray *cxl_switch_gather_bandwidth(struct cxl_region *cxlr,
                                                  struct xarray *input_xa,
                                                  bool *gp_is_root)
{
        struct xarray *res_xa __free(free_perf_xa) =
                kzalloc(sizeof(*res_xa), GFP_KERNEL);
        struct access_coordinate coords[ACCESS_COORDINATE_MAX];
        struct cxl_perf_ctx *ctx, *us_ctx;
        unsigned long index, us_index;
        int dev_count = 0;
        int gp_count = 0;
        void *ptr;
        int rc;

        if (!res_xa)
                return ERR_PTR(-ENOMEM);
        xa_init(res_xa);

        xa_for_each(input_xa, index, ctx) {
                struct device *dev = (struct device *)index;
                struct cxl_port *port = ctx->port;
                struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
                struct cxl_port *gp_port = to_cxl_port(parent_port->dev.parent);
                struct cxl_dport *dport = port->parent_dport;
                bool is_root = false;

                dev_count++;
                if (is_cxl_root(gp_port)) {
                        is_root = true;
                        gp_count++;
                }

                /*
                 * If the grandparent is cxl root, then index is the root port,
                 * otherwise it's the parent switch upstream device.
                 */
                if (is_root)
                        us_index = (unsigned long)port->parent_dport->dport_dev;
                else
                        us_index = (unsigned long)parent_port->uport_dev;

                us_ctx = xa_load(res_xa, us_index);
                if (!us_ctx) {
                        struct cxl_perf_ctx *n __free(kfree) =
                                kzalloc(sizeof(*n), GFP_KERNEL);

                        if (!n)
                                return ERR_PTR(-ENOMEM);

                        ptr = xa_store(res_xa, us_index, n, GFP_KERNEL);
                        if (xa_is_err(ptr))
                                return ERR_PTR(xa_err(ptr));
                        us_ctx = no_free_ptr(n);
                        us_ctx->port = parent_port;
                }

                /*
                 * If the device isn't an upstream PCIe port, there's something
                 * wrong with the topology.
                 */
                if (!dev_is_pci(dev))
                        return ERR_PTR(-EINVAL);

                /* Retrieve the upstream link bandwidth */
                rc = cxl_pci_get_bandwidth(to_pci_dev(dev), coords);
                if (rc)
                        return ERR_PTR(-ENXIO);

                /*
                 * Take the min of downstream bandwidth and the upstream link
                 * bandwidth.
                 */
                cxl_coordinates_combine(coords, coords, ctx->coord);

                /*
                 * Take the min of the calculated bandwidth and the upstream
                 * switch SSLBIS bandwidth if there's a parent switch.
                 */
                if (!is_root)
                        cxl_coordinates_combine(coords, coords, dport->coord);

                /*
                 * Aggregate the calculated bandwidth common to an upstream
                 * switch.
                 */
                cxl_bandwidth_add(us_ctx->coord, us_ctx->coord, coords);
        }

        /* Asymmetric topology detected. */
        if (gp_count) {
                if (gp_count != dev_count) {
                        dev_dbg(&cxlr->dev,
                                "Asymmetric hierarchy detected, bandwidth not updated\n");
                        return ERR_PTR(-EOPNOTSUPP);
                }
                *gp_is_root = true;
        }

        return no_free_ptr(res_xa);
}
/**
 * cxl_rp_gather_bandwidth - handle the root port level bandwidth collection
 * @xa: the xarray that holds the cxl_perf_ctx that has the bandwidth calculated
 *      below each root port device.
 *
 * Return: xarray that holds cxl_perf_ctx per host bridge or ERR_PTR(-errno)
 */
static struct xarray *cxl_rp_gather_bandwidth(struct xarray *xa)
{
        struct xarray *hb_xa __free(free_perf_xa) =
                kzalloc(sizeof(*hb_xa), GFP_KERNEL);
        struct cxl_perf_ctx *ctx;
        unsigned long index;

        if (!hb_xa)
                return ERR_PTR(-ENOMEM);
        xa_init(hb_xa);

        xa_for_each(xa, index, ctx) {
                struct cxl_port *port = ctx->port;
                unsigned long hb_index = (unsigned long)port->uport_dev;
                struct cxl_perf_ctx *hb_ctx;
                void *ptr;

                hb_ctx = xa_load(hb_xa, hb_index);
                if (!hb_ctx) {
                        struct cxl_perf_ctx *n __free(kfree) =
                                kzalloc(sizeof(*n), GFP_KERNEL);

                        if (!n)
                                return ERR_PTR(-ENOMEM);
                        ptr = xa_store(hb_xa, hb_index, n, GFP_KERNEL);
                        if (xa_is_err(ptr))
                                return ERR_PTR(xa_err(ptr));
                        hb_ctx = no_free_ptr(n);
                        hb_ctx->port = port;
                }

                cxl_bandwidth_add(hb_ctx->coord, hb_ctx->coord, ctx->coord);
        }

        return no_free_ptr(hb_xa);
}
/**
 * cxl_hb_gather_bandwidth - handle the host bridge level bandwidth collection
 * @xa: the xarray that holds the cxl_perf_ctx that has the bandwidth calculated
 *      below each host bridge.
 *
 * Return: xarray that holds cxl_perf_ctx per ACPI0017 device or ERR_PTR(-errno)
 */
static struct xarray *cxl_hb_gather_bandwidth(struct xarray *xa)
{
        struct xarray *mw_xa __free(free_perf_xa) =
                kzalloc(sizeof(*mw_xa), GFP_KERNEL);
        struct cxl_perf_ctx *ctx;
        unsigned long index;

        if (!mw_xa)
                return ERR_PTR(-ENOMEM);
        xa_init(mw_xa);

        xa_for_each(xa, index, ctx) {
                struct cxl_port *port = ctx->port;
                struct cxl_port *parent_port;
                struct cxl_perf_ctx *mw_ctx;
                struct cxl_dport *dport;
                unsigned long mw_index;
                void *ptr;

                parent_port = to_cxl_port(port->dev.parent);
                mw_index = (unsigned long)parent_port->uport_dev;

                mw_ctx = xa_load(mw_xa, mw_index);
                if (!mw_ctx) {
                        struct cxl_perf_ctx *n __free(kfree) =
                                kzalloc(sizeof(*n), GFP_KERNEL);

                        if (!n)
                                return ERR_PTR(-ENOMEM);
                        ptr = xa_store(mw_xa, mw_index, n, GFP_KERNEL);
                        if (xa_is_err(ptr))
                                return ERR_PTR(xa_err(ptr));
                        mw_ctx = no_free_ptr(n);
                }

                dport = port->parent_dport;
                cxl_coordinates_combine(ctx->coord, ctx->coord, dport->coord);
                cxl_bandwidth_add(mw_ctx->coord, mw_ctx->coord, ctx->coord);
        }

        return no_free_ptr(mw_xa);
}
/**
 * cxl_region_update_bandwidth - Update the bandwidth access coordinates of a region
 * @cxlr: The region being operated on
 * @input_xa: xarray that holds cxl_perf_ctx with the calculated bandwidth per
 *            ACPI0017 instance
 */
static void cxl_region_update_bandwidth(struct cxl_region *cxlr,
                                        struct xarray *input_xa)
{
        struct access_coordinate coord[ACCESS_COORDINATE_MAX];
        struct cxl_perf_ctx *ctx;
        unsigned long index;

        memset(coord, 0, sizeof(coord));
        xa_for_each(input_xa, index, ctx)
                cxl_bandwidth_add(coord, coord, ctx->coord);

        for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
                cxlr->coord[i].read_bandwidth = coord[i].read_bandwidth;
                cxlr->coord[i].write_bandwidth = coord[i].write_bandwidth;
        }
}
/**
 * cxl_region_shared_upstream_bandwidth_update - Recalculate the bandwidth for
 *                                               the region
 * @cxlr: the cxl region to recalculate
 *
 * The function walks the topology from the bottom up and calculates the
 * bandwidth. It starts at the endpoints, processes the switch levels if any,
 * then the root port level, then the host bridge level, and finally
 * aggregates at the region.
 */
void cxl_region_shared_upstream_bandwidth_update(struct cxl_region *cxlr)
{
        struct xarray *working_xa;
        int root_count = 0;
        bool is_root;
        int rc;

        lockdep_assert_held(&cxl_dpa_rwsem);

        struct xarray *usp_xa __free(free_perf_xa) =
                kzalloc(sizeof(*usp_xa), GFP_KERNEL);

        if (!usp_xa)
                return;

        xa_init(usp_xa);

        /* Collect bandwidth data from all the endpoints. */
        for (int i = 0; i < cxlr->params.nr_targets; i++) {
                struct cxl_endpoint_decoder *cxled = cxlr->params.targets[i];

                rc = cxl_endpoint_gather_bandwidth(cxlr, cxled, usp_xa, &is_root);
                if (rc)
                        return;
                root_count += is_root;
        }

        /* Detect asymmetric hierarchy with some direct attached endpoints. */
        if (root_count && root_count != cxlr->params.nr_targets) {
                dev_dbg(&cxlr->dev,
                        "Asymmetric hierarchy detected, bandwidth not updated\n");
                return;
        }

        /*
         * Walk up one or more switches to deal with the bandwidth of the
         * switches if they exist. Endpoints directly attached to RPs skip
         * over this step.
         */
        if (!root_count) {
                do {
                        working_xa = cxl_switch_gather_bandwidth(cxlr, usp_xa,
                                                                 &is_root);
                        if (IS_ERR(working_xa))
                                return;
                        free_perf_xa(usp_xa);
                        usp_xa = working_xa;
                } while (!is_root);
        }

        /* Handle the bandwidth at the root port of the hierarchy */
        working_xa = cxl_rp_gather_bandwidth(usp_xa);
        if (IS_ERR(working_xa))
                return;
        free_perf_xa(usp_xa);
        usp_xa = working_xa;

        /* Handle the bandwidth at the host bridge of the hierarchy */
        working_xa = cxl_hb_gather_bandwidth(usp_xa);
        if (IS_ERR(working_xa))
                return;
        free_perf_xa(usp_xa);
        usp_xa = working_xa;

        /*
         * Aggregate all the bandwidth collected per CFMWS (ACPI0017) and
         * update the region bandwidth with the final calculated values.
         */
        cxl_region_update_bandwidth(cxlr, usp_xa);
}
void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
                                    struct cxl_endpoint_decoder *cxled)
{
        struct cxl_dpa_perf *perf;

        lockdep_assert_held(&cxl_dpa_rwsem);

        perf = cxled_get_dpa_perf(cxled, cxlr->mode);
        if (IS_ERR(perf))
                return;

        for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
                /* Get total bandwidth and the worst latency for the cxl region */
                cxlr->coord[i].read_latency = max_t(unsigned int,
                                                    cxlr->coord[i].read_latency,
                                                    perf->coord[i].read_latency);
                cxlr->coord[i].write_latency = max_t(unsigned int,
                                                     cxlr->coord[i].write_latency,
                                                     perf->coord[i].write_latency);
                cxlr->coord[i].read_bandwidth += perf->coord[i].read_bandwidth;
                cxlr->coord[i].write_bandwidth += perf->coord[i].write_bandwidth;
        }
}
int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
                                       enum access_coordinate_class access)
{
        return hmat_update_target_coordinates(nid, &cxlr->coord[access], access);
}

bool cxl_need_node_perf_attrs_update(int nid)
{
        return !acpi_node_backed_by_real_pxm(nid);
}