1 // SPDX-License-Identifier: GPL-2.0+
2 // Copyright 2017 IBM Corp.
3 #include <asm/pnv-ocxl.h>
5 #include <misc/ocxl-config.h>
8 #define PNV_OCXL_TL_P9_RECV_CAP 0x000000000000000Full
9 #define PNV_OCXL_ACTAG_MAX 64
10 /* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */
11 #define PNV_OCXL_PASID_BITS 15
12 #define PNV_OCXL_PASID_MAX ((1 << PNV_OCXL_PASID_BITS) - 1)
14 #define AFU_PRESENT (1 << 31)
15 #define AFU_INDEX_MASK 0x3F000000
16 #define AFU_INDEX_SHIFT 24
17 #define ACTAG_MASK 0xFFF
26 struct list_head list
;
30 u16 fn_desired_actags
[8];
31 struct actag_range fn_actags
[8];
34 static struct list_head links_list
= LIST_HEAD_INIT(links_list
);
35 static DEFINE_MUTEX(links_list_lock
);
39 * opencapi actags handling:
41 * When sending commands, the opencapi device references the memory
42 * context it's targeting with an 'actag', which is really an alias
43 * for a (BDF, pasid) combination. When it receives a command, the NPU
44 * must do a lookup of the actag to identify the memory context. The
45 * hardware supports a finite number of actags per link (64 for POWER9).
48 * The device can carry multiple functions, and each function can have
49 * multiple AFUs. Each AFU advertises in its config space the number
50 * of desired actags. The host must configure in the config space of
51 * the AFU how many actags the AFU is really allowed to use (which can
52 * be less than what the AFU desires).
54 * When a PCI function is probed by the driver, it has no visibility
55 * about the other PCI functions and how many actags they'd like,
56 * which makes it impossible to distribute actags fairly among AFUs.
58 * Unfortunately, the only way to know how many actags a function
59 * desires is by looking at the data for each AFU in the config space
60 * and adding them up. Similarly, the only way to know how many actags
61 * all the functions of the physical device desire is by adding the
62 * previously computed function counts. Then we can match that against
63 * what the hardware supports.
65 * To get a comprehensive view, we use a 'pci fixup': at the end of
66 * PCI enumeration, each function counts how many actags its AFUs
67 * desire and we save it in a 'npu_link' structure, shared between all
68 * the PCI functions of the same device. Therefore, when the first
69 * function is probed by the driver, we can get an idea of the total
70 * count of desired actags for the device, and assign the actags to
71 * the AFUs, by pro-rating if needed.
74 static int find_dvsec_from_pos(struct pci_dev
*dev
, int dvsec_id
, int pos
)
79 while ((vsec
= pci_find_next_ext_capability(dev
, vsec
,
80 OCXL_EXT_CAP_ID_DVSEC
))) {
81 pci_read_config_word(dev
, vsec
+ OCXL_DVSEC_VENDOR_OFFSET
,
83 pci_read_config_word(dev
, vsec
+ OCXL_DVSEC_ID_OFFSET
, &id
);
84 if (vendor
== PCI_VENDOR_ID_IBM
&& id
== dvsec_id
)
90 static int find_dvsec_afu_ctrl(struct pci_dev
*dev
, u8 afu_idx
)
95 while ((vsec
= find_dvsec_from_pos(dev
, OCXL_DVSEC_AFU_CTRL_ID
,
97 pci_read_config_byte(dev
, vsec
+ OCXL_DVSEC_AFU_CTRL_AFU_IDX
,
105 static int get_max_afu_index(struct pci_dev
*dev
, int *afu_idx
)
110 pos
= find_dvsec_from_pos(dev
, OCXL_DVSEC_FUNC_ID
, 0);
114 pci_read_config_dword(dev
, pos
+ OCXL_DVSEC_FUNC_OFF_INDEX
, &val
);
115 if (val
& AFU_PRESENT
)
116 *afu_idx
= (val
& AFU_INDEX_MASK
) >> AFU_INDEX_SHIFT
;
122 static int get_actag_count(struct pci_dev
*dev
, int afu_idx
, int *actag
)
127 pos
= find_dvsec_afu_ctrl(dev
, afu_idx
);
131 pci_read_config_word(dev
, pos
+ OCXL_DVSEC_AFU_CTRL_ACTAG_SUP
,
133 *actag
= actag_sup
& ACTAG_MASK
;
137 static struct npu_link
*find_link(struct pci_dev
*dev
)
139 struct npu_link
*link
;
141 list_for_each_entry(link
, &links_list
, list
) {
142 /* The functions of a device all share the same link */
143 if (link
->domain
== pci_domain_nr(dev
->bus
) &&
144 link
->bus
== dev
->bus
->number
&&
145 link
->dev
== PCI_SLOT(dev
->devfn
)) {
150 /* link doesn't exist yet. Allocate one */
151 link
= kzalloc(sizeof(struct npu_link
), GFP_KERNEL
);
154 link
->domain
= pci_domain_nr(dev
->bus
);
155 link
->bus
= dev
->bus
->number
;
156 link
->dev
= PCI_SLOT(dev
->devfn
);
157 list_add(&link
->list
, &links_list
);
161 static void pnv_ocxl_fixup_actag(struct pci_dev
*dev
)
163 struct pci_controller
*hose
= pci_bus_to_host(dev
->bus
);
164 struct pnv_phb
*phb
= hose
->private_data
;
165 struct npu_link
*link
;
166 int rc
, afu_idx
= -1, i
, actag
;
168 if (!machine_is(powernv
))
171 if (phb
->type
!= PNV_PHB_NPU_OCAPI
)
174 mutex_lock(&links_list_lock
);
176 link
= find_link(dev
);
178 dev_warn(&dev
->dev
, "couldn't update actag information\n");
179 mutex_unlock(&links_list_lock
);
184 * Check how many actags are desired for the AFUs under that
185 * function and add it to the count for the link
187 rc
= get_max_afu_index(dev
, &afu_idx
);
189 /* Most likely an invalid config space */
190 dev_dbg(&dev
->dev
, "couldn't find AFU information\n");
194 link
->fn_desired_actags
[PCI_FUNC(dev
->devfn
)] = 0;
195 for (i
= 0; i
<= afu_idx
; i
++) {
197 * AFU index 'holes' are allowed. So don't fail if we
198 * can't read the actag info for an index
200 rc
= get_actag_count(dev
, i
, &actag
);
203 link
->fn_desired_actags
[PCI_FUNC(dev
->devfn
)] += actag
;
205 dev_dbg(&dev
->dev
, "total actags for function: %d\n",
206 link
->fn_desired_actags
[PCI_FUNC(dev
->devfn
)]);
208 mutex_unlock(&links_list_lock
);
210 DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID
, PCI_ANY_ID
, pnv_ocxl_fixup_actag
);
212 static u16
assign_fn_actags(u16 desired
, u16 total
)
216 if (total
<= PNV_OCXL_ACTAG_MAX
)
219 count
= PNV_OCXL_ACTAG_MAX
* desired
/ total
;
224 static void assign_actags(struct npu_link
*link
)
226 u16 actag_count
, range_start
= 0, total_desired
= 0;
229 for (i
= 0; i
< 8; i
++)
230 total_desired
+= link
->fn_desired_actags
[i
];
232 for (i
= 0; i
< 8; i
++) {
233 if (link
->fn_desired_actags
[i
]) {
234 actag_count
= assign_fn_actags(
235 link
->fn_desired_actags
[i
],
237 link
->fn_actags
[i
].start
= range_start
;
238 link
->fn_actags
[i
].count
= actag_count
;
239 range_start
+= actag_count
;
240 WARN_ON(range_start
>= PNV_OCXL_ACTAG_MAX
);
242 pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n",
243 link
->domain
, link
->bus
, link
->dev
, i
,
244 link
->fn_actags
[i
].start
, link
->fn_actags
[i
].count
,
245 link
->fn_desired_actags
[i
]);
247 link
->assignment_done
= true;
250 int pnv_ocxl_get_actag(struct pci_dev
*dev
, u16
*base
, u16
*enabled
,
253 struct npu_link
*link
;
255 mutex_lock(&links_list_lock
);
257 link
= find_link(dev
);
259 dev_err(&dev
->dev
, "actag information not found\n");
260 mutex_unlock(&links_list_lock
);
264 * On p9, we only have 64 actags per link, so they must be
265 * shared by all the functions of the same adapter. We counted
266 * the desired actag counts during PCI enumeration, so that we
267 * can allocate a pro-rated number of actags to each function.
269 if (!link
->assignment_done
)
272 *base
= link
->fn_actags
[PCI_FUNC(dev
->devfn
)].start
;
273 *enabled
= link
->fn_actags
[PCI_FUNC(dev
->devfn
)].count
;
274 *supported
= link
->fn_desired_actags
[PCI_FUNC(dev
->devfn
)];
276 mutex_unlock(&links_list_lock
);
279 EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag
);
281 int pnv_ocxl_get_pasid_count(struct pci_dev
*dev
, int *count
)
283 struct npu_link
*link
;
287 * The number of PASIDs (process address space ID) which can
288 * be used by a function depends on how many functions exist
289 * on the device. The NPU needs to be configured to know how
290 * many bits are available to PASIDs and how many are to be
291 * used by the function BDF identifier.
293 * We only support one AFU-carrying function for now.
295 mutex_lock(&links_list_lock
);
297 link
= find_link(dev
);
299 dev_err(&dev
->dev
, "actag information not found\n");
300 mutex_unlock(&links_list_lock
);
304 for (i
= 0; i
< 8; i
++)
305 if (link
->fn_desired_actags
[i
] && (i
== PCI_FUNC(dev
->devfn
))) {
306 *count
= PNV_OCXL_PASID_MAX
;
311 mutex_unlock(&links_list_lock
);
312 dev_dbg(&dev
->dev
, "%d PASIDs available for function\n",
316 EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count
);
318 static void set_templ_rate(unsigned int templ
, unsigned int rate
, char *buf
)
322 WARN_ON(templ
> PNV_OCXL_TL_MAX_TEMPLATE
);
323 idx
= (PNV_OCXL_TL_MAX_TEMPLATE
- templ
) / 2;
324 shift
= 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE
- templ
) % 2));
325 buf
[idx
] |= rate
<< shift
;
328 int pnv_ocxl_get_tl_cap(struct pci_dev
*dev
, long *cap
,
329 char *rate_buf
, int rate_buf_size
)
331 if (rate_buf_size
!= PNV_OCXL_TL_RATE_BUF_SIZE
)
334 * The TL capabilities are a characteristic of the NPU, so
335 * we go with hard-coded values.
337 * The receiving rate of each template is encoded on 4 bits.
340 * - templates 0 -> 3 are supported
341 * - templates 0, 1 and 3 have a 0 receiving rate
342 * - template 2 has receiving rate of 1 (extra cycle)
344 memset(rate_buf
, 0, rate_buf_size
);
345 set_templ_rate(2, 1, rate_buf
);
346 *cap
= PNV_OCXL_TL_P9_RECV_CAP
;
349 EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap
);
351 int pnv_ocxl_set_tl_conf(struct pci_dev
*dev
, long cap
,
352 uint64_t rate_buf_phys
, int rate_buf_size
)
354 struct pci_controller
*hose
= pci_bus_to_host(dev
->bus
);
355 struct pnv_phb
*phb
= hose
->private_data
;
358 if (rate_buf_size
!= PNV_OCXL_TL_RATE_BUF_SIZE
)
361 rc
= opal_npu_tl_set(phb
->opal_id
, dev
->devfn
, cap
,
362 rate_buf_phys
, rate_buf_size
);
364 dev_err(&dev
->dev
, "Can't configure host TL: %d\n", rc
);
369 EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf
);
371 int pnv_ocxl_get_xsl_irq(struct pci_dev
*dev
, int *hwirq
)
375 rc
= of_property_read_u32(dev
->dev
.of_node
, "ibm,opal-xsl-irq", hwirq
);
378 "Can't get translation interrupt for device\n");
383 EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq
);
385 void pnv_ocxl_unmap_xsl_regs(void __iomem
*dsisr
, void __iomem
*dar
,
386 void __iomem
*tfc
, void __iomem
*pe_handle
)
393 EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs
);
395 int pnv_ocxl_map_xsl_regs(struct pci_dev
*dev
, void __iomem
**dsisr
,
396 void __iomem
**dar
, void __iomem
**tfc
,
397 void __iomem
**pe_handle
)
401 void __iomem
*regs
[4];
404 * opal stores the mmio addresses of the DSISR, DAR, TFC and
405 * PE_HANDLE registers in a device tree property, in that order.
408 for (i
= 0; i
< 4; i
++) {
409 rc
= of_property_read_u64_index(dev
->dev
.of_node
,
410 "ibm,opal-xsl-mmio", i
, ®
);
413 regs
[i
] = ioremap(reg
, 8);
420 dev_err(&dev
->dev
, "Can't map translation mmio registers\n");
421 for (j
= i
- 1; j
>= 0; j
--)
427 *pe_handle
= regs
[3];
431 EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs
);
438 int pnv_ocxl_spa_setup(struct pci_dev
*dev
, void *spa_mem
, int PE_mask
,
439 void **platform_data
)
441 struct pci_controller
*hose
= pci_bus_to_host(dev
->bus
);
442 struct pnv_phb
*phb
= hose
->private_data
;
443 struct spa_data
*data
;
447 data
= kzalloc(sizeof(*data
), GFP_KERNEL
);
451 bdfn
= (dev
->bus
->number
<< 8) | dev
->devfn
;
452 rc
= opal_npu_spa_setup(phb
->opal_id
, bdfn
, virt_to_phys(spa_mem
),
455 dev_err(&dev
->dev
, "Can't setup Shared Process Area: %d\n", rc
);
459 data
->phb_opal_id
= phb
->opal_id
;
461 *platform_data
= (void *) data
;
464 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup
);
466 void pnv_ocxl_spa_release(void *platform_data
)
468 struct spa_data
*data
= (struct spa_data
*) platform_data
;
471 rc
= opal_npu_spa_setup(data
->phb_opal_id
, data
->bdfn
, 0, 0);
475 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release
);
477 int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data
, int pe_handle
)
479 struct spa_data
*data
= (struct spa_data
*) platform_data
;
482 rc
= opal_npu_spa_clear_cache(data
->phb_opal_id
, data
->bdfn
, pe_handle
);
485 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache
);
487 int pnv_ocxl_map_lpar(struct pci_dev
*dev
, uint64_t lparid
,
488 uint64_t lpcr
, void __iomem
**arva
)
490 struct pci_controller
*hose
= pci_bus_to_host(dev
->bus
);
491 struct pnv_phb
*phb
= hose
->private_data
;
495 /* ATSD physical address.
496 * ATSD LAUNCH register: write access initiates a shoot down to
497 * initiate the TLB Invalidate command.
499 rc
= of_property_read_u64_index(hose
->dn
, "ibm,mmio-atsd",
502 dev_info(&dev
->dev
, "No available ATSD found\n");
506 /* Assign a register set to a Logical Partition and MMIO ATSD
507 * LPARID register to the required value.
509 rc
= opal_npu_map_lpar(phb
->opal_id
, pci_dev_id(dev
),
512 dev_err(&dev
->dev
, "Error mapping device to LPAR: %d\n", rc
);
516 *arva
= ioremap(mmio_atsd
, 24);
518 dev_warn(&dev
->dev
, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd
);
524 EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar
);
526 void pnv_ocxl_unmap_lpar(void __iomem
*arva
)
530 EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar
);
532 void pnv_ocxl_tlb_invalidate(void __iomem
*arva
,
535 unsigned long page_size
)
537 unsigned long timeout
= jiffies
+ (HZ
* PNV_OCXL_ATSD_TIMEOUT
);
546 /* load Abbreviated Virtual Address register with
547 * the necessary value
549 val
|= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA
, addr
>> (63-51));
550 out_be64(arva
+ PNV_OCXL_ATSD_AVA
, val
);
553 /* Write access initiates a shoot down to initiate the
554 * TLB Invalidate command
556 val
= PNV_OCXL_ATSD_LNCH_R
;
557 val
|= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC
, 0b10);
559 val
|= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS
, 0b00);
561 val
|= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS
, 0b01);
562 val
|= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON
;
564 val
|= PNV_OCXL_ATSD_LNCH_PRS
;
565 /* Actual Page Size to be invalidated
572 if (page_size
== 0x1000)
574 if (page_size
== 0x200000)
576 if (page_size
== 0x40000000)
578 val
|= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP
, size
);
579 val
|= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID
, pid
);
580 out_be64(arva
+ PNV_OCXL_ATSD_LNCH
, val
);
582 /* Poll the ATSD status register to determine when the
583 * TLB Invalidate has been completed.
585 val
= in_be64(arva
+ PNV_OCXL_ATSD_STAT
);
589 if (time_after_eq(jiffies
, timeout
)) {
590 pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n",
595 val
= in_be64(arva
+ PNV_OCXL_ATSD_STAT
);
599 EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate
);