// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2017 IBM Corp.
 */

#include <linux/hugetlb.h>
#include <linux/sched/mm.h>
#include <asm/opal-api.h>
#include <asm/pnv-pci.h>
#include <misc/cxllib.h>

#include "cxl.h"

#define CXL_INVALID_DRA                 ~0ull
#define CXL_DUMMY_READ_SIZE             128
#define CXL_DUMMY_READ_ALIGN            8
#define CXL_CAPI_WINDOW_START           0x2000000000000ull
#define CXL_CAPI_WINDOW_LOG_SIZE        48
#define CXL_XSL_CONFIG_CURRENT_VERSION  CXL_XSL_CONFIG_VERSION1
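/*
 * Note on the constants above: as used by cxllib_get_xsl_config() below,
 * the CAPI window spans 2^CXL_CAPI_WINDOW_LOG_SIZE (2^48) bytes starting
 * at CXL_CAPI_WINDOW_START, and the dummy read area is
 * CXL_DUMMY_READ_SIZE (128) bytes aligned on a 2^CXL_DUMMY_READ_ALIGN
 * (256) byte boundary. CXL_INVALID_DRA marks the dummy read address as
 * not yet allocated.
 */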

bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
{
        int rc;
        u32 phb_index;
        u64 chip_id, capp_unit_id;

        /* No flags currently supported */
        if (flags)
                return false;

        if (!cpu_has_feature(CPU_FTR_HVMODE))
                return false;

        if (!cxl_is_power9())
                return false;

        if (cxl_slot_is_switched(dev))
                return false;

        /* on p9, some pci slots are not connected to a CAPP unit */
        rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
        if (rc)
                return false;

        return true;
}
EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);
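/*
 * Usage sketch (hypothetical caller): a driver wanting to enable CAPI on
 * a device would typically check the slot during probe, before asking
 * for the XSL configuration, e.g.:
 *
 *      if (!cxllib_slot_is_supported(pdev, 0))
 *              return -ENODEV;
 *
 * where pdev is the driver's struct pci_dev and the error code chosen
 * is up to the caller.
 */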

static DEFINE_MUTEX(dra_mutex);
static u64 dummy_read_addr = CXL_INVALID_DRA;

static int allocate_dummy_read_buf(void)
{
        u64 buf, vaddr;
        size_t buf_size;

        /*
         * The dummy read buffer is 128 bytes long, aligned on a
         * 256-byte boundary, and we need its physical address.
         */
        buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
        buf = (u64) kzalloc(buf_size, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
                        (~0ull << CXL_DUMMY_READ_ALIGN);

        WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
             "Dummy read buffer alignment issue");
        dummy_read_addr = virt_to_phys((void *) vaddr);
        return 0;
}
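/*
 * Worked example of the round-up above, with an illustrative address:
 * if kzalloc() returned buf = 0xc000000001234040, then
 *      vaddr = (buf + 0xff) & ~0xffull = 0xc000000001234100,
 * the first 256-byte boundary at or above buf. Since buf_size is
 * 128 + 256 = 384 bytes, vaddr + 128 = buf + 320 stays within
 * buf + 384, which is exactly what the WARN() verifies.
 */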

int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
{
        int rc;
        u32 phb_index;
        u64 chip_id, capp_unit_id;

        if (!cpu_has_feature(CPU_FTR_HVMODE))
                return -EINVAL;

        mutex_lock(&dra_mutex);
        if (dummy_read_addr == CXL_INVALID_DRA) {
                rc = allocate_dummy_read_buf();
                if (rc) {
                        mutex_unlock(&dra_mutex);
                        return rc;
                }
        }
        mutex_unlock(&dra_mutex);

        rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
        if (rc)
                return rc;

        rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
        if (rc)
                return rc;

        cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION;
        cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
        cfg->bar_addr = CXL_CAPI_WINDOW_START;
        cfg->dra = dummy_read_addr;
        return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);
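/*
 * Note on the configuration returned above: dsnctl comes from the
 * platform code for the CAPP unit routed to this device, bar_addr and
 * log_bar_size describe the 2^48-byte CAPI window, and dra is the
 * physical address of the dummy read buffer, allocated lazily under
 * dra_mutex on the first call.
 */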

int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
                        unsigned long flags)
{
        int rc = 0;

        if (!cpu_has_feature(CPU_FTR_HVMODE))
                return -EINVAL;

        switch (mode) {
        case CXL_MODE_PCI:
                /*
                 * We currently don't support going back to PCI mode.
                 * However, we'll turn the invalidations off, so that
                 * the firmware doesn't have to ack them and can do
                 * things like reset, etc. with no worries.
                 * So always return EPERM (can't go back to PCI) or
                 * EBUSY if we couldn't even turn off snooping.
                 */
                rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
                if (rc)
                        rc = -EBUSY;
                else
                        rc = -EPERM;
                break;
        case CXL_MODE_CXL:
                /* DMA only supported on TVT1 for the time being */
                if (flags != CXL_MODE_DMA_TVT1)
                        return -EINVAL;
                rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
                if (rc)
                        return rc;
                rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
                break;
        default:
                rc = -EINVAL;
        }
        return rc;
}
EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);
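/*
 * Usage sketch (hypothetical caller): switching a slot to CAPI mode
 * with DMA on TVT#1 would look like
 *
 *      rc = cxllib_switch_phb_mode(pdev, CXL_MODE_CXL, CXL_MODE_DMA_TVT1);
 *
 * while requesting CXL_MODE_PCI never succeeds: it returns -EPERM after
 * turning snooping off, or -EBUSY if even that failed.
 */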

/*
 * When switching the PHB to capi mode, the TVT#1 entry for
 * the Partitionable Endpoint is set in bypass mode, like
 * in PCI mode.
 * Configure the device dma to use TVT#1, which is done
 * by calling dma_set_mask() with a mask large enough.
 */
int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
{
        int rc;

        if (flags)
                return -EINVAL;

        rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
        return rc;
}
EXPORT_SYMBOL_GPL(cxllib_set_device_dma);
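/*
 * Usage sketch (hypothetical caller): once the PHB is in CAPI mode, the
 * driver enables 64-bit DMA through TVT#1 and then uses the regular DMA
 * API, e.g.:
 *
 *      rc = cxllib_set_device_dma(pdev, 0);
 *      dma_addr = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
 *
 * pdev, buf and len are placeholders; any standard DMA API usage
 * applies once the mask is set.
 */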

int cxllib_get_PE_attributes(struct task_struct *task,
                             unsigned long translation_mode,
                             struct cxllib_pe_attributes *attr)
{
        if (translation_mode != CXL_TRANSLATED_MODE &&
                translation_mode != CXL_REAL_MODE)
                return -EINVAL;

        attr->sr = cxl_calculate_sr(false,
                                task == NULL,
                                translation_mode == CXL_REAL_MODE,
                                true);
        attr->lpid = mfspr(SPRN_LPID);
        if (task) {
                struct mm_struct *mm = get_task_mm(task);

                if (mm == NULL)
                        return -EINVAL;
                /*
                 * Caller is keeping a reference on mm_users for as long
                 * as XSL uses the memory context
                 */
                attr->pid = mm->context.id;
                mmput(mm);
                attr->tid = task->thread.tidr;
        } else {
                attr->pid = 0;
                attr->tid = 0;
        }
        return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
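/*
 * Note on the attributes above: lpid is the current logical partition
 * id, pid is the memory context id the XSL translates with, and tid is
 * the task's thread id register (TIDR) value. When no task is supplied
 * (a kernel context), pid and tid are 0 and sr is computed accordingly.
 */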

static int get_vma_info(struct mm_struct *mm, u64 addr,
                        u64 *vma_start, u64 *vma_end,
                        unsigned long *page_size)
{
        struct vm_area_struct *vma = NULL;
        int rc = 0;

        mmap_read_lock(mm);

        vma = find_vma(mm, addr);
        if (!vma) {
                rc = -EFAULT;
                goto out;
        }
        *page_size = vma_kernel_pagesize(vma);
        *vma_start = vma->vm_start;
        *vma_end = vma->vm_end;
out:
        mmap_read_unlock(mm);
        return rc;
}

int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
{
        int rc;
        u64 dar, vma_start, vma_end;
        unsigned long page_size;

        if (mm == NULL)
                return -EFAULT;

        /*
         * The buffer we have to process can extend over several pages
         * and may also cover several VMAs.
         * We iterate over all the pages. The page size could vary
         * between VMAs.
         */
        rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
        if (rc)
                return rc;

        for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
             dar += page_size) {
                if (dar < vma_start || dar >= vma_end) {
                        /*
                         * We don't hold mm->mmap_lock while iterating, since
                         * the lock is required by one of the lower-level page
                         * fault processing functions and it could deadlock.
                         *
                         * It means the VMAs can be altered between 2
                         * loop iterations and we could theoretically
                         * miss a page (however unlikely). But that's
                         * not really a problem, as the driver will
                         * retry access, get another page fault on the
                         * missing page and call us again.
                         */
                        rc = get_vma_info(mm, dar, &vma_start, &vma_end,
                                        &page_size);
                        if (rc)
                                return rc;
                }

                rc = cxl_handle_mm_fault(mm, flags, dar);
                if (rc)
                        return -EFAULT;
        }
        return 0;
}
EXPORT_SYMBOL_GPL(cxllib_handle_fault);
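/*
 * Worked example of the loop bounds above, with illustrative values:
 * for addr = 0x10002100, size = 0x3000 and page_size = 0x1000, the loop
 * starts at dar = 0x10002100 & ~0xfff = 0x10002000 and faults in the
 * pages at 0x10002000, 0x10003000, 0x10004000 and 0x10005000, i.e.
 * every page touched by [addr, addr + size).
 */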