// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2017 IBM Corp.
 */

#include <linux/hugetlb.h>
#include <linux/sched/mm.h>
#include <asm/pnv-pci.h>
#include <misc/cxllib.h>

#include "cxl.h"

#define CXL_INVALID_DRA			~0ull
#define CXL_DUMMY_READ_SIZE		128
#define CXL_DUMMY_READ_ALIGN		8
#define CXL_CAPI_WINDOW_START		0x2000000000000ull
#define CXL_CAPI_WINDOW_LOG_SIZE	48
#define CXL_XSL_CONFIG_CURRENT_VERSION	CXL_XSL_CONFIG_VERSION1

bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	/* No flags currently supported */
	if (flags)
		return false;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return false;

	if (!cxl_is_power9())
		return false;

	if (cxl_slot_is_switched(dev))
		return false;

	/* on p9, some pci slots are not connected to a CAPP unit */
	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return false;

	return true;
}
EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);

static DEFINE_MUTEX(dra_mutex);
static u64 dummy_read_addr = CXL_INVALID_DRA;

static int allocate_dummy_read_buf(void)
{
	u64 buf, vaddr;
	size_t buf_size;

	/*
	 * Dummy read buffer is 128-byte long, aligned on a
	 * 256-byte boundary and we need the physical address.
	 */
	buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
	buf = (u64) kzalloc(buf_size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
					(~0ull << CXL_DUMMY_READ_ALIGN);

	WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
		"Dummy read buffer alignment issue");
	dummy_read_addr = virt_to_phys((void *) vaddr);
	return 0;
}

int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	mutex_lock(&dra_mutex);
	if (dummy_read_addr == CXL_INVALID_DRA) {
		rc = allocate_dummy_read_buf();
		if (rc) {
			mutex_unlock(&dra_mutex);
			return rc;
		}
	}
	mutex_unlock(&dra_mutex);

	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return rc;

	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
	if (rc)
		return rc;

	cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION;
	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
	cfg->bar_addr = CXL_CAPI_WINDOW_START;
	cfg->dra = dummy_read_addr;
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);
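
/*
 * Illustrative sketch (not part of the original file, kept out of the
 * build): one plausible probe-time sequence for a hypothetical AFU driver
 * built on top of the two exported calls above. The name example_afu_probe()
 * is an assumption for the example, not part of the cxllib API.
 */
#if 0
static int example_afu_probe(struct pci_dev *pdev)
{
	struct cxllib_xsl_config cfg;
	int rc;

	/* check that the slot can support a CAPI-capable device at all */
	if (!cxllib_slot_is_supported(pdev, 0))
		return -ENODEV;

	/* fetch version, CAPI window and dummy read address for the XSL */
	rc = cxllib_get_xsl_config(pdev, &cfg);
	if (rc)
		return rc;

	/*
	 * cfg.dsnctl, cfg.bar_addr, cfg.log_bar_size and cfg.dra would then
	 * be programmed into the device's XSL by device-specific code.
	 */
	return 0;
}
#endif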

int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
			unsigned long flags)
{
	int rc = 0;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	switch (mode) {
	case CXL_MODE_PCI:
		/*
		 * We currently don't support going back to PCI mode
		 * However, we'll turn the invalidations off, so that
		 * the firmware doesn't have to ack them and can do
		 * things like reset, etc.. with no worries.
		 * So always return EPERM (can't go back to PCI) or
		 * EBUSY if we couldn't even turn off snooping
		 */
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
		if (rc)
			rc = -EBUSY;
		else
			rc = -EPERM;
		break;
	case CXL_MODE_CXL:
		/* DMA only supported on TVT1 for the time being */
		if (flags != CXL_MODE_DMA_TVT1)
			return -EINVAL;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
		if (rc)
			return rc;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
		break;
	default:
		rc = -EINVAL;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);

/*
 * When switching the PHB to capi mode, the TVT#1 entry for
 * the Partitionable Endpoint is set in bypass mode, like
 * in PCI mode.
 * Configure the device dma to use TVT#1, which is done
 * by calling dma_set_mask() with a mask large enough.
 */
int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
{
	int rc;

	if (flags)
		return -EINVAL;

	rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_set_device_dma);
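
/*
 * Illustrative sketch (not part of the original file, kept out of the
 * build): how a hypothetical caller might combine the two calls above,
 * switching the PHB to CAPI mode with DMA on TVT#1 and then sizing the
 * device DMA mask to match. example_enable_capi_dma() is an assumed name.
 */
#if 0
static int example_enable_capi_dma(struct pci_dev *pdev)
{
	int rc;

	/* DMA is only supported on TVT1, so that is the only valid flag */
	rc = cxllib_switch_phb_mode(pdev, CXL_MODE_CXL, CXL_MODE_DMA_TVT1);
	if (rc)
		return rc;

	/* TVT#1 is in bypass mode, so a full 64-bit DMA mask is expected */
	return cxllib_set_device_dma(pdev, 0);
}
#endif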

int cxllib_get_PE_attributes(struct task_struct *task,
			unsigned long translation_mode,
			struct cxllib_pe_attributes *attr)
{
	struct mm_struct *mm = NULL;

	if (translation_mode != CXL_TRANSLATED_MODE &&
		translation_mode != CXL_REAL_MODE)
		return -EINVAL;

	attr->sr = cxl_calculate_sr(false,
				task == NULL,
				translation_mode == CXL_REAL_MODE,
				true);
	attr->lpid = mfspr(SPRN_LPID);
	if (task) {
		mm = get_task_mm(task);
		if (mm == NULL)
			return -EINVAL;
		/*
		 * Caller is keeping a reference on mm_users for as long
		 * as XSL uses the memory context
		 */
		attr->pid = mm->context.id;
		mmput(mm);
		attr->tid = task->thread.tidr;
	} else {
		attr->pid = 0;
		attr->tid = 0;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
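
/*
 * Illustrative sketch (not part of the original file, kept out of the
 * build): a hypothetical caller gathering PE attributes for a user context.
 * As the comment in cxllib_get_PE_attributes() notes, the caller is expected
 * to hold a reference on mm_users for as long as the XSL uses the memory
 * context. example_attach_user_context() is an assumed name.
 */
#if 0
static int example_attach_user_context(struct task_struct *task)
{
	struct cxllib_pe_attributes attr;
	struct mm_struct *mm;
	int rc;

	/* pin the memory context before handing it to the XSL */
	mm = get_task_mm(task);
	if (!mm)
		return -EINVAL;

	rc = cxllib_get_PE_attributes(task, CXL_TRANSLATED_MODE, &attr);
	if (rc) {
		mmput(mm);
		return rc;
	}

	/*
	 * attr.sr, attr.lpid, attr.pid and attr.tid would now be written
	 * into the Process Element entry by device-specific code. The mm
	 * reference is kept until the context is detached (mmput() then).
	 */
	return 0;
}
#endif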

static int get_vma_info(struct mm_struct *mm, u64 addr,
			u64 *vma_start, u64 *vma_end,
			unsigned long *page_size)
{
	struct vm_area_struct *vma = NULL;
	int rc = 0;

	mmap_read_lock(mm);

	vma = find_vma(mm, addr);
	if (!vma) {
		rc = -EFAULT;
		goto out;
	}
	*page_size = vma_kernel_pagesize(vma);
	*vma_start = vma->vm_start;
	*vma_end = vma->vm_end;
out:
	mmap_read_unlock(mm);
	return rc;
}

int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
{
	int rc;
	u64 dar, vma_start, vma_end;
	unsigned long page_size;

	if (mm == NULL)
		return -EFAULT;

	/*
	 * The buffer we have to process can extend over several pages
	 * and may also cover several VMAs.
	 * We iterate over all the pages. The page size could vary
	 * between VMAs.
	 */
	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
	if (rc)
		return rc;

	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
	     dar += page_size) {
		if (dar < vma_start || dar >= vma_end) {
			/*
			 * We don't hold mm->mmap_lock while iterating, since
			 * the lock is required by one of the lower-level page
			 * fault processing functions and it could
			 * create a deadlock.
			 *
			 * It means the VMAs can be altered between 2
			 * loop iterations and we could theoretically
			 * miss a page (however unlikely). But that's
			 * not really a problem, as the driver will
			 * retry access, get another page fault on the
			 * missing page and call us again.
			 */
			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
					&page_size);
			if (rc)
				return rc;
		}

		rc = cxl_handle_mm_fault(mm, flags, dar);
		if (rc)
			return -EFAULT;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_handle_fault);
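
/*
 * Illustrative sketch (not part of the original file, kept out of the
 * build): a hypothetical caller pre-faulting a user buffer so the XSL does
 * not take a translation fault on every page at first access. The
 * fault_flags encoding is whatever cxl_handle_mm_fault() expects from the
 * caller and is passed through unchanged here; example_prefault_buffer()
 * is an assumed name.
 */
#if 0
static int example_prefault_buffer(struct task_struct *task, u64 addr,
				   u64 size, u64 fault_flags)
{
	struct mm_struct *mm;
	int rc;

	/* keep mm_users elevated while the pages are being faulted in */
	mm = get_task_mm(task);
	if (!mm)
		return -EINVAL;

	rc = cxllib_handle_fault(mm, addr, size, fault_flags);
	mmput(mm);
	return rc;
}
#endif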