/*
 * Copyright 2017 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/hugetlb.h>
#include <linux/sched/mm.h>
#include <asm/pnv-pci.h>
#include <misc/cxllib.h>

#include "cxl.h"

#define CXL_INVALID_DRA			~0ull
#define CXL_DUMMY_READ_SIZE		128
#define CXL_DUMMY_READ_ALIGN		8
#define CXL_CAPI_WINDOW_START		0x2000000000000ull
#define CXL_CAPI_WINDOW_LOG_SIZE	48
#define CXL_XSL_CONFIG_CURRENT_VERSION	CXL_XSL_CONFIG_VERSION1

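/*
 * These constants are reported back to the caller by cxllib_get_xsl_config():
 * the dummy read buffer (CXL_DUMMY_READ_SIZE bytes, aligned on a
 * 1 << CXL_DUMMY_READ_ALIGN = 256-byte boundary) ends up in cfg->dra, and
 * the CAPI window (2^CXL_CAPI_WINDOW_LOG_SIZE bytes starting at
 * CXL_CAPI_WINDOW_START) in cfg->bar_addr and cfg->log_bar_size.
 */
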
bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	/* No flags currently supported */
	if (flags)
		return false;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return false;

	if (cxl_slot_is_switched(dev))
		return false;

	/* on p9, some pci slots are not connected to a CAPP unit */
	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return false;

	return true;
}
EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);

static DEFINE_MUTEX(dra_mutex);
static u64 dummy_read_addr = CXL_INVALID_DRA;

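/*
 * Sizing sketch for allocate_dummy_read_buf() below: with
 * CXL_DUMMY_READ_SIZE = 128 and CXL_DUMMY_READ_ALIGN = 8, buf_size is
 * 128 + 256 = 384 bytes, enough to round the allocation up to the next
 * 256-byte boundary and still keep 128 usable bytes. For instance, if
 * kzalloc() returned an address ending in 0x140, vaddr would be rounded
 * up to ...0x200, and 0x200 + 128 = 0x280 still fits below
 * 0x140 + 384 = 0x2c0.
 */
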
static int allocate_dummy_read_buf(void)
{
	u64 buf, vaddr;
	size_t buf_size;

	/*
	 * Dummy read buffer is 128-byte long, aligned on a
	 * 256-byte boundary and we need the physical address.
	 */
	buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
	buf = (u64) kzalloc(buf_size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
					(~0ull << CXL_DUMMY_READ_ALIGN);

	WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
	     "Dummy read buffer alignment issue");
	dummy_read_addr = virt_to_phys((void *) vaddr);
	return 0;
}

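/*
 * Illustrative call sequence for a device driver using this library; a
 * sketch only, where the calling driver, its struct pci_dev *dev and its
 * error handling are assumed rather than defined here:
 *
 *	struct cxllib_xsl_config cfg;
 *	int rc;
 *
 *	if (!cxllib_slot_is_supported(dev, 0))
 *		return -ENODEV;
 *	rc = cxllib_get_xsl_config(dev, &cfg);
 *	if (rc)
 *		return rc;
 *
 * The driver then programs cfg.dsnctl, cfg.bar_addr, cfg.log_bar_size and
 * cfg.dra into its XSL before enabling CAPI traffic.
 */
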
int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	mutex_lock(&dra_mutex);
	if (dummy_read_addr == CXL_INVALID_DRA) {
		rc = allocate_dummy_read_buf();
		if (rc) {
			mutex_unlock(&dra_mutex);
			return rc;
		}
	}
	mutex_unlock(&dra_mutex);

	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return rc;

	rc = cxl_get_xsl9_dsnctl(capp_unit_id, &cfg->dsnctl);
	if (rc)
		return rc;
	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
		/* workaround for DD1 - nbwind = capiind */
		cfg->dsnctl |= ((u64)0x02 << (63-47));
	}

	cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION;
	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
	cfg->bar_addr = CXL_CAPI_WINDOW_START;
	cfg->dra = dummy_read_addr;

	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);

int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
			unsigned long flags)
{
	int rc = 0;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	switch (mode) {
	case CXL_MODE_PCI:
		/*
		 * We currently don't support going back to PCI mode
		 * However, we'll turn the invalidations off, so that
		 * the firmware doesn't have to ack them and can do
		 * things like reset, etc.. with no worries.
		 * So always return EPERM (can't go back to PCI) or
		 * EBUSY if we couldn't even turn off snooping
		 */
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
		if (rc)
			rc = -EBUSY;
		else
			rc = -EPERM;
		break;
	case CXL_MODE_CXL:
		/* DMA only supported on TVT1 for the time being */
		if (flags != CXL_MODE_DMA_TVT1)
			return -EINVAL;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
		if (rc)
			return rc;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
		break;
	default:
		rc = -EINVAL;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);

/*
 * When switching the PHB to capi mode, the TVT#1 entry for
 * the Partitionable Endpoint is set in bypass mode, like
 * in PCI mode.
 * Configure the device dma to use TVT#1, which is done
 * by calling dma_set_mask() with a mask large enough.
 */
int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
{
	int rc;

	if (flags)
		return -EINVAL;

	rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_set_device_dma);

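/*
 * Compute the Process Element attributes for a task: the state register,
 * the LPID and, when a task is provided, the pid (memory context id) and
 * the thread id (tidr). Only CXL_TRANSLATED_MODE and CXL_REAL_MODE are
 * accepted for translation_mode. The caller keeps a reference on mm_users
 * for as long as the XSL uses the memory context (see the comment in the
 * function body).
 */
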
int cxllib_get_PE_attributes(struct task_struct *task,
			     unsigned long translation_mode,
			     struct cxllib_pe_attributes *attr)
{
	struct mm_struct *mm = NULL;

	if (translation_mode != CXL_TRANSLATED_MODE &&
		translation_mode != CXL_REAL_MODE)
		return -EINVAL;

	attr->sr = cxl_calculate_sr(false,
				task == NULL,
				translation_mode == CXL_REAL_MODE,
				true);
	attr->lpid = mfspr(SPRN_LPID);
	if (task) {
		mm = get_task_mm(task);
		if (mm == NULL)
			return -EINVAL;
		/*
		 * Caller is keeping a reference on mm_users for as long
		 * as XSL uses the memory context
		 */
		attr->pid = mm->context.id;
		mmput(mm);
		attr->tid = task->thread.tidr;
	} else {
		attr->pid = 0;
		attr->tid = 0;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);

static int get_vma_info(struct mm_struct *mm, u64 addr,
			u64 *vma_start, u64 *vma_end,
			unsigned long *page_size)
{
	struct vm_area_struct *vma = NULL;
	int rc = 0;

	down_read(&mm->mmap_sem);

	vma = find_vma(mm, addr);
	if (!vma) {
		rc = -EFAULT;
		goto out;
	}
	*page_size = vma_kernel_pagesize(vma);
	*vma_start = vma->vm_start;
	*vma_end = vma->vm_end;
out:
	up_read(&mm->mmap_sem);
	return rc;
}

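/*
 * Fault in, page by page, every page overlapping the range
 * [addr, addr + size) of the given memory context, by calling
 * cxl_handle_mm_fault() for each page. Page size and VMA boundaries are
 * refreshed with get_vma_info() whenever the current address leaves the
 * known VMA, as the range may span several VMAs with different page sizes.
 */
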
int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
{
	int rc;
	u64 dar, vma_start, vma_end;
	unsigned long page_size;

	if (mm == NULL)
		return -EFAULT;

	/*
	 * The buffer we have to process can extend over several pages
	 * and may also cover several VMAs.
	 * We iterate over all the pages. The page size could vary
	 * between VMAs.
	 */
	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
	if (rc)
		return rc;

	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
	     dar += page_size) {
		if (dar < vma_start || dar >= vma_end) {
			/*
			 * We don't hold the mm->mmap_sem semaphore
			 * while iterating, since the semaphore is
			 * required by one of the lower-level page
			 * fault processing functions and it could
			 * create a deadlock.
			 *
			 * It means the VMAs can be altered between 2
			 * loop iterations and we could theoretically
			 * miss a page (however unlikely). But that's
			 * not really a problem, as the driver will
			 * retry access, get another page fault on the
			 * missing page and call us again.
			 */
			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
					&page_size);
			if (rc)
				return rc;
		}

		rc = cxl_handle_mm_fault(mm, flags, dar);
		if (rc)
			return -EFAULT;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_handle_fault);