/*
 * Copyright 2017 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/hugetlb.h>
#include <linux/sched/mm.h>
#include <asm/pnv-pci.h>
#include <misc/cxllib.h>

#include "cxl.h"

#define CXL_INVALID_DRA			~0ull
#define CXL_DUMMY_READ_SIZE		128
#define CXL_DUMMY_READ_ALIGN		8
#define CXL_CAPI_WINDOW_START		0x2000000000000ull
#define CXL_CAPI_WINDOW_LOG_SIZE	48
#define CXL_XSL_CONFIG_CURRENT_VERSION	CXL_XSL_CONFIG_VERSION1

bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	/* No flags currently supported */
	if (flags)
		return false;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return false;

	if (!cxl_is_power9())
		return false;

	if (cxl_slot_is_switched(dev))
		return false;

	/* on p9, some pci slots are not connected to a CAPP unit */
	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return false;

	return true;
}
EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);

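/*
 * Example (illustrative, not part of this API): a CAPI-capable device
 * driver would typically probe the slot before trying to enable CAPI
 * mode. Only the cxllib_* call below is provided by this file; the
 * mydrv_* name is hypothetical.
 *
 *	static int mydrv_check_capi(struct pci_dev *pdev)
 *	{
 *		if (!cxllib_slot_is_supported(pdev, 0))
 *			return -ENODEV;	// no CAPP route, stay in PCI mode
 *		return 0;
 *	}
 */
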
static DEFINE_MUTEX(dra_mutex);
static u64 dummy_read_addr = CXL_INVALID_DRA;

static int allocate_dummy_read_buf(void)
{
	u64 buf, vaddr;
	size_t buf_size;

	/*
	 * Dummy read buffer is 128 bytes long, aligned on a
	 * 256-byte boundary, and we need the physical address.
	 */
	buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
	buf = (u64) kzalloc(buf_size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
					(~0ull << CXL_DUMMY_READ_ALIGN);

	WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
		"Dummy read buffer alignment issue");
	dummy_read_addr = virt_to_phys((void *) vaddr);
	return 0;
}

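/*
 * For example, with CXL_DUMMY_READ_ALIGN = 8 the code above rounds the
 * kzalloc'ed address up to the next 256-byte boundary:
 *
 *	buf   = 0x1234
 *	vaddr = (0x1234 + 0xff) & ~0xffull = 0x1300
 *
 * Allocating CXL_DUMMY_READ_SIZE + 256 bytes guarantees that the
 * aligned 128-byte window still fits inside the allocation, which is
 * what the WARN() above checks.
 */
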
int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	mutex_lock(&dra_mutex);
	if (dummy_read_addr == CXL_INVALID_DRA) {
		rc = allocate_dummy_read_buf();
		if (rc) {
			mutex_unlock(&dra_mutex);
			return rc;
		}
	}
	mutex_unlock(&dra_mutex);

	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return rc;

	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
	if (rc)
		return rc;

	cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION;
	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
	cfg->bar_addr = CXL_CAPI_WINDOW_START;
	cfg->dra = dummy_read_addr;
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);

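/*
 * Example (illustrative): the XSL-based device driver consumes the
 * returned configuration when programming its translation logic. The
 * mydrv_* helper and register names below are hypothetical; the
 * cxllib_xsl_config fields are the ones filled in above.
 *
 *	struct cxllib_xsl_config cfg;
 *
 *	rc = cxllib_get_xsl_config(pdev, &cfg);
 *	if (rc)
 *		return rc;
 *	mydrv_write_xsl_reg(pdev, MYDRV_XSL_DSNCTL, cfg.dsnctl);
 *	mydrv_write_xsl_reg(pdev, MYDRV_XSL_DRA, cfg.dra);
 *	// the CAPI window starts at cfg.bar_addr and spans
 *	// 2^cfg.log_bar_size bytes
 */
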
int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
			unsigned long flags)
{
	int rc = 0;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	switch (mode) {
	case CXL_MODE_PCI:
		/*
		 * We currently don't support going back to PCI mode.
		 * However, we'll turn the invalidations off, so that
		 * the firmware doesn't have to ack them and can do
		 * things like reset, etc. with no worries.
		 * So always return EPERM (can't go back to PCI) or
		 * EBUSY if we couldn't even turn off snooping.
		 */
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
		if (rc)
			rc = -EBUSY;
		else
			rc = -EPERM;
		break;
	case CXL_MODE_CXL:
		/* DMA only supported on TVT1 for the time being */
		if (flags != CXL_MODE_DMA_TVT1)
			return -EINVAL;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
		if (rc)
			return rc;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
		break;
	default:
		rc = -EINVAL;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);

/*
 * When switching the PHB to capi mode, the TVT#1 entry for
 * the Partitionable Endpoint is set in bypass mode, like
 * in PCI mode.
 * Configure the device DMA to use TVT#1, which is done
 * by calling dma_set_mask() with a mask large enough.
 */
int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
{
	int rc;

	if (flags)
		return -EINVAL;

	rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_set_device_dma);

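/*
 * Typical enable sequence from the calling driver (illustrative, error
 * handling shortened): switch the PHB to CAPI mode with DMA on TVT#1,
 * then set a 64-bit DMA mask so the device uses the bypass TVT#1 entry.
 *
 *	rc = cxllib_switch_phb_mode(pdev, CXL_MODE_CXL, CXL_MODE_DMA_TVT1);
 *	if (rc)
 *		return rc;
 *	rc = cxllib_set_device_dma(pdev, 0);
 *	if (rc)
 *		return rc;
 */
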
int cxllib_get_PE_attributes(struct task_struct *task,
			unsigned long translation_mode,
			struct cxllib_pe_attributes *attr)
{
	struct mm_struct *mm = NULL;

	if (translation_mode != CXL_TRANSLATED_MODE &&
		translation_mode != CXL_REAL_MODE)
		return -EINVAL;

	attr->sr = cxl_calculate_sr(false,
				task == NULL,
				translation_mode == CXL_REAL_MODE,
				true);
	attr->lpid = mfspr(SPRN_LPID);
	if (task) {
		mm = get_task_mm(task);
		if (mm == NULL)
			return -EINVAL;
		/*
		 * Caller is keeping a reference on mm_users for as long
		 * as XSL uses the memory context
		 */
		attr->pid = mm->context.id;
		mmput(mm);
		attr->tid = task->thread.tidr;
	} else {
		attr->pid = 0;
		attr->tid = 0;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);

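/*
 * Example (illustrative): fetching the attributes of the current
 * process for a translated context. How the returned sr/lpid/pid/tid
 * values are programmed into the device's Process Element is specific
 * to the XSL implementation and not covered here.
 *
 *	struct cxllib_pe_attributes attr;
 *
 *	rc = cxllib_get_PE_attributes(current, CXL_TRANSLATED_MODE, &attr);
 *	if (rc)
 *		return rc;
 */
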
static int get_vma_info(struct mm_struct *mm, u64 addr,
			u64 *vma_start, u64 *vma_end,
			unsigned long *page_size)
{
	struct vm_area_struct *vma = NULL;
	int rc = 0;

	down_read(&mm->mmap_sem);

	vma = find_vma(mm, addr);
	if (!vma) {
		rc = -EFAULT;
		goto out;
	}
	*page_size = vma_kernel_pagesize(vma);
	*vma_start = vma->vm_start;
	*vma_end = vma->vm_end;
out:
	up_read(&mm->mmap_sem);
	return rc;
}

int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
{
	int rc;
	u64 dar, vma_start, vma_end;
	unsigned long page_size;

	if (mm == NULL)
		return -EFAULT;

	/*
	 * The buffer we have to process can extend over several pages
	 * and may also cover several VMAs.
	 * We iterate over all the pages. The page size could vary
	 * between VMAs.
	 */
	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
	if (rc)
		return rc;

	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
	     dar += page_size) {
		if (dar < vma_start || dar >= vma_end) {
			/*
			 * We don't hold the mm->mmap_sem semaphore
			 * while iterating, since the semaphore is
			 * required by one of the lower-level page
			 * fault processing functions and it could
			 * create a deadlock.
			 *
			 * It means the VMAs can be altered between 2
			 * loop iterations and we could theoretically
			 * miss a page (however unlikely). But that's
			 * not really a problem, as the driver will
			 * retry access, get another page fault on the
			 * missing page and call us again.
			 */
			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
					&page_size);
			if (rc)
				return rc;
		}

		rc = cxl_handle_mm_fault(mm, flags, dar);
		if (rc)
			return -EFAULT;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_handle_fault);
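
/*
 * Example (illustrative): faulting in a user buffer before the
 * accelerator accesses it. The mm pointer, buffer and flags would come
 * from the calling driver's context; flags are passed through to the
 * low-level fault handler.
 *
 *	rc = cxllib_handle_fault(ctx->mm, (u64) user_buf, buf_size, flags);
 *	if (rc)
 *		return rc;
 */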