1 // SPDX-License-Identifier: GPL-2.0+
4 * Copyright 2019, IBM Corporation
7 #define pr_fmt(fmt) "vas: " fmt
9 #include <linux/kernel.h>
10 #include <linux/types.h>
11 #include <linux/slab.h>
12 #include <linux/uaccess.h>
13 #include <linux/kthread.h>
14 #include <linux/sched/signal.h>
15 #include <linux/mmu_context.h>
16 #include <asm/icswx.h>
/*
 * The maximum FIFO size for a fault window can be 8MB
 * (VAS_RX_FIFO_SIZE_MAX). A 4MB FIFO is used since each VAS
 * instance will have its own fault window.
 * An 8MB FIFO can be used if more faults are expected for each VAS
 * instance.
 */
#define VAS_FAULT_WIN_FIFO_SIZE	(4 << 20)
29 static void dump_crb(struct coprocessor_request_block
*crb
)
31 struct data_descriptor_entry
*dde
;
32 struct nx_fault_stamp
*nx
;
35 pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
36 be64_to_cpu(dde
->address
), be32_to_cpu(dde
->length
),
37 dde
->count
, dde
->index
, dde
->flags
);
40 pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
41 be64_to_cpu(dde
->address
), be32_to_cpu(dde
->length
),
42 dde
->count
, dde
->index
, dde
->flags
);
45 pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n",
46 be32_to_cpu(nx
->pswid
),
47 be64_to_cpu(crb
->stamp
.nx
.fault_storage_addr
),
48 nx
->flags
, nx
->fault_status
);
52 * Update the CSB to indicate a translation error.
54 * User space will be polling on CSB after the request is issued.
55 * If NX can handle the request without any issues, it updates CSB.
56 * Whereas if NX encounters page fault, the kernel will handle the
57 * fault and update CSB with translation error.
59 * If we are unable to update the CSB means copy_to_user failed due to
60 * invalid csb_addr, send a signal to the process.
62 static void update_csb(struct vas_window
*window
,
63 struct coprocessor_request_block
*crb
)
65 struct coprocessor_status_block csb
;
66 struct kernel_siginfo info
;
67 struct task_struct
*tsk
;
68 void __user
*csb_addr
;
73 * NX user space windows can not be opened for task->mm=NULL
74 * and faults will not be generated for kernel requests.
76 if (WARN_ON_ONCE(!window
->mm
|| !window
->user_win
))
79 csb_addr
= (void __user
*)be64_to_cpu(crb
->csb_addr
);
81 memset(&csb
, 0, sizeof(csb
));
82 csb
.cc
= CSB_CC_FAULT_ADDRESS
;
83 csb
.ce
= CSB_CE_TERMINATION
;
88 * NX operates and returns in BE format as defined CRB struct.
89 * So saves fault_storage_addr in BE as NX pastes in FIFO and
90 * expects user space to convert to CPU format.
92 csb
.address
= crb
->stamp
.nx
.fault_storage_addr
;
96 tsk
= get_pid_task(pid
, PIDTYPE_PID
);
98 * Process closes send window after all pending NX requests are
99 * completed. In multi-thread applications, a child thread can
100 * open a window and can exit without closing it. May be some
101 * requests are pending or this window can be used by other
102 * threads later. We should handle faults if NX encounters
103 * pages faults on these requests. Update CSB with translation
104 * error and fault address. If csb_addr passed by user space is
105 * invalid, send SEGV signal to pid saved in window. If the
106 * child thread is not running, send the signal to tgid.
107 * Parent thread (tgid) will close this window upon its exit.
109 * pid and mm references are taken when window is opened by
110 * process (pid). So tgid is used only when child thread opens
111 * a window and exits without closing it.
115 tsk
= get_pid_task(pid
, PIDTYPE_PID
);
117 * Parent thread (tgid) will be closing window when it
118 * exits. So should not get here.
120 if (WARN_ON_ONCE(!tsk
))
124 /* Return if the task is exiting. */
125 if (tsk
->flags
& PF_EXITING
) {
126 put_task_struct(tsk
);
130 kthread_use_mm(window
->mm
);
131 rc
= copy_to_user(csb_addr
, &csb
, sizeof(csb
));
133 * User space polls on csb.flags (first byte). So add barrier
134 * then copy first byte with csb flags update.
138 /* Make sure update to csb.flags is visible now */
140 rc
= copy_to_user(csb_addr
, &csb
, sizeof(u8
));
142 kthread_unuse_mm(window
->mm
);
143 put_task_struct(tsk
);
149 pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n",
150 csb_addr
, pid_vnr(pid
));
152 clear_siginfo(&info
);
153 info
.si_signo
= SIGSEGV
;
154 info
.si_errno
= EFAULT
;
155 info
.si_code
= SEGV_MAPERR
;
156 info
.si_addr
= csb_addr
;
159 * process will be polling on csb.flags after request is sent to
160 * NX. So generally CSB update should not fail except when an
161 * application passes invalid csb_addr. So an error message will
162 * be displayed and leave it to user space whether to ignore or
163 * handle this signal.
166 rc
= kill_pid_info(SIGSEGV
, &info
, pid
);
169 pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__
,
173 static void dump_fifo(struct vas_instance
*vinst
, void *entry
)
175 unsigned long *end
= vinst
->fault_fifo
+ vinst
->fault_fifo_size
;
176 unsigned long *fifo
= entry
;
179 pr_err("Fault fifo size %d, Max crbs %d\n", vinst
->fault_fifo_size
,
180 vinst
->fault_fifo_size
/ CRB_SIZE
);
182 /* Dump 10 CRB entries or until end of FIFO */
183 pr_err("Fault FIFO Dump:\n");
184 for (i
= 0; i
< 10*(CRB_SIZE
/8) && fifo
< end
; i
+= 4, fifo
+= 4) {
185 pr_err("[%.3d, %p]: 0x%.16lx 0x%.16lx 0x%.16lx 0x%.16lx\n",
186 i
, fifo
, *fifo
, *(fifo
+1), *(fifo
+2), *(fifo
+3));
191 * Process valid CRBs in fault FIFO.
192 * NX process user space requests, return credit and update the status
193 * in CRB. If it encounters transalation error when accessing CRB or
194 * request buffers, raises interrupt on the CPU to handle the fault.
195 * It takes credit on fault window, updates nx_fault_stamp in CRB with
196 * the following information and pastes CRB in fault FIFO.
198 * pswid - window ID of the window on which the request is sent.
199 * fault_storage_addr - fault address
201 * It can raise a single interrupt for multiple faults. Expects OS to
202 * process all valid faults and return credit for each fault on user
203 * space and fault windows. This fault FIFO control will be done with
204 * credit mechanism. NX can continuously paste CRBs until credits are not
205 * available on fault window. Otherwise, returns with RMA_reject.
207 * Total credits available on fault window: FIFO_SIZE(4MB)/CRBS_SIZE(128)
210 irqreturn_t
vas_fault_thread_fn(int irq
, void *data
)
212 struct vas_instance
*vinst
= data
;
213 struct coprocessor_request_block
*crb
, *entry
;
214 struct coprocessor_request_block buf
;
215 struct vas_window
*window
;
222 * VAS can interrupt with multiple page faults. So process all
223 * valid CRBs within fault FIFO until reaches invalid CRB.
224 * We use CCW[0] and pswid to validate validate CRBs:
226 * CCW[0] Reserved bit. When NX pastes CRB, CCW[0]=0
227 * OS sets this bit to 1 after reading CRB.
228 * pswid NX assigns window ID. Set pswid to -1 after
229 * reading CRB from fault FIFO.
231 * We exit this function if no valid CRBs are available to process.
232 * So acquire fault_lock and reset fifo_in_progress to 0 before
234 * In case kernel receives another interrupt with different page
235 * fault, interrupt handler returns with IRQ_HANDLED if
236 * fifo_in_progress is set. Means these new faults will be
237 * handled by the current thread. Otherwise set fifo_in_progress
238 * and return IRQ_WAKE_THREAD to wake up thread.
241 spin_lock_irqsave(&vinst
->fault_lock
, flags
);
243 * Advance the fault fifo pointer to next CRB.
244 * Use CRB_SIZE rather than sizeof(*crb) since the latter is
245 * aligned to CRB_ALIGN (256) but the CRB written to by VAS is
246 * only CRB_SIZE in len.
248 fifo
= vinst
->fault_fifo
+ (vinst
->fault_crbs
* CRB_SIZE
);
251 if ((entry
->stamp
.nx
.pswid
== cpu_to_be32(FIFO_INVALID_ENTRY
))
252 || (entry
->ccw
& cpu_to_be32(CCW0_INVALID
))) {
253 vinst
->fifo_in_progress
= 0;
254 spin_unlock_irqrestore(&vinst
->fault_lock
, flags
);
258 spin_unlock_irqrestore(&vinst
->fault_lock
, flags
);
260 if (vinst
->fault_crbs
== (vinst
->fault_fifo_size
/ CRB_SIZE
))
261 vinst
->fault_crbs
= 0;
263 memcpy(crb
, fifo
, CRB_SIZE
);
264 entry
->stamp
.nx
.pswid
= cpu_to_be32(FIFO_INVALID_ENTRY
);
265 entry
->ccw
|= cpu_to_be32(CCW0_INVALID
);
267 * Return credit for the fault window.
269 vas_return_credit(vinst
->fault_win
, false);
271 pr_devel("VAS[%d] fault_fifo %p, fifo %p, fault_crbs %d\n",
272 vinst
->vas_id
, vinst
->fault_fifo
, fifo
,
276 window
= vas_pswid_to_window(vinst
,
277 be32_to_cpu(crb
->stamp
.nx
.pswid
));
279 if (IS_ERR(window
)) {
281 * We got an interrupt about a specific send
282 * window but we can't find that window and we can't
283 * even clean it up (return credit on user space
285 * But we should not get here.
288 dump_fifo(vinst
, (void *)entry
);
289 pr_err("VAS[%d] fault_fifo %p, fifo %p, pswid 0x%x, fault_crbs %d bad CRB?\n",
290 vinst
->vas_id
, vinst
->fault_fifo
, fifo
,
291 be32_to_cpu(crb
->stamp
.nx
.pswid
),
296 update_csb(window
, crb
);
298 * Return credit for send window after processing
301 vas_return_credit(window
, true);
306 irqreturn_t
vas_fault_handler(int irq
, void *dev_id
)
308 struct vas_instance
*vinst
= dev_id
;
309 irqreturn_t ret
= IRQ_WAKE_THREAD
;
313 * NX can generate an interrupt for multiple faults. So the
314 * fault handler thread process all CRBs until finds invalid
315 * entry. In case if NX sees continuous faults, it is possible
316 * that the thread function entered with the first interrupt
317 * can execute and process all valid CRBs.
318 * So wake up thread only if the fault thread is not in progress.
320 spin_lock_irqsave(&vinst
->fault_lock
, flags
);
322 if (vinst
->fifo_in_progress
)
325 vinst
->fifo_in_progress
= 1;
327 spin_unlock_irqrestore(&vinst
->fault_lock
, flags
);
333 * Fault window is opened per VAS instance. NX pastes fault CRB in fault
334 * FIFO upon page faults.
336 int vas_setup_fault_window(struct vas_instance
*vinst
)
338 struct vas_rx_win_attr attr
;
340 vinst
->fault_fifo_size
= VAS_FAULT_WIN_FIFO_SIZE
;
341 vinst
->fault_fifo
= kzalloc(vinst
->fault_fifo_size
, GFP_KERNEL
);
342 if (!vinst
->fault_fifo
) {
343 pr_err("Unable to alloc %d bytes for fault_fifo\n",
344 vinst
->fault_fifo_size
);
349 * Invalidate all CRB entries. NX pastes valid entry for each fault.
351 memset(vinst
->fault_fifo
, FIFO_INVALID_ENTRY
, vinst
->fault_fifo_size
);
352 vas_init_rx_win_attr(&attr
, VAS_COP_TYPE_FAULT
);
354 attr
.rx_fifo_size
= vinst
->fault_fifo_size
;
355 attr
.rx_fifo
= vinst
->fault_fifo
;
358 * Max creds is based on number of CRBs can fit in the FIFO.
359 * (fault_fifo_size/CRB_SIZE). If 8MB FIFO is used, max creds
360 * will be 0xffff since the receive creds field is 16bits wide.
362 attr
.wcreds_max
= vinst
->fault_fifo_size
/ CRB_SIZE
;
363 attr
.lnotify_lpid
= 0;
364 attr
.lnotify_pid
= mfspr(SPRN_PID
);
365 attr
.lnotify_tid
= mfspr(SPRN_PID
);
367 vinst
->fault_win
= vas_rx_win_open(vinst
->vas_id
, VAS_COP_TYPE_FAULT
,
370 if (IS_ERR(vinst
->fault_win
)) {
371 pr_err("VAS: Error %ld opening FaultWin\n",
372 PTR_ERR(vinst
->fault_win
));
373 kfree(vinst
->fault_fifo
);
374 return PTR_ERR(vinst
->fault_win
);
377 pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n",
378 vinst
->fault_win
->winid
, attr
.lnotify_lpid
,
379 attr
.lnotify_pid
, attr
.lnotify_tid
);