arch/powerpc/platforms/powernv/vas-fault.c

   1 // SPDX-License-Identifier: GPL-2.0+
   2 /*
   3  * VAS Fault handling.
   4  * Copyright 2019, IBM Corporation
   5  */
   6
   7 #define pr_fmt(fmt) "vas: " fmt
   8
   9 #include <linux/kernel.h>
  10 #include <linux/types.h>
  11 #include <linux/slab.h>
  12 #include <linux/uaccess.h>
  13 #include <linux/kthread.h>
  14 #include <linux/sched/signal.h>
  15 #include <linux/mmu_context.h>
  16 #include <asm/icswx.h>
  17
  18 #include "vas.h"
  19
/*
 * The maximum FIFO size for a fault window is 8MB
 * (VAS_RX_FIFO_SIZE_MAX). A 4MB FIFO is used here since each VAS
 * instance has its own fault window.
 * An 8MB FIFO can be used if more faults are expected for each VAS
 * instance.
 */
  27 #define VAS_FAULT_WIN_FIFO_SIZE (4 << 20)
  28
  29 static void dump_crb(struct coprocessor_request_block *crb)
  30 {
  31         struct data_descriptor_entry *dde;
  32         struct nx_fault_stamp *nx;
  33
  34         dde = &crb->source;
  35         pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
  36                 be64_to_cpu(dde->address), be32_to_cpu(dde->length),
  37                 dde->count, dde->index, dde->flags);
  38
  39         dde = &crb->target;
  40         pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
  41                 be64_to_cpu(dde->address), be32_to_cpu(dde->length),
  42                 dde->count, dde->index, dde->flags);
  43
  44         nx = &crb->stamp.nx;
  45         pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n",
  46                 be32_to_cpu(nx->pswid),
  47                 be64_to_cpu(crb->stamp.nx.fault_storage_addr),
  48                 nx->flags, nx->fault_status);
  49 }
  50
  51 /*
  52  * Update the CSB to indicate a translation error.
  53  *
  54  * User space will be polling on CSB after the request is issued.
  55  * If NX can handle the request without any issues, it updates CSB.
  56  * Whereas if NX encounters page fault, the kernel will handle the
  57  * fault and update CSB with translation error.
  58  *
  59  * If we are unable to update the CSB means copy_to_user failed due to
  60  * invalid csb_addr, send a signal to the process.
  61  */
  62 static void update_csb(struct vas_window *window,
  63                         struct coprocessor_request_block *crb)
  64 {
  65         struct coprocessor_status_block csb;
  66         struct kernel_siginfo info;
  67         struct task_struct *tsk;
  68         void __user *csb_addr;
  69         struct pid *pid;
  70         int rc;
  71
  72         /*
  73          * NX user space windows can not be opened for task->mm=NULL
  74          * and faults will not be generated for kernel requests.
  75          */
  76         if (WARN_ON_ONCE(!window->mm || !window->user_win))
  77                 return;
  78
  79         csb_addr = (void __user *)be64_to_cpu(crb->csb_addr);
  80
  81         memset(&csb, 0, sizeof(csb));
  82         csb.cc = CSB_CC_FAULT_ADDRESS;
  83         csb.ce = CSB_CE_TERMINATION;
  84         csb.cs = 0;
  85         csb.count = 0;
  86
  87         /*
  88          * NX operates and returns in BE format as defined CRB struct.
  89          * So saves fault_storage_addr in BE as NX pastes in FIFO and
  90          * expects user space to convert to CPU format.
  91          */
  92         csb.address = crb->stamp.nx.fault_storage_addr;
  93         csb.flags = 0;
  94
  95         pid = window->pid;
  96         tsk = get_pid_task(pid, PIDTYPE_PID);
  97         /*
  98          * Process closes send window after all pending NX requests are
  99          * completed. In multi-thread applications, a child thread can
 100          * open a window and can exit without closing it. May be some
 101          * requests are pending or this window can be used by other
 102          * threads later. We should handle faults if NX encounters
 103          * pages faults on these requests. Update CSB with translation
 104          * error and fault address. If csb_addr passed by user space is
 105          * invalid, send SEGV signal to pid saved in window. If the
 106          * child thread is not running, send the signal to tgid.
 107          * Parent thread (tgid) will close this window upon its exit.
 108          *
 109          * pid and mm references are taken when window is opened by
 110          * process (pid). So tgid is used only when child thread opens
 111          * a window and exits without closing it.
 112          */
 113         if (!tsk) {
 114                 pid = window->tgid;
 115                 tsk = get_pid_task(pid, PIDTYPE_PID);
 116                 /*
 117                  * Parent thread (tgid) will be closing window when it
 118                  * exits. So should not get here.
 119                  */
 120                 if (WARN_ON_ONCE(!tsk))
 121                         return;
 122         }
 123
 124         /* Return if the task is exiting. */
 125         if (tsk->flags & PF_EXITING) {
 126                 put_task_struct(tsk);
 127                 return;
 128         }
 129
 130         kthread_use_mm(window->mm);
 131         rc = copy_to_user(csb_addr, &csb, sizeof(csb));
 132         /*
 133          * User space polls on csb.flags (first byte). So add barrier
 134          * then copy first byte with csb flags update.
 135          */
 136         if (!rc) {
 137                 csb.flags = CSB_V;
 138                 /* Make sure update to csb.flags is visible now */
 139                 smp_mb();
 140                 rc = copy_to_user(csb_addr, &csb, sizeof(u8));
 141         }
 142         kthread_unuse_mm(window->mm);
 143         put_task_struct(tsk);
 144
 145         /* Success */
 146         if (!rc)
 147                 return;
 148
 149         pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n",
 150                         csb_addr, pid_vnr(pid));
 151
 152         clear_siginfo(&info);
 153         info.si_signo = SIGSEGV;
 154         info.si_errno = EFAULT;
 155         info.si_code = SEGV_MAPERR;
 156         info.si_addr = csb_addr;
 157
 158         /*
 159          * process will be polling on csb.flags after request is sent to
 160          * NX. So generally CSB update should not fail except when an
 161          * application passes invalid csb_addr. So an error message will
 162          * be displayed and leave it to user space whether to ignore or
 163          * handle this signal.
 164          */
 165         rcu_read_lock();
 166         rc = kill_pid_info(SIGSEGV, &info, pid);
 167         rcu_read_unlock();
 168
 169         pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__,
 170                         pid_vnr(pid), rc);
 171 }
 172
 173 static void dump_fifo(struct vas_instance *vinst, void *entry)
 174 {
 175         unsigned long *end = vinst->fault_fifo + vinst->fault_fifo_size;
 176         unsigned long *fifo = entry;
 177         int i;
 178
 179         pr_err("Fault fifo size %d, Max crbs %d\n", vinst->fault_fifo_size,
 180                         vinst->fault_fifo_size / CRB_SIZE);
 181
 182         /* Dump 10 CRB entries or until end of FIFO */
 183         pr_err("Fault FIFO Dump:\n");
 184         for (i = 0; i < 10*(CRB_SIZE/8) && fifo < end; i += 4, fifo += 4) {
 185                 pr_err("[%.3d, %p]: 0x%.16lx 0x%.16lx 0x%.16lx 0x%.16lx\n",
 186                         i, fifo, *fifo, *(fifo+1), *(fifo+2), *(fifo+3));
 187         }
 188 }
 189
 190 /*
 191  * Process valid CRBs in fault FIFO.
 192  * NX process user space requests, return credit and update the status
 193  * in CRB. If it encounters transalation error when accessing CRB or
 194  * request buffers, raises interrupt on the CPU to handle the fault.
 195  * It takes credit on fault window, updates nx_fault_stamp in CRB with
 196  * the following information and pastes CRB in fault FIFO.
 197  *
 198  * pswid - window ID of the window on which the request is sent.
 199  * fault_storage_addr - fault address
 200  *
 201  * It can raise a single interrupt for multiple faults. Expects OS to
 202  * process all valid faults and return credit for each fault on user
 203  * space and fault windows. This fault FIFO control will be done with
 204  * credit mechanism. NX can continuously paste CRBs until credits are not
 205  * available on fault window. Otherwise, returns with RMA_reject.
 206  *
 207  * Total credits available on fault window: FIFO_SIZE(4MB)/CRBS_SIZE(128)
 208  *
 209  */
 210 irqreturn_t vas_fault_thread_fn(int irq, void *data)
 211 {
 212         struct vas_instance *vinst = data;
 213         struct coprocessor_request_block *crb, *entry;
 214         struct coprocessor_request_block buf;
 215         struct vas_window *window;
 216         unsigned long flags;
 217         void *fifo;
 218
 219         crb = &buf;
 220
 221         /*
 222          * VAS can interrupt with multiple page faults. So process all
 223          * valid CRBs within fault FIFO until reaches invalid CRB.
 224          * We use CCW[0] and pswid to validate validate CRBs:
 225          *
 226          * CCW[0]       Reserved bit. When NX pastes CRB, CCW[0]=0
 227          *              OS sets this bit to 1 after reading CRB.
 228          * pswid        NX assigns window ID. Set pswid to -1 after
 229          *              reading CRB from fault FIFO.
 230          *
 231          * We exit this function if no valid CRBs are available to process.
 232          * So acquire fault_lock and reset fifo_in_progress to 0 before
 233          * exit.
 234          * In case kernel receives another interrupt with different page
 235          * fault, interrupt handler returns with IRQ_HANDLED if
 236          * fifo_in_progress is set. Means these new faults will be
 237          * handled by the current thread. Otherwise set fifo_in_progress
 238          * and return IRQ_WAKE_THREAD to wake up thread.
 239          */
 240         while (true) {
 241                 spin_lock_irqsave(&vinst->fault_lock, flags);
 242                 /*
 243                  * Advance the fault fifo pointer to next CRB.
 244                  * Use CRB_SIZE rather than sizeof(*crb) since the latter is
 245                  * aligned to CRB_ALIGN (256) but the CRB written to by VAS is
 246                  * only CRB_SIZE in len.
 247                  */
 248                 fifo = vinst->fault_fifo + (vinst->fault_crbs * CRB_SIZE);
 249                 entry = fifo;
 250
 251                 if ((entry->stamp.nx.pswid == cpu_to_be32(FIFO_INVALID_ENTRY))
 252                         || (entry->ccw & cpu_to_be32(CCW0_INVALID))) {
 253                         vinst->fifo_in_progress = 0;
 254                         spin_unlock_irqrestore(&vinst->fault_lock, flags);
 255                         return IRQ_HANDLED;
 256                 }
 257
 258                 spin_unlock_irqrestore(&vinst->fault_lock, flags);
 259                 vinst->fault_crbs++;
 260                 if (vinst->fault_crbs == (vinst->fault_fifo_size / CRB_SIZE))
 261                         vinst->fault_crbs = 0;
 262
 263                 memcpy(crb, fifo, CRB_SIZE);
 264                 entry->stamp.nx.pswid = cpu_to_be32(FIFO_INVALID_ENTRY);
 265                 entry->ccw |= cpu_to_be32(CCW0_INVALID);
 266                 /*
 267                  * Return credit for the fault window.
 268                  */
 269                 vas_return_credit(vinst->fault_win, false);
 270
 271                 pr_devel("VAS[%d] fault_fifo %p, fifo %p, fault_crbs %d\n",
 272                                 vinst->vas_id, vinst->fault_fifo, fifo,
 273                                 vinst->fault_crbs);
 274
 275                 dump_crb(crb);
 276                 window = vas_pswid_to_window(vinst,
 277                                 be32_to_cpu(crb->stamp.nx.pswid));
 278
 279                 if (IS_ERR(window)) {
 280                         /*
 281                          * We got an interrupt about a specific send
 282                          * window but we can't find that window and we can't
 283                          * even clean it up (return credit on user space
 284                          * window).
 285                          * But we should not get here.
 286                          * TODO: Disable IRQ.
 287                          */
 288                         dump_fifo(vinst, (void *)entry);
 289                         pr_err("VAS[%d] fault_fifo %p, fifo %p, pswid 0x%x, fault_crbs %d bad CRB?\n",
 290                                 vinst->vas_id, vinst->fault_fifo, fifo,
 291                                 be32_to_cpu(crb->stamp.nx.pswid),
 292                                 vinst->fault_crbs);
 293
 294                         WARN_ON_ONCE(1);
 295                 } else {
 296                         update_csb(window, crb);
 297                         /*
 298                          * Return credit for send window after processing
 299                          * fault CRB.
 300                          */
 301                         vas_return_credit(window, true);
 302                 }
 303         }
 304 }
 305
 306 irqreturn_t vas_fault_handler(int irq, void *dev_id)
 307 {
 308         struct vas_instance *vinst = dev_id;
 309         irqreturn_t ret = IRQ_WAKE_THREAD;
 310         unsigned long flags;
 311
 312         /*
 313          * NX can generate an interrupt for multiple faults. So the
 314          * fault handler thread process all CRBs until finds invalid
 315          * entry. In case if NX sees continuous faults, it is possible
 316          * that the thread function entered with the first interrupt
 317          * can execute and process all valid CRBs.
 318          * So wake up thread only if the fault thread is not in progress.
 319          */
 320         spin_lock_irqsave(&vinst->fault_lock, flags);
 321
 322         if (vinst->fifo_in_progress)
 323                 ret = IRQ_HANDLED;
 324         else
 325                 vinst->fifo_in_progress = 1;
 326
 327         spin_unlock_irqrestore(&vinst->fault_lock, flags);
 328
 329         return ret;
 330 }
 331
 332 /*
 333  * Fault window is opened per VAS instance. NX pastes fault CRB in fault
 334  * FIFO upon page faults.
 335  */
 336 int vas_setup_fault_window(struct vas_instance *vinst)
 337 {
 338         struct vas_rx_win_attr attr;
 339
 340         vinst->fault_fifo_size = VAS_FAULT_WIN_FIFO_SIZE;
 341         vinst->fault_fifo = kzalloc(vinst->fault_fifo_size, GFP_KERNEL);
 342         if (!vinst->fault_fifo) {
 343                 pr_err("Unable to alloc %d bytes for fault_fifo\n",
 344                                 vinst->fault_fifo_size);
 345                 return -ENOMEM;
 346         }
 347
 348         /*
 349          * Invalidate all CRB entries. NX pastes valid entry for each fault.
 350          */
 351         memset(vinst->fault_fifo, FIFO_INVALID_ENTRY, vinst->fault_fifo_size);
 352         vas_init_rx_win_attr(&attr, VAS_COP_TYPE_FAULT);
 353
 354         attr.rx_fifo_size = vinst->fault_fifo_size;
 355         attr.rx_fifo = vinst->fault_fifo;
 356
 357         /*
 358          * Max creds is based on number of CRBs can fit in the FIFO.
 359          * (fault_fifo_size/CRB_SIZE). If 8MB FIFO is used, max creds
 360          * will be 0xffff since the receive creds field is 16bits wide.
 361          */
 362         attr.wcreds_max = vinst->fault_fifo_size / CRB_SIZE;
 363         attr.lnotify_lpid = 0;
 364         attr.lnotify_pid = mfspr(SPRN_PID);
 365         attr.lnotify_tid = mfspr(SPRN_PID);
 366
 367         vinst->fault_win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT,
 368                                         &attr);
 369
 370         if (IS_ERR(vinst->fault_win)) {
 371                 pr_err("VAS: Error %ld opening FaultWin\n",
 372                         PTR_ERR(vinst->fault_win));
 373                 kfree(vinst->fault_fifo);
 374                 return PTR_ERR(vinst->fault_win);
 375         }
 376
 377         pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n",
 378                         vinst->fault_win->winid, attr.lnotify_lpid,
 379                         attr.lnotify_pid, attr.lnotify_tid);
 380
 381         return 0;
 382 }