xtensa: support DMA buffers in high memory
[cris-mirror.git] / drivers / misc / cxl / api.c
blob753b1a698fc4e3ef74a7ebb718a1997ae8a13a4d
1 /*
2 * Copyright 2014 IBM Corp.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
10 #include <linux/pci.h>
11 #include <linux/slab.h>
12 #include <linux/file.h>
13 #include <misc/cxl.h>
14 #include <linux/msi.h>
15 #include <linux/module.h>
16 #include <linux/mount.h>
17 #include <linux/sched/mm.h>
18 #include <linux/mmu_context.h>
20 #include "cxl.h"
23 * Since we want to track memory mappings to be able to force-unmap
24 * when the AFU is no longer reachable, we need an inode. For devices
25 * opened through the cxl user API, this is not a problem, but a
26 * userland process can also get a cxl fd through the cxl_get_fd()
27 * API, which is used by the cxlflash driver.
29 * Therefore we implement our own simple pseudo-filesystem and inode
30 * allocator. We don't use the anonymous inode, as we need the
31 * meta-data associated with it (address_space) and it is shared by
32 * other drivers/processes, so it could lead to cxl unmapping VMAs
33 * from random processes.
36 #define CXL_PSEUDO_FS_MAGIC 0x1697697f
38 static int cxl_fs_cnt;
39 static struct vfsmount *cxl_vfs_mount;
41 static const struct dentry_operations cxl_fs_dops = {
42 .d_dname = simple_dname,
45 static struct dentry *cxl_fs_mount(struct file_system_type *fs_type, int flags,
46 const char *dev_name, void *data)
48 return mount_pseudo(fs_type, "cxl:", NULL, &cxl_fs_dops,
49 CXL_PSEUDO_FS_MAGIC);
52 static struct file_system_type cxl_fs_type = {
53 .name = "cxl",
54 .owner = THIS_MODULE,
55 .mount = cxl_fs_mount,
56 .kill_sb = kill_anon_super,
60 void cxl_release_mapping(struct cxl_context *ctx)
62 if (ctx->kernelapi && ctx->mapping)
63 simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt);
66 static struct file *cxl_getfile(const char *name,
67 const struct file_operations *fops,
68 void *priv, int flags)
70 struct qstr this;
71 struct path path;
72 struct file *file;
73 struct inode *inode = NULL;
74 int rc;
76 /* strongly inspired by anon_inode_getfile() */
78 if (fops->owner && !try_module_get(fops->owner))
79 return ERR_PTR(-ENOENT);
81 rc = simple_pin_fs(&cxl_fs_type, &cxl_vfs_mount, &cxl_fs_cnt);
82 if (rc < 0) {
83 pr_err("Cannot mount cxl pseudo filesystem: %d\n", rc);
84 file = ERR_PTR(rc);
85 goto err_module;
88 inode = alloc_anon_inode(cxl_vfs_mount->mnt_sb);
89 if (IS_ERR(inode)) {
90 file = ERR_CAST(inode);
91 goto err_fs;
94 file = ERR_PTR(-ENOMEM);
95 this.name = name;
96 this.len = strlen(name);
97 this.hash = 0;
98 path.dentry = d_alloc_pseudo(cxl_vfs_mount->mnt_sb, &this);
99 if (!path.dentry)
100 goto err_inode;
102 path.mnt = mntget(cxl_vfs_mount);
103 d_instantiate(path.dentry, inode);
105 file = alloc_file(&path, OPEN_FMODE(flags), fops);
106 if (IS_ERR(file))
107 goto err_dput;
108 file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
109 file->private_data = priv;
111 return file;
113 err_dput:
114 path_put(&path);
115 err_inode:
116 iput(inode);
117 err_fs:
118 simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt);
119 err_module:
120 module_put(fops->owner);
121 return file;
124 struct cxl_context *cxl_dev_context_init(struct pci_dev *dev)
126 struct cxl_afu *afu;
127 struct cxl_context *ctx;
128 int rc;
130 afu = cxl_pci_to_afu(dev);
131 if (IS_ERR(afu))
132 return ERR_CAST(afu);
134 ctx = cxl_context_alloc();
135 if (!ctx)
136 return ERR_PTR(-ENOMEM);
138 ctx->kernelapi = true;
140 /* Make it a slave context. We can promote it later? */
141 rc = cxl_context_init(ctx, afu, false);
142 if (rc)
143 goto err_ctx;
145 return ctx;
147 err_ctx:
148 kfree(ctx);
149 return ERR_PTR(rc);
151 EXPORT_SYMBOL_GPL(cxl_dev_context_init);
153 struct cxl_context *cxl_get_context(struct pci_dev *dev)
155 return dev->dev.archdata.cxl_ctx;
157 EXPORT_SYMBOL_GPL(cxl_get_context);
159 int cxl_release_context(struct cxl_context *ctx)
161 if (ctx->status >= STARTED)
162 return -EBUSY;
164 cxl_context_free(ctx);
166 return 0;
168 EXPORT_SYMBOL_GPL(cxl_release_context);
170 static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num)
172 __u16 range;
173 int r;
175 for (r = 0; r < CXL_IRQ_RANGES; r++) {
176 range = ctx->irqs.range[r];
177 if (num < range) {
178 return ctx->irqs.offset[r] + num;
180 num -= range;
182 return 0;
185 int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq)
187 if (*ctx == NULL || *afu_irq == 0) {
188 *afu_irq = 1;
189 *ctx = cxl_get_context(pdev);
190 } else {
191 (*afu_irq)++;
192 if (*afu_irq > cxl_get_max_irqs_per_process(pdev)) {
193 *ctx = list_next_entry(*ctx, extra_irq_contexts);
194 *afu_irq = 1;
197 return cxl_find_afu_irq(*ctx, *afu_irq);
199 /* Exported via cxl_base */
201 int cxl_set_priv(struct cxl_context *ctx, void *priv)
203 if (!ctx)
204 return -EINVAL;
206 ctx->priv = priv;
208 return 0;
210 EXPORT_SYMBOL_GPL(cxl_set_priv);
212 void *cxl_get_priv(struct cxl_context *ctx)
214 if (!ctx)
215 return ERR_PTR(-EINVAL);
217 return ctx->priv;
219 EXPORT_SYMBOL_GPL(cxl_get_priv);
221 int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
223 int res;
224 irq_hw_number_t hwirq;
226 if (num == 0)
227 num = ctx->afu->pp_irqs;
228 res = afu_allocate_irqs(ctx, num);
229 if (res)
230 return res;
232 if (!cpu_has_feature(CPU_FTR_HVMODE)) {
233 /* In a guest, the PSL interrupt is not multiplexed. It was
234 * allocated above, and we need to set its handler
236 hwirq = cxl_find_afu_irq(ctx, 0);
237 if (hwirq)
238 cxl_map_irq(ctx->afu->adapter, hwirq, cxl_ops->psl_interrupt, ctx, "psl");
241 if (ctx->status == STARTED) {
242 if (cxl_ops->update_ivtes)
243 cxl_ops->update_ivtes(ctx);
244 else WARN(1, "BUG: cxl_allocate_afu_irqs must be called prior to starting the context on this platform\n");
247 return res;
249 EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs);
251 void cxl_free_afu_irqs(struct cxl_context *ctx)
253 irq_hw_number_t hwirq;
254 unsigned int virq;
256 if (!cpu_has_feature(CPU_FTR_HVMODE)) {
257 hwirq = cxl_find_afu_irq(ctx, 0);
258 if (hwirq) {
259 virq = irq_find_mapping(NULL, hwirq);
260 if (virq)
261 cxl_unmap_irq(virq, ctx);
264 afu_irq_name_free(ctx);
265 cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
267 EXPORT_SYMBOL_GPL(cxl_free_afu_irqs);
269 int cxl_map_afu_irq(struct cxl_context *ctx, int num,
270 irq_handler_t handler, void *cookie, char *name)
272 irq_hw_number_t hwirq;
275 * Find interrupt we are to register.
277 hwirq = cxl_find_afu_irq(ctx, num);
278 if (!hwirq)
279 return -ENOENT;
281 return cxl_map_irq(ctx->afu->adapter, hwirq, handler, cookie, name);
283 EXPORT_SYMBOL_GPL(cxl_map_afu_irq);
285 void cxl_unmap_afu_irq(struct cxl_context *ctx, int num, void *cookie)
287 irq_hw_number_t hwirq;
288 unsigned int virq;
290 hwirq = cxl_find_afu_irq(ctx, num);
291 if (!hwirq)
292 return;
294 virq = irq_find_mapping(NULL, hwirq);
295 if (virq)
296 cxl_unmap_irq(virq, cookie);
298 EXPORT_SYMBOL_GPL(cxl_unmap_afu_irq);
301 * Start a context
302 * Code here similar to afu_ioctl_start_work().
304 int cxl_start_context(struct cxl_context *ctx, u64 wed,
305 struct task_struct *task)
307 int rc = 0;
308 bool kernel = true;
310 pr_devel("%s: pe: %i\n", __func__, ctx->pe);
312 mutex_lock(&ctx->status_mutex);
313 if (ctx->status == STARTED)
314 goto out; /* already started */
317 * Increment the mapped context count for adapter. This also checks
318 * if adapter_context_lock is taken.
320 rc = cxl_adapter_context_get(ctx->afu->adapter);
321 if (rc)
322 goto out;
324 if (task) {
325 ctx->pid = get_task_pid(task, PIDTYPE_PID);
326 kernel = false;
327 ctx->real_mode = false;
329 /* acquire a reference to the task's mm */
330 ctx->mm = get_task_mm(current);
332 /* ensure this mm_struct can't be freed */
333 cxl_context_mm_count_get(ctx);
335 if (ctx->mm) {
336 /* decrement the use count from above */
337 mmput(ctx->mm);
338 /* make TLBIs for this context global */
339 mm_context_add_copro(ctx->mm);
344 * Increment driver use count. Enables global TLBIs for hash
345 * and callbacks to handle the segment table
347 cxl_ctx_get();
349 /* See the comment in afu_ioctl_start_work() */
350 smp_mb();
352 if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
353 put_pid(ctx->pid);
354 ctx->pid = NULL;
355 cxl_adapter_context_put(ctx->afu->adapter);
356 cxl_ctx_put();
357 if (task) {
358 cxl_context_mm_count_put(ctx);
359 if (ctx->mm)
360 mm_context_remove_copro(ctx->mm);
362 goto out;
365 ctx->status = STARTED;
366 out:
367 mutex_unlock(&ctx->status_mutex);
368 return rc;
370 EXPORT_SYMBOL_GPL(cxl_start_context);
372 int cxl_process_element(struct cxl_context *ctx)
374 return ctx->external_pe;
376 EXPORT_SYMBOL_GPL(cxl_process_element);
/*
 * Stop a context by detaching it.
 *
 * Returns 0 on success, otherwise -Errno (the result of __detach_context()).
 */
int cxl_stop_context(struct cxl_context *ctx)
{
	int rc = __detach_context(ctx);

	return rc;
}
EXPORT_SYMBOL_GPL(cxl_stop_context);
385 void cxl_set_master(struct cxl_context *ctx)
387 ctx->master = true;
389 EXPORT_SYMBOL_GPL(cxl_set_master);
391 int cxl_set_translation_mode(struct cxl_context *ctx, bool real_mode)
393 if (ctx->status == STARTED) {
395 * We could potentially update the PE and issue an update LLCMD
396 * to support this, but it doesn't seem to have a good use case
397 * since it's trivial to just create a second kernel context
398 * with different translation modes, so until someone convinces
399 * me otherwise:
401 return -EBUSY;
404 ctx->real_mode = real_mode;
405 return 0;
407 EXPORT_SYMBOL_GPL(cxl_set_translation_mode);
/* wrappers around afu_* file ops which are EXPORTED */

/* Exported open handler: forwards to afu_open(). */
int cxl_fd_open(struct inode *inode, struct file *file)
{
	int rc = afu_open(inode, file);

	return rc;
}
EXPORT_SYMBOL_GPL(cxl_fd_open);
/* Exported release handler: forwards to afu_release(). */
int cxl_fd_release(struct inode *inode, struct file *file)
{
	int rc = afu_release(inode, file);

	return rc;
}
EXPORT_SYMBOL_GPL(cxl_fd_release);
/* Exported ioctl handler: forwards to afu_ioctl(). */
long cxl_fd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	long rc = afu_ioctl(file, cmd, arg);

	return rc;
}
EXPORT_SYMBOL_GPL(cxl_fd_ioctl);
/* Exported mmap handler: forwards to afu_mmap(). */
int cxl_fd_mmap(struct file *file, struct vm_area_struct *vm)
{
	int rc = afu_mmap(file, vm);

	return rc;
}
EXPORT_SYMBOL_GPL(cxl_fd_mmap);
430 __poll_t cxl_fd_poll(struct file *file, struct poll_table_struct *poll)
432 return afu_poll(file, poll);
434 EXPORT_SYMBOL_GPL(cxl_fd_poll);
435 ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count,
436 loff_t *off)
438 return afu_read(file, buf, count, off);
440 EXPORT_SYMBOL_GPL(cxl_fd_read);
442 #define PATCH_FOPS(NAME) if (!fops->NAME) fops->NAME = afu_fops.NAME
444 /* Get a struct file and fd for a context and attach the ops */
445 struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops,
446 int *fd)
448 struct file *file;
449 int rc, flags, fdtmp;
450 char *name = NULL;
452 /* only allow one per context */
453 if (ctx->mapping)
454 return ERR_PTR(-EEXIST);
456 flags = O_RDWR | O_CLOEXEC;
458 /* This code is similar to anon_inode_getfd() */
459 rc = get_unused_fd_flags(flags);
460 if (rc < 0)
461 return ERR_PTR(rc);
462 fdtmp = rc;
465 * Patch the file ops. Needs to be careful that this is rentrant safe.
467 if (fops) {
468 PATCH_FOPS(open);
469 PATCH_FOPS(poll);
470 PATCH_FOPS(read);
471 PATCH_FOPS(release);
472 PATCH_FOPS(unlocked_ioctl);
473 PATCH_FOPS(compat_ioctl);
474 PATCH_FOPS(mmap);
475 } else /* use default ops */
476 fops = (struct file_operations *)&afu_fops;
478 name = kasprintf(GFP_KERNEL, "cxl:%d", ctx->pe);
479 file = cxl_getfile(name, fops, ctx, flags);
480 kfree(name);
481 if (IS_ERR(file))
482 goto err_fd;
484 cxl_context_set_mapping(ctx, file->f_mapping);
485 *fd = fdtmp;
486 return file;
488 err_fd:
489 put_unused_fd(fdtmp);
490 return NULL;
492 EXPORT_SYMBOL_GPL(cxl_get_fd);
494 struct cxl_context *cxl_fops_get_context(struct file *file)
496 return file->private_data;
498 EXPORT_SYMBOL_GPL(cxl_fops_get_context);
500 void cxl_set_driver_ops(struct cxl_context *ctx,
501 struct cxl_afu_driver_ops *ops)
503 WARN_ON(!ops->fetch_event || !ops->event_delivered);
504 atomic_set(&ctx->afu_driver_events, 0);
505 ctx->afu_driver_ops = ops;
507 EXPORT_SYMBOL_GPL(cxl_set_driver_ops);
509 void cxl_context_events_pending(struct cxl_context *ctx,
510 unsigned int new_events)
512 atomic_add(new_events, &ctx->afu_driver_events);
513 wake_up_all(&ctx->wq);
515 EXPORT_SYMBOL_GPL(cxl_context_events_pending);
517 int cxl_start_work(struct cxl_context *ctx,
518 struct cxl_ioctl_start_work *work)
520 int rc;
522 /* code taken from afu_ioctl_start_work */
523 if (!(work->flags & CXL_START_WORK_NUM_IRQS))
524 work->num_interrupts = ctx->afu->pp_irqs;
525 else if ((work->num_interrupts < ctx->afu->pp_irqs) ||
526 (work->num_interrupts > ctx->afu->irqs_max)) {
527 return -EINVAL;
530 rc = afu_register_irqs(ctx, work->num_interrupts);
531 if (rc)
532 return rc;
534 rc = cxl_start_context(ctx, work->work_element_descriptor, current);
535 if (rc < 0) {
536 afu_release_irqs(ctx, ctx);
537 return rc;
540 return 0;
542 EXPORT_SYMBOL_GPL(cxl_start_work);
544 void __iomem *cxl_psa_map(struct cxl_context *ctx)
546 if (ctx->status != STARTED)
547 return NULL;
549 pr_devel("%s: psn_phys%llx size:%llx\n",
550 __func__, ctx->psn_phys, ctx->psn_size);
551 return ioremap(ctx->psn_phys, ctx->psn_size);
553 EXPORT_SYMBOL_GPL(cxl_psa_map);
555 void cxl_psa_unmap(void __iomem *addr)
557 iounmap(addr);
559 EXPORT_SYMBOL_GPL(cxl_psa_unmap);
561 int cxl_afu_reset(struct cxl_context *ctx)
563 struct cxl_afu *afu = ctx->afu;
564 int rc;
566 rc = cxl_ops->afu_reset(afu);
567 if (rc)
568 return rc;
570 return cxl_ops->afu_check_and_enable(afu);
572 EXPORT_SYMBOL_GPL(cxl_afu_reset);
574 void cxl_perst_reloads_same_image(struct cxl_afu *afu,
575 bool perst_reloads_same_image)
577 afu->adapter->perst_same_image = perst_reloads_same_image;
579 EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image);
581 ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count)
583 struct cxl_afu *afu = cxl_pci_to_afu(dev);
584 if (IS_ERR(afu))
585 return -ENODEV;
587 return cxl_ops->read_adapter_vpd(afu->adapter, buf, count);
589 EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd);
591 int cxl_set_max_irqs_per_process(struct pci_dev *dev, int irqs)
593 struct cxl_afu *afu = cxl_pci_to_afu(dev);
594 if (IS_ERR(afu))
595 return -ENODEV;
597 if (irqs > afu->adapter->user_irqs)
598 return -EINVAL;
600 /* Limit user_irqs to prevent the user increasing this via sysfs */
601 afu->adapter->user_irqs = irqs;
602 afu->irqs_max = irqs;
604 return 0;
606 EXPORT_SYMBOL_GPL(cxl_set_max_irqs_per_process);
608 int cxl_get_max_irqs_per_process(struct pci_dev *dev)
610 struct cxl_afu *afu = cxl_pci_to_afu(dev);
611 if (IS_ERR(afu))
612 return -ENODEV;
614 return afu->irqs_max;
616 EXPORT_SYMBOL_GPL(cxl_get_max_irqs_per_process);
619 * This is a special interrupt allocation routine called from the PHB's MSI
620 * setup function. When capi interrupts are allocated in this manner they must
621 * still be associated with a running context, but since the MSI APIs have no
622 * way to specify this we use the default context associated with the device.
624 * The Mellanox CX4 has a hardware limitation that restricts the maximum AFU
625 * interrupt number, so in order to overcome this their driver informs us of
626 * the restriction by setting the maximum interrupts per context, and we
627 * allocate additional contexts as necessary so that we can keep the AFU
628 * interrupt number within the supported range.
630 int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
632 struct cxl_context *ctx, *new_ctx, *default_ctx;
633 int remaining;
634 int rc;
636 ctx = default_ctx = cxl_get_context(pdev);
637 if (WARN_ON(!default_ctx))
638 return -ENODEV;
640 remaining = nvec;
641 while (remaining > 0) {
642 rc = cxl_allocate_afu_irqs(ctx, min(remaining, ctx->afu->irqs_max));
643 if (rc) {
644 pr_warn("%s: Failed to find enough free MSIs\n", pci_name(pdev));
645 return rc;
647 remaining -= ctx->afu->irqs_max;
649 if (ctx != default_ctx && default_ctx->status == STARTED) {
650 WARN_ON(cxl_start_context(ctx,
651 be64_to_cpu(default_ctx->elem->common.wed),
652 NULL));
655 if (remaining > 0) {
656 new_ctx = cxl_dev_context_init(pdev);
657 if (IS_ERR(new_ctx)) {
658 pr_warn("%s: Failed to allocate enough contexts for MSIs\n", pci_name(pdev));
659 return -ENOSPC;
661 list_add(&new_ctx->extra_irq_contexts, &ctx->extra_irq_contexts);
662 ctx = new_ctx;
666 return 0;
668 /* Exported via cxl_base */
670 void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev)
672 struct cxl_context *ctx, *pos, *tmp;
674 ctx = cxl_get_context(pdev);
675 if (WARN_ON(!ctx))
676 return;
678 cxl_free_afu_irqs(ctx);
679 list_for_each_entry_safe(pos, tmp, &ctx->extra_irq_contexts, extra_irq_contexts) {
680 cxl_stop_context(pos);
681 cxl_free_afu_irqs(pos);
682 list_del(&pos->extra_irq_contexts);
683 cxl_release_context(pos);
686 /* Exported via cxl_base */