Merge tag 'trace-printf-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/trace...
[drm/drm-misc.git] / drivers / iommu / iommufd / ioas.c
blob1542c5fd10a85cac4e20d37cc0f3fa1f904e0dd6
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
4 */
5 #include <linux/file.h>
6 #include <linux/interval_tree.h>
7 #include <linux/iommu.h>
8 #include <linux/iommufd.h>
9 #include <uapi/linux/iommufd.h>
11 #include "io_pagetable.h"
13 void iommufd_ioas_destroy(struct iommufd_object *obj)
15 struct iommufd_ioas *ioas = container_of(obj, struct iommufd_ioas, obj);
16 int rc;
18 rc = iopt_unmap_all(&ioas->iopt, NULL);
19 WARN_ON(rc && rc != -ENOENT);
20 iopt_destroy_table(&ioas->iopt);
21 mutex_destroy(&ioas->mutex);
24 struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx)
26 struct iommufd_ioas *ioas;
28 ioas = iommufd_object_alloc(ictx, ioas, IOMMUFD_OBJ_IOAS);
29 if (IS_ERR(ioas))
30 return ioas;
32 iopt_init_table(&ioas->iopt);
33 INIT_LIST_HEAD(&ioas->hwpt_list);
34 mutex_init(&ioas->mutex);
35 return ioas;
38 int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd)
40 struct iommu_ioas_alloc *cmd = ucmd->cmd;
41 struct iommufd_ioas *ioas;
42 int rc;
44 if (cmd->flags)
45 return -EOPNOTSUPP;
47 ioas = iommufd_ioas_alloc(ucmd->ictx);
48 if (IS_ERR(ioas))
49 return PTR_ERR(ioas);
51 cmd->out_ioas_id = ioas->obj.id;
52 rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
53 if (rc)
54 goto out_table;
56 down_read(&ucmd->ictx->ioas_creation_lock);
57 iommufd_object_finalize(ucmd->ictx, &ioas->obj);
58 up_read(&ucmd->ictx->ioas_creation_lock);
59 return 0;
61 out_table:
62 iommufd_object_abort_and_destroy(ucmd->ictx, &ioas->obj);
63 return rc;
66 int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd)
68 struct iommu_iova_range __user *ranges;
69 struct iommu_ioas_iova_ranges *cmd = ucmd->cmd;
70 struct iommufd_ioas *ioas;
71 struct interval_tree_span_iter span;
72 u32 max_iovas;
73 int rc;
75 if (cmd->__reserved)
76 return -EOPNOTSUPP;
78 ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id);
79 if (IS_ERR(ioas))
80 return PTR_ERR(ioas);
82 down_read(&ioas->iopt.iova_rwsem);
83 max_iovas = cmd->num_iovas;
84 ranges = u64_to_user_ptr(cmd->allowed_iovas);
85 cmd->num_iovas = 0;
86 cmd->out_iova_alignment = ioas->iopt.iova_alignment;
87 interval_tree_for_each_span(&span, &ioas->iopt.reserved_itree, 0,
88 ULONG_MAX) {
89 if (!span.is_hole)
90 continue;
91 if (cmd->num_iovas < max_iovas) {
92 struct iommu_iova_range elm = {
93 .start = span.start_hole,
94 .last = span.last_hole,
97 if (copy_to_user(&ranges[cmd->num_iovas], &elm,
98 sizeof(elm))) {
99 rc = -EFAULT;
100 goto out_put;
103 cmd->num_iovas++;
105 rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
106 if (rc)
107 goto out_put;
108 if (cmd->num_iovas > max_iovas)
109 rc = -EMSGSIZE;
110 out_put:
111 up_read(&ioas->iopt.iova_rwsem);
112 iommufd_put_object(ucmd->ictx, &ioas->obj);
113 return rc;
116 static int iommufd_ioas_load_iovas(struct rb_root_cached *itree,
117 struct iommu_iova_range __user *ranges,
118 u32 num)
120 u32 i;
122 for (i = 0; i != num; i++) {
123 struct iommu_iova_range range;
124 struct iopt_allowed *allowed;
126 if (copy_from_user(&range, ranges + i, sizeof(range)))
127 return -EFAULT;
129 if (range.start >= range.last)
130 return -EINVAL;
132 if (interval_tree_iter_first(itree, range.start, range.last))
133 return -EINVAL;
135 allowed = kzalloc(sizeof(*allowed), GFP_KERNEL_ACCOUNT);
136 if (!allowed)
137 return -ENOMEM;
138 allowed->node.start = range.start;
139 allowed->node.last = range.last;
141 interval_tree_insert(&allowed->node, itree);
143 return 0;
146 int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd)
148 struct iommu_ioas_allow_iovas *cmd = ucmd->cmd;
149 struct rb_root_cached allowed_iova = RB_ROOT_CACHED;
150 struct interval_tree_node *node;
151 struct iommufd_ioas *ioas;
152 struct io_pagetable *iopt;
153 int rc = 0;
155 if (cmd->__reserved)
156 return -EOPNOTSUPP;
158 ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id);
159 if (IS_ERR(ioas))
160 return PTR_ERR(ioas);
161 iopt = &ioas->iopt;
163 rc = iommufd_ioas_load_iovas(&allowed_iova,
164 u64_to_user_ptr(cmd->allowed_iovas),
165 cmd->num_iovas);
166 if (rc)
167 goto out_free;
170 * We want the allowed tree update to be atomic, so we have to keep the
171 * original nodes around, and keep track of the new nodes as we allocate
172 * memory for them. The simplest solution is to have a new/old tree and
173 * then swap new for old. On success we free the old tree, on failure we
174 * free the new tree.
176 rc = iopt_set_allow_iova(iopt, &allowed_iova);
177 out_free:
178 while ((node = interval_tree_iter_first(&allowed_iova, 0, ULONG_MAX))) {
179 interval_tree_remove(node, &allowed_iova);
180 kfree(container_of(node, struct iopt_allowed, node));
182 iommufd_put_object(ucmd->ictx, &ioas->obj);
183 return rc;
186 static int conv_iommu_prot(u32 map_flags)
189 * We provide no manual cache coherency ioctls to userspace and most
190 * architectures make the CPU ops for cache flushing privileged.
191 * Therefore we require the underlying IOMMU to support CPU coherent
192 * operation. Support for IOMMU_CACHE is enforced by the
193 * IOMMU_CAP_CACHE_COHERENCY test during bind.
195 int iommu_prot = IOMMU_CACHE;
197 if (map_flags & IOMMU_IOAS_MAP_WRITEABLE)
198 iommu_prot |= IOMMU_WRITE;
199 if (map_flags & IOMMU_IOAS_MAP_READABLE)
200 iommu_prot |= IOMMU_READ;
201 return iommu_prot;
204 int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd)
206 struct iommu_ioas_map_file *cmd = ucmd->cmd;
207 unsigned long iova = cmd->iova;
208 struct iommufd_ioas *ioas;
209 unsigned int flags = 0;
210 struct file *file;
211 int rc;
213 if (cmd->flags &
214 ~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE |
215 IOMMU_IOAS_MAP_READABLE))
216 return -EOPNOTSUPP;
218 if (cmd->iova >= ULONG_MAX || cmd->length >= ULONG_MAX)
219 return -EOVERFLOW;
221 if (!(cmd->flags &
222 (IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE)))
223 return -EINVAL;
225 ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id);
226 if (IS_ERR(ioas))
227 return PTR_ERR(ioas);
229 if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA))
230 flags = IOPT_ALLOC_IOVA;
232 file = fget(cmd->fd);
233 if (!file)
234 return -EBADF;
236 rc = iopt_map_file_pages(ucmd->ictx, &ioas->iopt, &iova, file,
237 cmd->start, cmd->length,
238 conv_iommu_prot(cmd->flags), flags);
239 if (rc)
240 goto out_put;
242 cmd->iova = iova;
243 rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
244 out_put:
245 iommufd_put_object(ucmd->ictx, &ioas->obj);
246 fput(file);
247 return rc;
250 int iommufd_ioas_map(struct iommufd_ucmd *ucmd)
252 struct iommu_ioas_map *cmd = ucmd->cmd;
253 unsigned long iova = cmd->iova;
254 struct iommufd_ioas *ioas;
255 unsigned int flags = 0;
256 int rc;
258 if ((cmd->flags &
259 ~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE |
260 IOMMU_IOAS_MAP_READABLE)) ||
261 cmd->__reserved)
262 return -EOPNOTSUPP;
263 if (cmd->iova >= ULONG_MAX || cmd->length >= ULONG_MAX)
264 return -EOVERFLOW;
266 if (!(cmd->flags &
267 (IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE)))
268 return -EINVAL;
270 ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id);
271 if (IS_ERR(ioas))
272 return PTR_ERR(ioas);
274 if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA))
275 flags = IOPT_ALLOC_IOVA;
276 rc = iopt_map_user_pages(ucmd->ictx, &ioas->iopt, &iova,
277 u64_to_user_ptr(cmd->user_va), cmd->length,
278 conv_iommu_prot(cmd->flags), flags);
279 if (rc)
280 goto out_put;
282 cmd->iova = iova;
283 rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
284 out_put:
285 iommufd_put_object(ucmd->ictx, &ioas->obj);
286 return rc;
289 int iommufd_ioas_copy(struct iommufd_ucmd *ucmd)
291 struct iommu_ioas_copy *cmd = ucmd->cmd;
292 struct iommufd_ioas *src_ioas;
293 struct iommufd_ioas *dst_ioas;
294 unsigned int flags = 0;
295 LIST_HEAD(pages_list);
296 unsigned long iova;
297 int rc;
299 iommufd_test_syz_conv_iova_id(ucmd, cmd->src_ioas_id, &cmd->src_iova,
300 &cmd->flags);
302 if ((cmd->flags &
303 ~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE |
304 IOMMU_IOAS_MAP_READABLE)))
305 return -EOPNOTSUPP;
306 if (cmd->length >= ULONG_MAX || cmd->src_iova >= ULONG_MAX ||
307 cmd->dst_iova >= ULONG_MAX)
308 return -EOVERFLOW;
310 if (!(cmd->flags &
311 (IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE)))
312 return -EINVAL;
314 src_ioas = iommufd_get_ioas(ucmd->ictx, cmd->src_ioas_id);
315 if (IS_ERR(src_ioas))
316 return PTR_ERR(src_ioas);
317 rc = iopt_get_pages(&src_ioas->iopt, cmd->src_iova, cmd->length,
318 &pages_list);
319 iommufd_put_object(ucmd->ictx, &src_ioas->obj);
320 if (rc)
321 return rc;
323 dst_ioas = iommufd_get_ioas(ucmd->ictx, cmd->dst_ioas_id);
324 if (IS_ERR(dst_ioas)) {
325 rc = PTR_ERR(dst_ioas);
326 goto out_pages;
329 if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA))
330 flags = IOPT_ALLOC_IOVA;
331 iova = cmd->dst_iova;
332 rc = iopt_map_pages(&dst_ioas->iopt, &pages_list, cmd->length, &iova,
333 conv_iommu_prot(cmd->flags), flags);
334 if (rc)
335 goto out_put_dst;
337 cmd->dst_iova = iova;
338 rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
339 out_put_dst:
340 iommufd_put_object(ucmd->ictx, &dst_ioas->obj);
341 out_pages:
342 iopt_free_pages_list(&pages_list);
343 return rc;
346 int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd)
348 struct iommu_ioas_unmap *cmd = ucmd->cmd;
349 struct iommufd_ioas *ioas;
350 unsigned long unmapped = 0;
351 int rc;
353 ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id);
354 if (IS_ERR(ioas))
355 return PTR_ERR(ioas);
357 if (cmd->iova == 0 && cmd->length == U64_MAX) {
358 rc = iopt_unmap_all(&ioas->iopt, &unmapped);
359 if (rc)
360 goto out_put;
361 } else {
362 if (cmd->iova >= ULONG_MAX || cmd->length >= ULONG_MAX) {
363 rc = -EOVERFLOW;
364 goto out_put;
366 rc = iopt_unmap_iova(&ioas->iopt, cmd->iova, cmd->length,
367 &unmapped);
368 if (rc)
369 goto out_put;
372 cmd->length = unmapped;
373 rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
375 out_put:
376 iommufd_put_object(ucmd->ictx, &ioas->obj);
377 return rc;
380 static void iommufd_release_all_iova_rwsem(struct iommufd_ctx *ictx,
381 struct xarray *ioas_list)
383 struct iommufd_ioas *ioas;
384 unsigned long index;
386 xa_for_each(ioas_list, index, ioas) {
387 up_write(&ioas->iopt.iova_rwsem);
388 refcount_dec(&ioas->obj.users);
390 up_write(&ictx->ioas_creation_lock);
391 xa_destroy(ioas_list);
394 static int iommufd_take_all_iova_rwsem(struct iommufd_ctx *ictx,
395 struct xarray *ioas_list)
397 struct iommufd_object *obj;
398 unsigned long index;
399 int rc;
402 * This is very ugly, it is done instead of adding a lock around
403 * pages->source_mm, which is a performance path for mdev, we just
404 * obtain the write side of all the iova_rwsems which also protects the
405 * pages->source_*. Due to copies we can't know which IOAS could read
406 * from the pages, so we just lock everything. This is the only place
407 * locks are nested and they are uniformly taken in ID order.
409 * ioas_creation_lock prevents new IOAS from being installed in the
410 * xarray while we do this, and also prevents more than one thread from
411 * holding nested locks.
413 down_write(&ictx->ioas_creation_lock);
414 xa_lock(&ictx->objects);
415 xa_for_each(&ictx->objects, index, obj) {
416 struct iommufd_ioas *ioas;
418 if (!obj || obj->type != IOMMUFD_OBJ_IOAS)
419 continue;
421 if (!refcount_inc_not_zero(&obj->users))
422 continue;
424 xa_unlock(&ictx->objects);
426 ioas = container_of(obj, struct iommufd_ioas, obj);
427 down_write_nest_lock(&ioas->iopt.iova_rwsem,
428 &ictx->ioas_creation_lock);
430 rc = xa_err(xa_store(ioas_list, index, ioas, GFP_KERNEL));
431 if (rc) {
432 iommufd_release_all_iova_rwsem(ictx, ioas_list);
433 return rc;
436 xa_lock(&ictx->objects);
438 xa_unlock(&ictx->objects);
439 return 0;
442 static bool need_charge_update(struct iopt_pages *pages)
444 switch (pages->account_mode) {
445 case IOPT_PAGES_ACCOUNT_NONE:
446 return false;
447 case IOPT_PAGES_ACCOUNT_MM:
448 return pages->source_mm != current->mm;
449 case IOPT_PAGES_ACCOUNT_USER:
451 * Update when mm changes because it also accounts
452 * in mm->pinned_vm.
454 return (pages->source_user != current_user()) ||
455 (pages->source_mm != current->mm);
457 return true;
460 static int charge_current(unsigned long *npinned)
462 struct iopt_pages tmp = {
463 .source_mm = current->mm,
464 .source_task = current->group_leader,
465 .source_user = current_user(),
467 unsigned int account_mode;
468 int rc;
470 for (account_mode = 0; account_mode != IOPT_PAGES_ACCOUNT_MODE_NUM;
471 account_mode++) {
472 if (!npinned[account_mode])
473 continue;
475 tmp.account_mode = account_mode;
476 rc = iopt_pages_update_pinned(&tmp, npinned[account_mode], true,
477 NULL);
478 if (rc)
479 goto err_undo;
481 return 0;
483 err_undo:
484 while (account_mode != 0) {
485 account_mode--;
486 if (!npinned[account_mode])
487 continue;
488 tmp.account_mode = account_mode;
489 iopt_pages_update_pinned(&tmp, npinned[account_mode], false,
490 NULL);
492 return rc;
495 static void change_mm(struct iopt_pages *pages)
497 struct task_struct *old_task = pages->source_task;
498 struct user_struct *old_user = pages->source_user;
499 struct mm_struct *old_mm = pages->source_mm;
501 pages->source_mm = current->mm;
502 mmgrab(pages->source_mm);
503 mmdrop(old_mm);
505 pages->source_task = current->group_leader;
506 get_task_struct(pages->source_task);
507 put_task_struct(old_task);
509 pages->source_user = get_uid(current_user());
510 free_uid(old_user);
/*
 * Iterate over every iopt_area of every IOAS stored in the xarray @_xa.
 * @_index and @_ioas are the xa_for_each() cursor and entry, @_area is the
 * area cursor. All arguments are parenthesized in the expansion so that
 * arbitrary expressions can be passed safely.
 */
#define for_each_ioas_area(_xa, _index, _ioas, _area) \
	xa_for_each((_xa), (_index), (_ioas)) \
		for ((_area) = iopt_area_iter_first(&(_ioas)->iopt, 0, ULONG_MAX); \
		     (_area); \
		     (_area) = iopt_area_iter_next((_area), 0, ULONG_MAX))
519 int iommufd_ioas_change_process(struct iommufd_ucmd *ucmd)
521 struct iommu_ioas_change_process *cmd = ucmd->cmd;
522 struct iommufd_ctx *ictx = ucmd->ictx;
523 unsigned long all_npinned[IOPT_PAGES_ACCOUNT_MODE_NUM] = {};
524 struct iommufd_ioas *ioas;
525 struct iopt_area *area;
526 struct iopt_pages *pages;
527 struct xarray ioas_list;
528 unsigned long index;
529 int rc;
531 if (cmd->__reserved)
532 return -EOPNOTSUPP;
534 xa_init(&ioas_list);
535 rc = iommufd_take_all_iova_rwsem(ictx, &ioas_list);
536 if (rc)
537 return rc;
539 for_each_ioas_area(&ioas_list, index, ioas, area) {
540 if (area->pages->type != IOPT_ADDRESS_FILE) {
541 rc = -EINVAL;
542 goto out;
547 * Count last_pinned pages, then clear it to avoid double counting
548 * if the same iopt_pages is visited multiple times in this loop.
549 * Since we are under all the locks, npinned == last_npinned, so we
550 * can easily restore last_npinned before we return.
552 for_each_ioas_area(&ioas_list, index, ioas, area) {
553 pages = area->pages;
555 if (need_charge_update(pages)) {
556 all_npinned[pages->account_mode] += pages->last_npinned;
557 pages->last_npinned = 0;
561 rc = charge_current(all_npinned);
563 if (rc) {
564 /* Charge failed. Fix last_npinned and bail. */
565 for_each_ioas_area(&ioas_list, index, ioas, area)
566 area->pages->last_npinned = area->pages->npinned;
567 goto out;
570 for_each_ioas_area(&ioas_list, index, ioas, area) {
571 pages = area->pages;
573 /* Uncharge the old one (which also restores last_npinned) */
574 if (need_charge_update(pages)) {
575 int r = iopt_pages_update_pinned(pages, pages->npinned,
576 false, NULL);
578 if (WARN_ON(r))
579 rc = r;
581 change_mm(pages);
584 out:
585 iommufd_release_all_iova_rwsem(ictx, &ioas_list);
586 return rc;
589 int iommufd_option_rlimit_mode(struct iommu_option *cmd,
590 struct iommufd_ctx *ictx)
592 if (cmd->object_id)
593 return -EOPNOTSUPP;
595 if (cmd->op == IOMMU_OPTION_OP_GET) {
596 cmd->val64 = ictx->account_mode == IOPT_PAGES_ACCOUNT_MM;
597 return 0;
599 if (cmd->op == IOMMU_OPTION_OP_SET) {
600 int rc = 0;
602 if (!capable(CAP_SYS_RESOURCE))
603 return -EPERM;
605 xa_lock(&ictx->objects);
606 if (!xa_empty(&ictx->objects)) {
607 rc = -EBUSY;
608 } else {
609 if (cmd->val64 == 0)
610 ictx->account_mode = IOPT_PAGES_ACCOUNT_USER;
611 else if (cmd->val64 == 1)
612 ictx->account_mode = IOPT_PAGES_ACCOUNT_MM;
613 else
614 rc = -EINVAL;
616 xa_unlock(&ictx->objects);
618 return rc;
620 return -EOPNOTSUPP;
623 static int iommufd_ioas_option_huge_pages(struct iommu_option *cmd,
624 struct iommufd_ioas *ioas)
626 if (cmd->op == IOMMU_OPTION_OP_GET) {
627 cmd->val64 = !ioas->iopt.disable_large_pages;
628 return 0;
630 if (cmd->op == IOMMU_OPTION_OP_SET) {
631 if (cmd->val64 == 0)
632 return iopt_disable_large_pages(&ioas->iopt);
633 if (cmd->val64 == 1) {
634 iopt_enable_large_pages(&ioas->iopt);
635 return 0;
637 return -EINVAL;
639 return -EOPNOTSUPP;
642 int iommufd_ioas_option(struct iommufd_ucmd *ucmd)
644 struct iommu_option *cmd = ucmd->cmd;
645 struct iommufd_ioas *ioas;
646 int rc = 0;
648 if (cmd->__reserved)
649 return -EOPNOTSUPP;
651 ioas = iommufd_get_ioas(ucmd->ictx, cmd->object_id);
652 if (IS_ERR(ioas))
653 return PTR_ERR(ioas);
655 switch (cmd->option_id) {
656 case IOMMU_OPTION_HUGE_PAGES:
657 rc = iommufd_ioas_option_huge_pages(cmd, ioas);
658 break;
659 default:
660 rc = -EOPNOTSUPP;
663 iommufd_put_object(ucmd->ictx, &ioas->obj);
664 return rc;