// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 */

#include <linux/file.h>
#include <linux/sync_file.h>
#include <linux/uaccess.h>

#include <drm/drm_drv.h>
#include <drm/drm_file.h>
#include <drm/drm_syncobj.h>

#include "msm_drv.h"
#include "msm_gpu.h"
#include "msm_gem.h"
#include "msm_gpu_trace.h"

/*
 * Cmdstream submission:
 */

/* make sure these don't conflict w/ MSM_SUBMIT_BO_x */
#define BO_VALID	0x8000	/* is current addr in cmdstream correct/valid? */
#define BO_LOCKED	0x4000
#define BO_PINNED	0x2000

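/*
 * submit_create() below sizes a single allocation that holds the submit
 * struct, its bos[] array, and the cmd[] array that follows it, so one
 * kmalloc covers everything needed to describe a submission.
 */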
static struct msm_gem_submit *submit_create(struct drm_device *dev,
		struct msm_gpu *gpu,
		struct msm_gpu_submitqueue *queue, uint32_t nr_bos,
		uint32_t nr_cmds)
{
	struct msm_gem_submit *submit;
	uint64_t sz = struct_size(submit, bos, nr_bos) +
		((u64)nr_cmds * sizeof(submit->cmd[0]));

	submit = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
	if (!submit)
		return NULL;

	kref_init(&submit->ref);
	submit->aspace = queue->ctx->aspace;
	submit->cmd = (void *)&submit->bos[nr_bos];
	submit->queue = queue;
	submit->ring = gpu->rb[queue->prio];

	/* initially, until copy_from_user() and bo lookup succeeds: */
	submit->nr_bos = 0;
	submit->nr_cmds = 0;

	INIT_LIST_HEAD(&submit->node);
	INIT_LIST_HEAD(&submit->bo_list);

	return submit;
}

void __msm_gem_submit_destroy(struct kref *kref)
{
	struct msm_gem_submit *submit =
			container_of(kref, struct msm_gem_submit, ref);
	unsigned i;

	dma_fence_put(submit->fence);
	msm_submitqueue_put(submit->queue);

	for (i = 0; i < submit->nr_cmds; i++)
		kfree(submit->cmd[i].relocs);

	kfree(submit);
}

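/*
 * Pass 1: copy the bo descriptors from userspace and validate their flags.
 * Pass 2: resolve the GEM handles to objects in bulk under table_lock.
 */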
static int submit_lookup_objects(struct msm_gem_submit *submit,
		struct drm_msm_gem_submit *args, struct drm_file *file)
{
	unsigned i;
	int ret = 0;

	for (i = 0; i < args->nr_bos; i++) {
		struct drm_msm_gem_submit_bo submit_bo;
		void __user *userptr =
			u64_to_user_ptr(args->bos + (i * sizeof(submit_bo)));

		/* make sure we don't have garbage flags, in case we hit
		 * error path before flags is initialized:
		 */
		submit->bos[i].flags = 0;

		if (copy_from_user(&submit_bo, userptr, sizeof(submit_bo))) {
			ret = -EFAULT;
			i = 0;
			goto out;
		}

/* at least one of READ and/or WRITE flags should be set: */
#define MANDATORY_FLAGS (MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE)

		if ((submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) ||
			!(submit_bo.flags & MANDATORY_FLAGS)) {
			DRM_ERROR("invalid flags: %x\n", submit_bo.flags);
			ret = -EINVAL;
			i = 0;
			goto out;
		}

		submit->bos[i].handle = submit_bo.handle;
		submit->bos[i].flags = submit_bo.flags;
		/* in validate_objects() we figure out if this is true: */
		submit->bos[i].iova = submit_bo.presumed;
	}

	spin_lock(&file->table_lock);

	for (i = 0; i < args->nr_bos; i++) {
		struct drm_gem_object *obj;
		struct msm_gem_object *msm_obj;

		/* normally use drm_gem_object_lookup(), but for bulk lookup
		 * all under single table_lock just hit object_idr directly:
		 */
		obj = idr_find(&file->object_idr, submit->bos[i].handle);
		if (!obj) {
			DRM_ERROR("invalid handle %u at index %u\n", submit->bos[i].handle, i);
			ret = -EINVAL;
			goto out_unlock;
		}

		msm_obj = to_msm_bo(obj);

		if (!list_empty(&msm_obj->submit_entry)) {
			DRM_ERROR("handle %u at index %u already on submit list\n",
					submit->bos[i].handle, i);
			ret = -EINVAL;
			goto out_unlock;
		}

		drm_gem_object_get(obj);

		submit->bos[i].obj = msm_obj;

		list_add_tail(&msm_obj->submit_entry, &submit->bo_list);
	}

out_unlock:
	spin_unlock(&file->table_lock);

out:
	submit->nr_bos = i;

	return ret;
}

static int submit_lookup_cmds(struct msm_gem_submit *submit,
		struct drm_msm_gem_submit *args, struct drm_file *file)
{
	unsigned i;
	size_t sz;
	int ret = 0;

	for (i = 0; i < args->nr_cmds; i++) {
		struct drm_msm_gem_submit_cmd submit_cmd;
		void __user *userptr =
			u64_to_user_ptr(args->cmds + (i * sizeof(submit_cmd)));

		ret = copy_from_user(&submit_cmd, userptr, sizeof(submit_cmd));
		if (ret) {
			ret = -EFAULT;
			goto out;
		}

		/* validate input from userspace: */
		switch (submit_cmd.type) {
		case MSM_SUBMIT_CMD_BUF:
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			break;
		default:
			DRM_ERROR("invalid type: %08x\n", submit_cmd.type);
			return -EINVAL;
		}

		if (submit_cmd.size % 4) {
			DRM_ERROR("non-aligned cmdstream buffer size: %u\n",
					submit_cmd.size);
			ret = -EINVAL;
			goto out;
		}

		submit->cmd[i].type = submit_cmd.type;
		submit->cmd[i].size = submit_cmd.size / 4;
		submit->cmd[i].offset = submit_cmd.submit_offset / 4;
		submit->cmd[i].idx = submit_cmd.submit_idx;
		submit->cmd[i].nr_relocs = submit_cmd.nr_relocs;

		userptr = u64_to_user_ptr(submit_cmd.relocs);

		sz = array_size(submit_cmd.nr_relocs,
				sizeof(struct drm_msm_gem_submit_reloc));
		/* check for overflow: */
		if (sz == SIZE_MAX) {
			ret = -ENOMEM;
			goto out;
		}
		submit->cmd[i].relocs = kmalloc(sz, GFP_KERNEL);
		if (!submit->cmd[i].relocs) {
			ret = -ENOMEM;
			goto out;
		}
		ret = copy_from_user(submit->cmd[i].relocs, userptr, sz);
		if (ret) {
			ret = -EFAULT;
			goto out;
		}
	}

out:
	return ret;
}

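/*
 * Undo per-bo state in the reverse order it was established: unpin the
 * iova, drop the reservation lock, and on backoff forget a presumed
 * address that was never validated.
 */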
static void submit_unlock_unpin_bo(struct msm_gem_submit *submit,
		int i, bool backoff)
{
	struct msm_gem_object *msm_obj = submit->bos[i].obj;

	if (submit->bos[i].flags & BO_PINNED)
		msm_gem_unpin_iova_locked(&msm_obj->base, submit->aspace);

	if (submit->bos[i].flags & BO_LOCKED)
		dma_resv_unlock(msm_obj->base.resv);

	if (backoff && !(submit->bos[i].flags & BO_VALID))
		submit->bos[i].iova = 0;

	submit->bos[i].flags &= ~(BO_LOCKED | BO_PINNED);
}

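/*
 * Locking uses the ww_mutex acquire context in submit->ticket: on -EDEADLK
 * we back off, slow-lock the contended bo, and retry the whole set.
 */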
/* This is where we make sure all the bo's are reserved and pin'd: */
static int submit_lock_objects(struct msm_gem_submit *submit)
{
	int contended, slow_locked = -1, i, ret = 0;

retry:
	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;

		if (slow_locked == i)
			slow_locked = -1;

		contended = i;

		if (!(submit->bos[i].flags & BO_LOCKED)) {
			ret = dma_resv_lock_interruptible(msm_obj->base.resv,
					&submit->ticket);
			if (ret)
				goto fail;
			submit->bos[i].flags |= BO_LOCKED;
		}
	}

	ww_acquire_done(&submit->ticket);

	return 0;

fail:
	for (; i >= 0; i--)
		submit_unlock_unpin_bo(submit, i, true);

	if (slow_locked > 0)
		submit_unlock_unpin_bo(submit, slow_locked, true);

	if (ret == -EDEADLK) {
		struct msm_gem_object *msm_obj = submit->bos[contended].obj;
		/* we lost out in a seqno race, lock and retry.. */
		ret = dma_resv_lock_slow_interruptible(msm_obj->base.resv,
				&submit->ticket);
		if (!ret) {
			submit->bos[contended].flags |= BO_LOCKED;
			slow_locked = contended;
			goto retry;
		}
	}

	return ret;
}

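/*
 * Handle implicit fencing against each bo's reservation object, unless
 * userspace asked for explicit-only synchronization (MSM_SUBMIT_NO_IMPLICIT).
 */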
static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit)
{
	int i, ret = 0;

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE;

		if (!write) {
			/* NOTE: _reserve_shared() must happen before
			 * _add_shared_fence(), which makes this a slightly
			 * strange place to call it.  OTOH this is a
			 * convenient can-fail point to hook it in.
			 */
			ret = dma_resv_reserve_shared(msm_obj->base.resv, 1);
			if (ret)
				return ret;
		}

		if (no_implicit)
			continue;

		ret = msm_gem_sync_object(&msm_obj->base, submit->ring->fctx,
				write);
		if (ret)
			break;
	}

	return ret;
}

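/*
 * Pin each bo's iova and compare it against the address userspace presumed;
 * if any differ, submit->valid is cleared and relocs must be applied.
 */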
static int submit_pin_objects(struct msm_gem_submit *submit)
{
	int i, ret = 0;

	submit->valid = true;

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		uint64_t iova;

		/* if locking succeeded, pin bo: */
		ret = msm_gem_get_and_pin_iova_locked(&msm_obj->base,
				submit->aspace, &iova);
		if (ret)
			break;

		submit->bos[i].flags |= BO_PINNED;

		if (iova == submit->bos[i].iova) {
			submit->bos[i].flags |= BO_VALID;
		} else {
			submit->bos[i].iova = iova;
			/* iova changed, so address in cmdstream is not valid: */
			submit->bos[i].flags &= ~BO_VALID;
			submit->valid = false;
		}
	}

	return ret;
}

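/* Small helper to fetch a looked-up bo (and its iova/valid state) by index. */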
static int submit_bo(struct msm_gem_submit *submit, uint32_t idx,
		struct msm_gem_object **obj, uint64_t *iova, bool *valid)
{
	if (idx >= submit->nr_bos) {
		DRM_ERROR("invalid buffer index: %u (out of %u)\n",
				idx, submit->nr_bos);
		return -EINVAL;
	}

	if (obj)
		*obj = submit->bos[idx].obj;
	if (iova)
		*iova = submit->bos[idx].iova;
	if (valid)
		*valid = !!(submit->bos[idx].flags & BO_VALID);

	return 0;
}

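/*
 * Relocs must be sorted by ascending submit_offset (enforced via last_offset),
 * and bos whose presumed address is still valid are skipped entirely.
 */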
/* process the reloc's and patch up the cmdstream as needed: */
static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *obj,
		uint32_t offset, uint32_t nr_relocs, struct drm_msm_gem_submit_reloc *relocs)
{
	uint32_t i, last_offset = 0;
	uint32_t *ptr;
	int ret = 0;

	if (offset % 4) {
		DRM_ERROR("non-aligned cmdstream buffer: %u\n", offset);
		return -EINVAL;
	}

	/* For now, just map the entire thing.  Eventually we probably want
	 * to do it page-by-page, w/ kmap() if not vmap()d..
	 */
	ptr = msm_gem_get_vaddr_locked(&obj->base);
	if (IS_ERR(ptr)) {
		ret = PTR_ERR(ptr);
		DBG("failed to map: %d", ret);
		return ret;
	}

	for (i = 0; i < nr_relocs; i++) {
		struct drm_msm_gem_submit_reloc submit_reloc = relocs[i];
		uint32_t off;
		uint64_t iova;
		bool valid;

		if (submit_reloc.submit_offset % 4) {
			DRM_ERROR("non-aligned reloc offset: %u\n",
					submit_reloc.submit_offset);
			ret = -EINVAL;
			goto out;
		}

		/* offset in dwords: */
		off = submit_reloc.submit_offset / 4;

		if ((off >= (obj->base.size / 4)) ||
				(off < last_offset)) {
			DRM_ERROR("invalid offset %u at reloc %u\n", off, i);
			ret = -EINVAL;
			goto out;
		}

		ret = submit_bo(submit, submit_reloc.reloc_idx, NULL, &iova, &valid);
		if (ret)
			goto out;

		if (valid)
			continue;

		iova += submit_reloc.reloc_offset;

		if (submit_reloc.shift < 0)
			iova >>= -submit_reloc.shift;
		else
			iova <<= submit_reloc.shift;

		ptr[off] = iova | submit_reloc.or;

		last_offset = off;
	}

out:
	msm_gem_put_vaddr_locked(&obj->base);

	return ret;
}

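/* Drop the per-bo state built up during submit: unpin, unlock and unref. */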
static void submit_cleanup(struct msm_gem_submit *submit)
{
	unsigned i;

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		submit_unlock_unpin_bo(submit, i, false);
		list_del_init(&msm_obj->submit_entry);
		drm_gem_object_put_locked(&msm_obj->base);
	}
}

struct msm_submit_post_dep {
	struct drm_syncobj *syncobj;
	uint64_t point;
	struct dma_fence_chain *chain;
};

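/*
 * Syncobj handling for in-fences: wait on each dependency that is not from
 * our own ring's fence context, and remember which syncobjs need a reset
 * (MSM_SUBMIT_SYNCOBJ_RESET) once the submit is queued.
 */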
static struct drm_syncobj **msm_wait_deps(struct drm_device *dev,
					  struct drm_file *file,
					  uint64_t in_syncobjs_addr,
					  uint32_t nr_in_syncobjs,
					  size_t syncobj_stride,
					  struct msm_ringbuffer *ring)
{
	struct drm_syncobj **syncobjs = NULL;
	struct drm_msm_gem_submit_syncobj syncobj_desc = {0};
	int ret = 0;
	uint32_t i, j;

	syncobjs = kcalloc(nr_in_syncobjs, sizeof(*syncobjs),
			   GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
	if (!syncobjs)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < nr_in_syncobjs; ++i) {
		uint64_t address = in_syncobjs_addr + i * syncobj_stride;
		struct dma_fence *fence;

		if (copy_from_user(&syncobj_desc,
				   u64_to_user_ptr(address),
				   min(syncobj_stride, sizeof(syncobj_desc)))) {
			ret = -EFAULT;
			break;
		}

		if (syncobj_desc.point &&
		    !drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE)) {
			ret = -EOPNOTSUPP;
			break;
		}

		if (syncobj_desc.flags & ~MSM_SUBMIT_SYNCOBJ_FLAGS) {
			ret = -EINVAL;
			break;
		}

		ret = drm_syncobj_find_fence(file, syncobj_desc.handle,
					     syncobj_desc.point, 0, &fence);
		if (ret)
			break;

		if (!dma_fence_match_context(fence, ring->fctx->context))
			ret = dma_fence_wait(fence, true);

		dma_fence_put(fence);
		if (ret)
			break;

		if (syncobj_desc.flags & MSM_SUBMIT_SYNCOBJ_RESET) {
			syncobjs[i] =
				drm_syncobj_find(file, syncobj_desc.handle);
			if (!syncobjs[i]) {
				ret = -EINVAL;
				break;
			}
		}
	}

	if (ret) {
		for (j = 0; j <= i; ++j) {
			if (syncobjs[j])
				drm_syncobj_put(syncobjs[j]);
		}
		kfree(syncobjs);
		return ERR_PTR(ret);
	}

	return syncobjs;
}

static void msm_reset_syncobjs(struct drm_syncobj **syncobjs,
			       uint32_t nr_syncobjs)
{
	uint32_t i;

	for (i = 0; syncobjs && i < nr_syncobjs; ++i) {
		if (syncobjs[i])
			drm_syncobj_replace_fence(syncobjs[i], NULL);
	}
}

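/*
 * Parse the out-syncobj descriptors up front: timeline points get a
 * preallocated dma_fence_chain so the signaling path later cannot fail.
 */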
static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev,
						       struct drm_file *file,
						       uint64_t syncobjs_addr,
						       uint32_t nr_syncobjs,
						       size_t syncobj_stride)
{
	struct msm_submit_post_dep *post_deps;
	struct drm_msm_gem_submit_syncobj syncobj_desc = {0};
	int ret = 0;
	uint32_t i, j;

	post_deps = kmalloc_array(nr_syncobjs, sizeof(*post_deps),
				  GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
	if (!post_deps)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < nr_syncobjs; ++i) {
		uint64_t address = syncobjs_addr + i * syncobj_stride;

		if (copy_from_user(&syncobj_desc,
				   u64_to_user_ptr(address),
				   min(syncobj_stride, sizeof(syncobj_desc)))) {
			ret = -EFAULT;
			break;
		}

		post_deps[i].point = syncobj_desc.point;
		post_deps[i].chain = NULL;

		if (syncobj_desc.flags) {
			ret = -EINVAL;
			break;
		}

		if (syncobj_desc.point) {
			if (!drm_core_check_feature(dev,
						    DRIVER_SYNCOBJ_TIMELINE)) {
				ret = -EOPNOTSUPP;
				break;
			}

			post_deps[i].chain =
				kmalloc(sizeof(*post_deps[i].chain),
					GFP_KERNEL);
			if (!post_deps[i].chain) {
				ret = -ENOMEM;
				break;
			}
		}

		post_deps[i].syncobj =
			drm_syncobj_find(file, syncobj_desc.handle);
		if (!post_deps[i].syncobj) {
			kfree(post_deps[i].chain);
			ret = -EINVAL;
			break;
		}
	}

	if (ret) {
		for (j = 0; j <= i; ++j) {
			kfree(post_deps[j].chain);
			if (post_deps[j].syncobj)
				drm_syncobj_put(post_deps[j].syncobj);
		}

		kfree(post_deps);
		return ERR_PTR(ret);
	}

	return post_deps;
}

static void msm_process_post_deps(struct msm_submit_post_dep *post_deps,
				  uint32_t count, struct dma_fence *fence)
{
	uint32_t i;

	for (i = 0; post_deps && i < count; ++i) {
		if (post_deps[i].chain) {
			drm_syncobj_add_point(post_deps[i].syncobj,
					      post_deps[i].chain,
					      fence, post_deps[i].point);
			post_deps[i].chain = NULL;
		} else {
			drm_syncobj_replace_fence(post_deps[i].syncobj,
						  fence);
		}
	}
}

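/*
 * The ioctl itself: validate flags, collect in-fence dependencies, look up
 * bos and cmds, lock/sync/pin the bos, patch relocs if needed, then hand
 * the submit to the GPU and report the out-fence.
 */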
int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
		struct drm_file *file)
{
	static atomic_t ident = ATOMIC_INIT(0);
	struct msm_drm_private *priv = dev->dev_private;
	struct drm_msm_gem_submit *args = data;
	struct msm_file_private *ctx = file->driver_priv;
	struct msm_gem_submit *submit;
	struct msm_gpu *gpu = priv->gpu;
	struct sync_file *sync_file = NULL;
	struct msm_gpu_submitqueue *queue;
	struct msm_ringbuffer *ring;
	struct msm_submit_post_dep *post_deps = NULL;
	struct drm_syncobj **syncobjs_to_reset = NULL;
	int out_fence_fd = -1;
	struct pid *pid = get_pid(task_pid(current));
	bool has_ww_ticket = false;
	unsigned i;
	int ret, submitid;

	if (!gpu)
		return -ENXIO;

	/* for now, we just have 3d pipe.. eventually this would need to
	 * be more clever to dispatch to appropriate gpu module:
	 */
	if (MSM_PIPE_ID(args->flags) != MSM_PIPE_3D0)
		return -EINVAL;

	if (MSM_PIPE_FLAGS(args->flags) & ~MSM_SUBMIT_FLAGS)
		return -EINVAL;

	if (args->flags & MSM_SUBMIT_SUDO) {
		if (!IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) ||
		    !capable(CAP_SYS_RAWIO))
			return -EINVAL;
	}

	queue = msm_submitqueue_get(ctx, args->queueid);
	if (!queue)
		return -ENOENT;

	/* Get a unique identifier for the submission for logging purposes */
	submitid = atomic_inc_return(&ident) - 1;

	ring = gpu->rb[queue->prio];
	trace_msm_gpu_submit(pid_nr(pid), ring->id, submitid,
		args->nr_bos, args->nr_cmds);

	if (args->flags & MSM_SUBMIT_FENCE_FD_IN) {
		struct dma_fence *in_fence;

		in_fence = sync_file_get_fence(args->fence_fd);
		if (!in_fence)
			return -EINVAL;

		/*
		 * Wait if the fence is from a foreign context, or if the fence
		 * array contains any fence from a foreign context.
		 */
		ret = 0;
		if (!dma_fence_match_context(in_fence, ring->fctx->context))
			ret = dma_fence_wait(in_fence, true);

		dma_fence_put(in_fence);
		if (ret)
			return ret;
	}

	if (args->flags & MSM_SUBMIT_SYNCOBJ_IN) {
		syncobjs_to_reset = msm_wait_deps(dev, file,
						  args->in_syncobjs,
						  args->nr_in_syncobjs,
						  args->syncobj_stride, ring);
		if (IS_ERR(syncobjs_to_reset))
			return PTR_ERR(syncobjs_to_reset);
	}

	if (args->flags & MSM_SUBMIT_SYNCOBJ_OUT) {
		post_deps = msm_parse_post_deps(dev, file,
						args->out_syncobjs,
						args->nr_out_syncobjs,
						args->syncobj_stride);
		if (IS_ERR(post_deps)) {
			ret = PTR_ERR(post_deps);
			goto out_post_unlock;
		}
	}

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		goto out_post_unlock;

	if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
		if (out_fence_fd < 0) {
			ret = out_fence_fd;
			goto out_unlock;
		}
	}

	submit = submit_create(dev, gpu, queue, args->nr_bos,
		args->nr_cmds);
	if (!submit) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	submit->pid = pid;
	submit->ident = submitid;

	if (args->flags & MSM_SUBMIT_SUDO)
		submit->in_rb = true;

	ret = submit_lookup_objects(submit, args, file);
	if (ret)
		goto out_pre_pm;

	ret = submit_lookup_cmds(submit, args, file);
	if (ret)
		goto out_pre_pm;

	/*
	 * Thanks to dev_pm_opp opp_table_lock interactions with mm->mmap_sem
	 * in the resume path, we need to rpm get before we lock objs.
	 * Which unfortunately might involve powering up the GPU sooner than
	 * is necessary.  But at least in the explicit fencing case, we will
	 * have already done all the fence waiting.
	 */
	pm_runtime_get_sync(&gpu->pdev->dev);

	/* copy_*_user while holding a ww ticket upsets lockdep */
	ww_acquire_init(&submit->ticket, &reservation_ww_class);
	has_ww_ticket = true;
	ret = submit_lock_objects(submit);
	if (ret)
		goto out;

	ret = submit_fence_sync(submit, !!(args->flags & MSM_SUBMIT_NO_IMPLICIT));
	if (ret)
		goto out;

	ret = submit_pin_objects(submit);
	if (ret)
		goto out;

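	/*
	 * Validate each cmd buffer against its backing bo and compute its GPU
	 * address; relocs only need processing when a presumed address was stale.
	 */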
	for (i = 0; i < args->nr_cmds; i++) {
		struct msm_gem_object *msm_obj;
		uint64_t iova;

		ret = submit_bo(submit, submit->cmd[i].idx,
				&msm_obj, &iova, NULL);
		if (ret)
			goto out;

		if (!submit->cmd[i].size ||
			((submit->cmd[i].size + submit->cmd[i].offset) >
				msm_obj->base.size / 4)) {
			DRM_ERROR("invalid cmdstream size: %u\n", submit->cmd[i].size * 4);
			ret = -EINVAL;
			goto out;
		}

		submit->cmd[i].iova = iova + (submit->cmd[i].offset * 4);

		if (submit->valid)
			continue;

		ret = submit_reloc(submit, msm_obj, submit->cmd[i].offset * 4,
				submit->cmd[i].nr_relocs, submit->cmd[i].relocs);
		if (ret)
			goto out;
	}

	submit->nr_cmds = i;

	submit->fence = msm_fence_alloc(ring->fctx);
	if (IS_ERR(submit->fence)) {
		ret = PTR_ERR(submit->fence);
		submit->fence = NULL;
		goto out;
	}

	if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
		sync_file = sync_file_create(submit->fence);
		if (!sync_file) {
			ret = -ENOMEM;
			goto out;
		}
	}

	msm_gpu_submit(gpu, submit);

	args->fence = submit->fence->seqno;

	if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
		fd_install(out_fence_fd, sync_file->file);
		args->fence_fd = out_fence_fd;
	}

	msm_reset_syncobjs(syncobjs_to_reset, args->nr_in_syncobjs);
	msm_process_post_deps(post_deps, args->nr_out_syncobjs,
			      submit->fence);

out:
	pm_runtime_put(&gpu->pdev->dev);
out_pre_pm:
	submit_cleanup(submit);
	if (has_ww_ticket)
		ww_acquire_fini(&submit->ticket);
	msm_gem_submit_put(submit);
out_unlock:
	if (ret && (out_fence_fd >= 0))
		put_unused_fd(out_fence_fd);
	mutex_unlock(&dev->struct_mutex);

out_post_unlock:
	if (!IS_ERR_OR_NULL(post_deps)) {
		for (i = 0; i < args->nr_out_syncobjs; ++i) {
			kfree(post_deps[i].chain);
			drm_syncobj_put(post_deps[i].syncobj);
		}
		kfree(post_deps);
	}

	if (!IS_ERR_OR_NULL(syncobjs_to_reset)) {
		for (i = 0; i < args->nr_in_syncobjs; ++i) {
			if (syncobjs_to_reset[i])
				drm_syncobj_put(syncobjs_to_reset[i]);
		}
		kfree(syncobjs_to_reset);
	}

	return ret;
}