/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/sched/signal.h>

#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_regs.h"
#include "vc4_trace.h"

static void
vc4_queue_hangcheck(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	mod_timer(&vc4->hangcheck.timer,
		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
}

struct vc4_hang_state {
	struct drm_vc4_get_hang_state user_state;

	u32 bo_count;
	struct drm_gem_object **bo;
};

static void
vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
{
	unsigned int i;

	for (i = 0; i < state->user_state.bo_count; i++)
		drm_gem_object_put_unlocked(state->bo[i]);

	kfree(state);
}

static int
vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_vc4_get_hang_state *get_state = data;
	struct drm_vc4_get_hang_state_bo *bo_state;
	struct vc4_hang_state *kernel_state;
	struct drm_vc4_get_hang_state *state;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned long irqflags;
	u32 i;
	int ret = 0;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	kernel_state = vc4->hang_state;
	if (!kernel_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return -ENOENT;
	}

	state = &kernel_state->user_state;

	/* If the user's array isn't big enough, just return the
	 * required array size.
	 */
	if (get_state->bo_count < state->bo_count) {
		get_state->bo_count = state->bo_count;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return 0;
	}

	vc4->hang_state = NULL;
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* Save the user's BO pointer, so we don't stomp it with the memcpy. */
	state->bo = get_state->bo;
	memcpy(get_state, state, sizeof(*state));

	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
	if (!bo_state) {
		ret = -ENOMEM;
		goto err_free;
	}

	for (i = 0; i < state->bo_count; i++) {
		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
		u32 handle;

		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
					    &handle);

		if (ret) {
			state->bo_count = i;
			goto err_delete_handle;
		}
		bo_state[i].handle = handle;
		bo_state[i].paddr = vc4_bo->base.paddr;
		bo_state[i].size = vc4_bo->base.base.size;
	}

	if (copy_to_user(u64_to_user_ptr(get_state->bo),
			 bo_state,
			 state->bo_count * sizeof(*bo_state)))
		ret = -EFAULT;

err_delete_handle:
	if (ret) {
		for (i = 0; i < state->bo_count; i++)
			drm_gem_handle_delete(file_priv, bo_state[i].handle);
	}

err_free:
	vc4_free_hang_state(dev, kernel_state);
	kfree(bo_state);

	return ret;
}
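
/* Rough userspace sketch of the two-call protocol implemented above (assumes
 * libdrm's drmIoctl() and the uapi in vc4_drm.h; error handling omitted):
 *
 *	struct drm_vc4_get_hang_state get = { 0 };
 *
 *	drmIoctl(fd, DRM_IOCTL_VC4_GET_HANG_STATE, &get);	// learn bo_count
 *	get.bo = (uintptr_t)calloc(get.bo_count,
 *				   sizeof(struct drm_vc4_get_hang_state_bo));
 *	drmIoctl(fd, DRM_IOCTL_VC4_GET_HANG_STATE, &get);	// registers + BOs
 *
 * The first call only reports the required array size; the second hands the
 * saved state over and clears vc4->hang_state, so it can be read exactly once.
 */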

static void
vc4_save_hang_state(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_get_hang_state *state;
	struct vc4_hang_state *kernel_state;
	struct vc4_exec_info *exec[2];
	struct vc4_bo *bo;
	unsigned long irqflags;
	unsigned int i, j, k, unref_list_count;

	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
	if (!kernel_state)
		return;

	state = &kernel_state->user_state;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	exec[0] = vc4_first_bin_job(vc4);
	exec[1] = vc4_first_render_job(vc4);
	if (!exec[0] && !exec[1]) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	/* Get the bos from both binner and renderer into hang state. */
	state->bo_count = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		unref_list_count = 0;
		list_for_each_entry(bo, &exec[i]->unref_list, unref_head)
			unref_list_count++;
		state->bo_count += exec[i]->bo_count + unref_list_count;
	}

	kernel_state->bo = kcalloc(state->bo_count,
				   sizeof(*kernel_state->bo), GFP_ATOMIC);

	if (!kernel_state->bo) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	k = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		for (j = 0; j < exec[i]->bo_count; j++) {
			bo = to_vc4_bo(&exec[i]->bo[j]->base);

			/* Retain BOs just in case they were marked purgeable.
			 * This prevents the BO from being purged before
			 * someone had a chance to dump the hang state.
			 */
			WARN_ON(!refcount_read(&bo->usecnt));
			refcount_inc(&bo->usecnt);
			drm_gem_object_get(&exec[i]->bo[j]->base);
			kernel_state->bo[k++] = &exec[i]->bo[j]->base;
		}

		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
			/* No need to retain BOs coming from the ->unref_list
			 * because they are naturally unpurgeable.
			 */
			drm_gem_object_get(&bo->base.base);
			kernel_state->bo[k++] = &bo->base.base;
		}
	}

	WARN_ON_ONCE(k != state->bo_count);

	if (exec[0])
		state->start_bin = exec[0]->ct0ca;
	if (exec[1])
		state->start_render = exec[1]->ct1ca;

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	state->ct0ca = V3D_READ(V3D_CTNCA(0));
	state->ct0ea = V3D_READ(V3D_CTNEA(0));

	state->ct1ca = V3D_READ(V3D_CTNCA(1));
	state->ct1ea = V3D_READ(V3D_CTNEA(1));

	state->ct0cs = V3D_READ(V3D_CTNCS(0));
	state->ct1cs = V3D_READ(V3D_CTNCS(1));

	state->ct0ra0 = V3D_READ(V3D_CT00RA0);
	state->ct1ra0 = V3D_READ(V3D_CT01RA0);

	state->bpca = V3D_READ(V3D_BPCA);
	state->bpcs = V3D_READ(V3D_BPCS);
	state->bpoa = V3D_READ(V3D_BPOA);
	state->bpos = V3D_READ(V3D_BPOS);

	state->vpmbase = V3D_READ(V3D_VPMBASE);

	state->dbge = V3D_READ(V3D_DBGE);
	state->fdbgo = V3D_READ(V3D_FDBGO);
	state->fdbgb = V3D_READ(V3D_FDBGB);
	state->fdbgr = V3D_READ(V3D_FDBGR);
	state->fdbgs = V3D_READ(V3D_FDBGS);
	state->errstat = V3D_READ(V3D_ERRSTAT);

	/* We need to turn purgeable BOs into unpurgeable ones so that
	 * userspace has a chance to dump the hang state before the kernel
	 * decides to purge those BOs.
	 * Note that BO consistency at dump time cannot be guaranteed. For
	 * example, if the owner of these BOs decides to re-use them or mark
	 * them purgeable again there's nothing we can do to prevent it.
	 */
	for (i = 0; i < kernel_state->user_state.bo_count; i++) {
		struct vc4_bo *bo = to_vc4_bo(kernel_state->bo[i]);

		if (bo->madv == __VC4_MADV_NOTSUPP)
			continue;

		mutex_lock(&bo->madv_lock);
		if (!WARN_ON(bo->madv == __VC4_MADV_PURGED))
			bo->madv = VC4_MADV_WILLNEED;
		refcount_dec(&bo->usecnt);
		mutex_unlock(&bo->madv_lock);
	}

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (vc4->hang_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_free_hang_state(dev, kernel_state);
	} else {
		vc4->hang_state = kernel_state;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
	}
}

static void
vc4_reset(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	DRM_INFO("Resetting GPU.\n");

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount) {
		/* Power the device off and back on by dropping and
		 * re-taking the runtime PM reference.
		 */
		pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
		pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);

	vc4_irq_reset(dev);

	/* Rearm the hangcheck -- another job might have been waiting
	 * for our hung one to get kicked off, and vc4_irq_reset()
	 * would have started it.
	 */
	vc4_queue_hangcheck(dev);
}

static void
vc4_reset_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, hangcheck.reset_work);

	vc4_save_hang_state(vc4->dev);

	vc4_reset(vc4->dev);
}

static void
vc4_hangcheck_elapsed(struct timer_list *t)
{
	struct vc4_dev *vc4 = from_timer(vc4, t, hangcheck.timer);
	struct drm_device *dev = vc4->dev;
	uint32_t ct0ca, ct1ca;
	unsigned long irqflags;
	struct vc4_exec_info *bin_exec, *render_exec;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	bin_exec = vc4_first_bin_job(vc4);
	render_exec = vc4_first_render_job(vc4);

	/* If idle, we can stop watching for hangs. */
	if (!bin_exec && !render_exec) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	ct0ca = V3D_READ(V3D_CTNCA(0));
	ct1ca = V3D_READ(V3D_CTNCA(1));

	/* If we've made any progress in execution, rearm the timer
	 * and wait.
	 */
	if ((bin_exec && ct0ca != bin_exec->last_ct0ca) ||
	    (render_exec && ct1ca != render_exec->last_ct1ca)) {
		if (bin_exec)
			bin_exec->last_ct0ca = ct0ca;
		if (render_exec)
			render_exec->last_ct1ca = ct1ca;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_queue_hangcheck(dev);
		return;
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* We've gone too long with no progress, reset.  This has to
	 * be done from a work struct, since resetting can sleep and
	 * this timer hook isn't allowed to.
	 */
	schedule_work(&vc4->hangcheck.reset_work);
}

static void
submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Set the current and end address of the control list.
	 * Writing the end register is what starts the job.
	 */
	V3D_WRITE(V3D_CTNCA(thread), start);
	V3D_WRITE(V3D_CTNEA(thread), end);
}

static int
vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
		   bool interruptible)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long timeout_expire;
	DEFINE_WAIT(wait);

	if (vc4->finished_seqno >= seqno)
		return 0;

	if (timeout_ns == 0)
		return -ETIME;

	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);

	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
	for (;;) {
		prepare_to_wait(&vc4->job_wait_queue, &wait,
				interruptible ? TASK_INTERRUPTIBLE :
				TASK_UNINTERRUPTIBLE);

		if (interruptible && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (vc4->finished_seqno >= seqno)
			break;

		if (timeout_ns != ~0ull) {
			if (time_after_eq(jiffies, timeout_expire)) {
				ret = -ETIME;
				break;
			}
			schedule_timeout(timeout_expire - jiffies);
		} else {
			schedule();
		}
	}

	finish_wait(&vc4->job_wait_queue, &wait);
	trace_vc4_wait_for_seqno_end(dev, seqno);

	return ret;
}
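
/* Usage note: callers either pass a timeout taken from userspace (the wait
 * ioctls below) or ~0ull for an untimed wait, as vc4_get_bcl() does for its
 * bin dependencies:
 *
 *	ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);
 *
 * A timed wait that expires returns -ETIME; an interruptible wait that sees a
 * signal returns -ERESTARTSYS so the syscall can be restarted.
 */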

static void
vc4_flush_caches(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Flush the GPU L2 caches.  These caches sit on top of system
	 * L3 (the 128kb or so shared with the CPU), and are
	 * non-allocating in the L3.
	 */
	V3D_WRITE(V3D_L2CACTL,
		  V3D_L2CACTL_L2CCLR);

	V3D_WRITE(V3D_SLCACTL,
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

static void
vc4_flush_texture_caches(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	V3D_WRITE(V3D_L2CACTL,
		  V3D_L2CACTL_L2CCLR);

	V3D_WRITE(V3D_SLCACTL,
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC));
}

/* Sets the registers for the next job to actually be executed in
 * the hardware.
 *
 * The job_lock should be held during this.
 */
void
vc4_submit_next_bin_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec;

again:
	exec = vc4_first_bin_job(vc4);
	if (!exec)
		return;

	vc4_flush_caches(dev);

	/* Either put the job in the binner if it uses the binner, or
	 * immediately move it to the to-be-rendered queue.
	 */
	if (exec->ct0ca != exec->ct0ea) {
		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
	} else {
		vc4_move_job_to_render(dev, exec);
		goto again;
	}
}

void
vc4_submit_next_render_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec = vc4_first_render_job(vc4);

	if (!exec)
		return;

	/* A previous RCL may have written to one of our textures, and
	 * our full cache flush at bin time may have occurred before
	 * that RCL completed.  Flush the texture cache now, but not
	 * the instructions or uniforms (since we don't write those
	 * from an RCL).
	 */
	vc4_flush_texture_caches(dev);

	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
}

void
vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	bool was_empty = list_empty(&vc4->render_job_list);

	list_move_tail(&exec->head, &vc4->render_job_list);
	if (was_empty)
		vc4_submit_next_render_job(dev);
}

static void
vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
{
	struct vc4_bo *bo;
	unsigned i;

	for (i = 0; i < exec->bo_count; i++) {
		bo = to_vc4_bo(&exec->bo[i]->base);
		bo->seqno = seqno;

		reservation_object_add_shared_fence(bo->resv, exec->fence);
	}

	list_for_each_entry(bo, &exec->unref_list, unref_head) {
		bo->seqno = seqno;
	}

	for (i = 0; i < exec->rcl_write_bo_count; i++) {
		bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
		bo->write_seqno = seqno;

		reservation_object_add_excl_fence(bo->resv, exec->fence);
	}
}

static void
vc4_unlock_bo_reservations(struct drm_device *dev,
			   struct vc4_exec_info *exec,
			   struct ww_acquire_ctx *acquire_ctx)
{
	int i;

	for (i = 0; i < exec->bo_count; i++) {
		struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);

		ww_mutex_unlock(&bo->resv->lock);
	}

	ww_acquire_fini(acquire_ctx);
}

/* Takes the reservation lock on all the BOs being referenced, so that
 * at queue submit time we can update the reservations.
 *
 * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
 * (all of which are on exec->unref_list).  They're entirely private
 * to vc4, so we don't attach dma-buf fences to them.
 */
static int
vc4_lock_bo_reservations(struct drm_device *dev,
			 struct vc4_exec_info *exec,
			 struct ww_acquire_ctx *acquire_ctx)
{
	int contended_lock = -1;
	int i, ret;
	struct vc4_bo *bo;

	ww_acquire_init(acquire_ctx, &reservation_ww_class);

retry:
	if (contended_lock != -1) {
		bo = to_vc4_bo(&exec->bo[contended_lock]->base);
		ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
						       acquire_ctx);
		if (ret) {
			ww_acquire_done(acquire_ctx);
			return ret;
		}
	}

	for (i = 0; i < exec->bo_count; i++) {
		if (i == contended_lock)
			continue;

		bo = to_vc4_bo(&exec->bo[i]->base);

		ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx);
		if (ret) {
			int j;

			for (j = 0; j < i; j++) {
				bo = to_vc4_bo(&exec->bo[j]->base);
				ww_mutex_unlock(&bo->resv->lock);
			}

			if (contended_lock != -1 && contended_lock >= i) {
				bo = to_vc4_bo(&exec->bo[contended_lock]->base);

				ww_mutex_unlock(&bo->resv->lock);
			}

			if (ret == -EDEADLK) {
				contended_lock = i;
				goto retry;
			}

			ww_acquire_done(acquire_ctx);
			return ret;
		}
	}

	ww_acquire_done(acquire_ctx);

	/* Reserve space for our shared (read-only) fence references,
	 * before we commit the CL to the hardware.
	 */
	for (i = 0; i < exec->bo_count; i++) {
		bo = to_vc4_bo(&exec->bo[i]->base);

		ret = reservation_object_reserve_shared(bo->resv);
		if (ret) {
			vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
			return ret;
		}
	}

	return 0;
}
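
/* The retry loop above is the usual ww_mutex acquire/backoff pattern: when a
 * lock attempt returns -EDEADLK, every reservation taken so far is dropped,
 * the contended BO is remembered, and the next pass starts by taking that
 * lock with ww_mutex_lock_slow_interruptible(), so two submitters locking
 * overlapping BO sets cannot deadlock.  In outline (sketch, not driver code):
 *
 *	retry:
 *		if there was a loser, lock it first with the _slow variant;
 *		for each remaining BO:
 *			ww_mutex_lock_interruptible();
 *			on -EDEADLK: unlock everything, record the loser, goto retry;
 *	ww_acquire_done(ctx);
 */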

/* Queues a struct vc4_exec_info for execution.  If no job is
 * currently executing, then submits it.
 *
 * Unlike most GPUs, our hardware only handles one command list at a
 * time.  To queue multiple jobs at once, we'd need to edit the
 * previous command list to have a jump to the new one at the end, and
 * then bump the end address.  That's a change for a later date,
 * though.
 */
static int
vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
		 struct ww_acquire_ctx *acquire_ctx)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint64_t seqno;
	unsigned long irqflags;
	struct vc4_fence *fence;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return -ENOMEM;
	fence->dev = dev;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	seqno = ++vc4->emit_seqno;
	exec->seqno = seqno;

	dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock,
		       vc4->dma_fence_context, exec->seqno);
	fence->seqno = exec->seqno;
	exec->fence = &fence->base;

	vc4_update_bo_seqnos(exec, seqno);

	vc4_unlock_bo_reservations(dev, exec, acquire_ctx);

	list_add_tail(&exec->head, &vc4->bin_job_list);

	/* If no job was executing, kick ours off.  Otherwise, it'll
	 * get started when the previous job's flush done interrupt
	 * occurs.
	 */
	if (vc4_first_bin_job(vc4) == exec) {
		vc4_submit_next_bin_job(dev);
		vc4_queue_hangcheck(dev);
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	return 0;
}
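
/* The body above runs under job_lock: the seqno is taken from
 * vc4->emit_seqno while the lock is held, so seqnos increase in submission
 * order, and vc4_update_bo_seqnos() stores the seqno in each BO and attaches
 * the job's fence to the BOs' reservation objects before those reservations
 * are unlocked.  That ordering is what the seqno-based wait ioctls below and
 * implicit (dma-buf) fencing rely on.
 */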

/**
 * vc4_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects
 * referenced by the job.
 * @dev: DRM device
 * @file_priv: DRM file for this fd
 * @exec: V3D job being set up
 *
 * The command validator needs to reference BOs by their index within
 * the submitted job's BO list.  This does the validation of the job's
 * BO list and reference counting for the lifetime of the job.
 */
static int
vc4_cl_lookup_bos(struct drm_device *dev,
		  struct drm_file *file_priv,
		  struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	uint32_t *handles;
	int ret = 0;
	int i;

	exec->bo_count = args->bo_handle_count;

	if (!exec->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		DRM_DEBUG("Rendering requires BOs to validate\n");
		return -EINVAL;
	}

	exec->bo = kvmalloc_array(exec->bo_count,
				  sizeof(struct drm_gem_cma_object *),
				  GFP_KERNEL | __GFP_ZERO);
	if (!exec->bo) {
		DRM_ERROR("Failed to allocate validated BO pointers\n");
		return -ENOMEM;
	}

	handles = kvmalloc_array(exec->bo_count, sizeof(uint32_t), GFP_KERNEL);
	if (!handles) {
		ret = -ENOMEM;
		DRM_ERROR("Failed to allocate incoming GEM handles\n");
		goto fail;
	}

	if (copy_from_user(handles, u64_to_user_ptr(args->bo_handles),
			   exec->bo_count * sizeof(uint32_t))) {
		ret = -EFAULT;
		DRM_ERROR("Failed to copy in GEM handles\n");
		goto fail;
	}

	spin_lock(&file_priv->table_lock);
	for (i = 0; i < exec->bo_count; i++) {
		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
						     handles[i]);
		if (!bo) {
			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
				  i, handles[i]);
			ret = -EINVAL;
			break;
		}

		drm_gem_object_get(bo);
		exec->bo[i] = (struct drm_gem_cma_object *)bo;
	}
	spin_unlock(&file_priv->table_lock);

	if (ret)
		goto fail_put_bo;

	for (i = 0; i < exec->bo_count; i++) {
		ret = vc4_bo_inc_usecnt(to_vc4_bo(&exec->bo[i]->base));
		if (ret)
			goto fail_dec_usecnt;
	}

	kvfree(handles);
	return 0;

fail_dec_usecnt:
	/* Decrease usecnt on acquired objects.
	 * We cannot rely on vc4_complete_exec() to release resources here,
	 * because vc4_complete_exec() has no information about which BO has
	 * had its ->usecnt incremented.
	 * To make things easier we just free everything explicitly and set
	 * exec->bo to NULL so that vc4_complete_exec() skips the 'BO release'
	 * step.
	 */
	for (i-- ; i >= 0; i--)
		vc4_bo_dec_usecnt(to_vc4_bo(&exec->bo[i]->base));

fail_put_bo:
	/* Release any reference to acquired objects. */
	for (i = 0; i < exec->bo_count && exec->bo[i]; i++)
		drm_gem_object_put_unlocked(&exec->bo[i]->base);

fail:
	kvfree(handles);
	kvfree(exec->bo);
	exec->bo = NULL;
	return ret;
}
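
/* Userspace side of the lookup above (sketch, hypothetical handle names;
 * assumes the uapi in vc4_drm.h): bo_handles points at a packed uint32_t
 * array with one GEM handle per BO the command list references:
 *
 *	uint32_t handles[3] = { fb_handle, vbo_handle, shader_bo_handle };
 *
 *	submit.bo_handles = (uintptr_t)handles;
 *	submit.bo_handle_count = 3;
 *
 * The bin CL and shader records then refer to these BOs by their index in
 * this array, which is what the validator checks against exec->bo[].
 */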

static int
vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	void *temp = NULL;
	void *bin;
	int ret = 0;
	uint32_t bin_offset = 0;
	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
					     16);
	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
	uint32_t exec_size = uniforms_offset + args->uniforms_size;
	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
					  args->shader_rec_count);
	struct vc4_bo *bo;

	if (shader_rec_offset < args->bin_cl_size ||
	    uniforms_offset < shader_rec_offset ||
	    exec_size < uniforms_offset ||
	    args->shader_rec_count >= (UINT_MAX /
				       sizeof(struct vc4_shader_state)) ||
	    temp_size < exec_size) {
		DRM_DEBUG("overflow in exec arguments\n");
		ret = -EINVAL;
		goto fail;
	}

	/* Allocate space where we'll store the copied in user command lists
	 * and shader records.
	 *
	 * We don't just copy directly into the BOs because we need to
	 * read the contents back for validation, and I think the
	 * bo->vaddr is uncached access.
	 */
	temp = kvmalloc_array(temp_size, 1, GFP_KERNEL);
	if (!temp) {
		DRM_ERROR("Failed to allocate storage for copying "
			  "in bin/render CLs.\n");
		ret = -ENOMEM;
		goto fail;
	}
	bin = temp + bin_offset;
	exec->shader_rec_u = temp + shader_rec_offset;
	exec->uniforms_u = temp + uniforms_offset;
	exec->shader_state = temp + exec_size;
	exec->shader_state_size = args->shader_rec_count;

	if (copy_from_user(bin,
			   u64_to_user_ptr(args->bin_cl),
			   args->bin_cl_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->shader_rec_u,
			   u64_to_user_ptr(args->shader_rec),
			   args->shader_rec_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->uniforms_u,
			   u64_to_user_ptr(args->uniforms),
			   args->uniforms_size)) {
		ret = -EFAULT;
		goto fail;
	}

	bo = vc4_bo_create(dev, exec_size, true, VC4_BO_TYPE_BCL);
	if (IS_ERR(bo)) {
		DRM_ERROR("Couldn't allocate BO for binning\n");
		ret = PTR_ERR(bo);
		goto fail;
	}
	exec->exec_bo = &bo->base;

	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
		      &exec->unref_list);

	exec->ct0ca = exec->exec_bo->paddr + bin_offset;

	exec->bin_u = bin;

	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
	exec->shader_rec_size = args->shader_rec_size;

	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
	exec->uniforms_size = args->uniforms_size;

	ret = vc4_validate_bin_cl(dev,
				  exec->exec_bo->vaddr + bin_offset,
				  bin,
				  exec);
	if (ret)
		goto fail;

	ret = vc4_validate_shader_recs(dev, exec);
	if (ret)
		goto fail;

	/* Block waiting on any previous rendering into the CS's VBO,
	 * IB, or textures, so that pixels are actually written by the
	 * time we try to read them.
	 */
	ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);

fail:
	kvfree(temp);
	return ret;
}
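
/* Rough layout built by vc4_get_bcl() above (offsets as computed at the top
 * of the function, shader_rec_offset rounded up to 16 bytes):
 *
 *	temp:    | bin CL | shader recs | uniforms | vc4_shader_state[] |
 *	exec_bo: | bin CL | shader recs | uniforms |
 *	         ^ bin_offset (0)       ^ uniforms_offset
 *	                  ^ shader_rec_offset
 *
 * The user's data lands in the cached temp buffer first so the validators can
 * read it back cheaply; the validated contents are then written into exec_bo,
 * whose vaddr mapping is (per the comment above) uncached.
 */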

static void
vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned long irqflags;
	unsigned i;

	/* If we got force-completed because of GPU reset rather than
	 * through our IRQ handler, signal the fence now.
	 */
	if (exec->fence) {
		dma_fence_signal(exec->fence);
		dma_fence_put(exec->fence);
	}

	if (exec->bo) {
		for (i = 0; i < exec->bo_count; i++) {
			struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);

			vc4_bo_dec_usecnt(bo);
			drm_gem_object_put_unlocked(&exec->bo[i]->base);
		}
		kvfree(exec->bo);
	}

	while (!list_empty(&exec->unref_list)) {
		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
						     struct vc4_bo, unref_head);
		list_del(&bo->unref_head);
		drm_gem_object_put_unlocked(&bo->base.base);
	}

	/* Free up the allocation of any bin slots we used. */
	spin_lock_irqsave(&vc4->job_lock, irqflags);
	vc4->bin_alloc_used &= ~exec->bin_slots;
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	mutex_lock(&vc4->power_lock);
	if (--vc4->power_refcount == 0) {
		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
		pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);

	kfree(exec);
}

void
vc4_job_handle_completed(struct vc4_dev *vc4)
{
	unsigned long irqflags;
	struct vc4_seqno_cb *cb, *cb_temp;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	while (!list_empty(&vc4->job_done_list)) {
		struct vc4_exec_info *exec =
			list_first_entry(&vc4->job_done_list,
					 struct vc4_exec_info, head);
		list_del(&exec->head);

		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_complete_exec(vc4->dev, exec);
		spin_lock_irqsave(&vc4->job_lock, irqflags);
	}

	list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
		if (cb->seqno <= vc4->finished_seqno) {
			list_del_init(&cb->work.entry);
			schedule_work(&cb->work);
		}
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

static void vc4_seqno_cb_work(struct work_struct *work)
{
	struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);

	cb->func(cb);
}

int vc4_queue_seqno_cb(struct drm_device *dev,
		       struct vc4_seqno_cb *cb, uint64_t seqno,
		       void (*func)(struct vc4_seqno_cb *cb))
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long irqflags;

	cb->func = func;
	INIT_WORK(&cb->work, vc4_seqno_cb_work);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (seqno > vc4->finished_seqno) {
		cb->seqno = seqno;
		list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
	} else {
		schedule_work(&cb->work);
	}
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	return ret;
}

/* Scheduled when any job has been completed, this walks the list of
 * jobs that had completed and unrefs their BOs and frees their exec
 * structs.
 */
static void
vc4_job_done_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, job_done_work);

	vc4_job_handle_completed(vc4);
}

static int
vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
				uint64_t seqno,
				uint64_t *timeout_ns)
{
	unsigned long start = jiffies;
	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);

	if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
		uint64_t delta = jiffies_to_nsecs(jiffies - start);

		if (*timeout_ns >= delta)
			*timeout_ns -= delta;
	}

	return ret;
}

static int
vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_vc4_wait_seqno *args = data;

	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
					       &args->timeout_ns);
}

static int
vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
		  struct drm_file *file_priv)
{
	int ret;
	struct drm_vc4_wait_bo *args = data;
	struct drm_gem_object *gem_obj;
	struct vc4_bo *bo;

	if (args->pad != 0)
		return -EINVAL;

	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!gem_obj) {
		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
		return -EINVAL;
	}
	bo = to_vc4_bo(gem_obj);

	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
					      &args->timeout_ns);

	drm_gem_object_put_unlocked(gem_obj);
	return ret;
}

/**
 * vc4_submit_cl_ioctl() - Submits a job (frame) to the VC4.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * This is the main entrypoint for userspace to submit a 3D frame to
 * the GPU.  Userspace provides the binner command list (if
 * applicable), and the kernel sets up the render command list to draw
 * to the framebuffer described in the ioctl, using the command lists
 * that the 3D engine's binner will produce.
 */
static int
vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_submit_cl *args = data;
	struct vc4_exec_info *exec;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR |
			     VC4_SUBMIT_CL_FIXED_RCL_ORDER |
			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X |
			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) {
		DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
		return -EINVAL;
	}

	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
	if (!exec) {
		DRM_ERROR("malloc failure on exec struct\n");
		return -ENOMEM;
	}

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount++ == 0) {
		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
		if (ret < 0) {
			mutex_unlock(&vc4->power_lock);
			vc4->power_refcount--;
			kfree(exec);
			return ret;
		}
	}
	mutex_unlock(&vc4->power_lock);

	exec->args = args;
	INIT_LIST_HEAD(&exec->unref_list);

	ret = vc4_cl_lookup_bos(dev, file_priv, exec);
	if (ret)
		goto fail;

	if (exec->args->bin_cl_size != 0) {
		ret = vc4_get_bcl(dev, exec);
		if (ret)
			goto fail;
	} else {
		exec->ct0ca = 0;
		exec->ct0ea = 0;
	}

	ret = vc4_get_rcl(dev, exec);
	if (ret)
		goto fail;

	ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx);
	if (ret)
		goto fail;

	/* Clear this out of the struct we'll be putting in the queue,
	 * since it's part of our stack.
	 */
	exec->args = NULL;

	ret = vc4_queue_submit(dev, exec, &acquire_ctx);
	if (ret)
		goto fail;

	/* Return the seqno for our job. */
	args->seqno = vc4->emit_seqno;

	return 0;

fail:
	vc4_complete_exec(vc4->dev, exec);

	return ret;
}
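
/* Minimal userspace submission sketch (assumes libdrm's drmIoctl() and the
 * uapi in vc4_drm.h; BO setup, the render-config fields and error handling
 * are omitted):
 *
 *	struct drm_vc4_submit_cl submit = {
 *		.bin_cl           = (uintptr_t)bin_cl,
 *		.bin_cl_size      = bin_cl_size,
 *		.shader_rec       = (uintptr_t)shader_recs,
 *		.shader_rec_size  = shader_rec_size,
 *		.shader_rec_count = shader_rec_count,
 *		.uniforms         = (uintptr_t)uniforms,
 *		.uniforms_size    = uniforms_size,
 *		.bo_handles       = (uintptr_t)bo_handles,
 *		.bo_handle_count  = bo_handle_count,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
 *
 * On success submit.seqno identifies the job and can be passed to
 * DRM_IOCTL_VC4_WAIT_SEQNO, or waited on per-BO with DRM_IOCTL_VC4_WAIT_BO.
 */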

void
vc4_gem_init(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	vc4->dma_fence_context = dma_fence_context_alloc(1);

	INIT_LIST_HEAD(&vc4->bin_job_list);
	INIT_LIST_HEAD(&vc4->render_job_list);
	INIT_LIST_HEAD(&vc4->job_done_list);
	INIT_LIST_HEAD(&vc4->seqno_cb_list);
	spin_lock_init(&vc4->job_lock);

	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
	timer_setup(&vc4->hangcheck.timer, vc4_hangcheck_elapsed, 0);

	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);

	mutex_init(&vc4->power_lock);

	INIT_LIST_HEAD(&vc4->purgeable.list);
	mutex_init(&vc4->purgeable.lock);
}

void
vc4_gem_destroy(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Waiting for exec to finish would need to be done before
	 * unregistering V3D.
	 */
	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);

	/* V3D should already have disabled its interrupt and cleared
	 * the overflow allocation registers.  Now free the object.
	 */
	if (vc4->bin_bo) {
		drm_gem_object_put_unlocked(&vc4->bin_bo->base.base);
		vc4->bin_bo = NULL;
	}

	if (vc4->hang_state)
		vc4_free_hang_state(dev, vc4->hang_state);
}

int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file_priv)
{
	struct drm_vc4_gem_madvise *args = data;
	struct drm_gem_object *gem_obj;
	struct vc4_bo *bo;
	int ret;

	switch (args->madv) {
	case VC4_MADV_DONTNEED:
	case VC4_MADV_WILLNEED:
		break;
	default:
		return -EINVAL;
	}

	if (args->pad != 0)
		return -EINVAL;

	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!gem_obj) {
		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
		return -ENOENT;
	}

	bo = to_vc4_bo(gem_obj);

	/* Only BOs exposed to userspace can be purged. */
	if (bo->madv == __VC4_MADV_NOTSUPP) {
		DRM_DEBUG("madvise not supported on this BO\n");
		ret = -EINVAL;
		goto out_put_gem;
	}

	/* Not sure it's safe to purge imported BOs. Let's just assume it's
	 * not until proven otherwise.
	 */
	if (gem_obj->import_attach) {
		DRM_DEBUG("madvise not supported on imported BOs\n");
		ret = -EINVAL;
		goto out_put_gem;
	}

	mutex_lock(&bo->madv_lock);

	if (args->madv == VC4_MADV_DONTNEED && bo->madv == VC4_MADV_WILLNEED &&
	    !refcount_read(&bo->usecnt)) {
		/* If the BO is about to be marked as purgeable, is not used
		 * and is not already purgeable or purged, add it to the
		 * purgeable list.
		 */
		vc4_bo_add_to_purgeable_pool(bo);
	} else if (args->madv == VC4_MADV_WILLNEED &&
		   bo->madv == VC4_MADV_DONTNEED &&
		   !refcount_read(&bo->usecnt)) {
		/* The BO has not been purged yet, just remove it from
		 * the purgeable list.
		 */
		vc4_bo_remove_from_purgeable_pool(bo);
	}

	/* Save the purged state. */
	args->retained = bo->madv != __VC4_MADV_PURGED;

	/* Update internal madv state only if the bo was not purged. */
	if (bo->madv != __VC4_MADV_PURGED)
		bo->madv = args->madv;

	mutex_unlock(&bo->madv_lock);

	ret = 0;

out_put_gem:
	drm_gem_object_put_unlocked(gem_obj);

	return ret;
}
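
/* Userspace usage sketch for the madvise ioctl above (assumes the uapi in
 * vc4_drm.h; reallocate_bo() is a hypothetical stand-in for whatever
 * re-creation path the caller has):
 *
 *	struct drm_vc4_gem_madvise madv = {
 *		.handle = bo_handle,
 *		.madv   = VC4_MADV_DONTNEED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_VC4_GEM_MADVISE, &madv);	// park the idle BO
 *	...
 *	madv.madv = VC4_MADV_WILLNEED;
 *	drmIoctl(fd, DRM_IOCTL_VC4_GEM_MADVISE, &madv);	// take it back
 *	if (!madv.retained)
 *		reallocate_bo();	// the kernel purged the backing pages
 *
 * retained is the only way userspace learns that a purge happened.
 */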