/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <uapi/linux/sched/types.h>
#include <drm/drmP.h>		/* DRM_ERROR() */
#include <drm/gpu_scheduler.h>
#include <drm/spsc_queue.h>

#define CREATE_TRACE_POINTS
#include <drm/gpu_scheduler_trace.h>

#define to_drm_sched_job(sched_job)		\
		container_of((sched_job), struct drm_sched_job, queue_node)

static bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
static void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);

/* Initialize a given run queue struct */
static void drm_sched_rq_init(struct drm_sched_rq *rq)
{
	spin_lock_init(&rq->lock);
	INIT_LIST_HEAD(&rq->entities);
	rq->current_entity = NULL;
}

static void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
				    struct drm_sched_entity *entity)
{
	if (!list_empty(&entity->list))
		return;
	spin_lock(&rq->lock);
	list_add_tail(&entity->list, &rq->entities);
	spin_unlock(&rq->lock);
}

static void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
				       struct drm_sched_entity *entity)
{
	if (list_empty(&entity->list))
		return;
	spin_lock(&rq->lock);
	list_del_init(&entity->list);
	if (rq->current_entity == entity)
		rq->current_entity = NULL;
	spin_unlock(&rq->lock);
}

/**
 * Select an entity which could provide a job to run
 *
 * @rq		The run queue to check.
 *
 * Try to find a ready entity, returns NULL if none found.
 */
static struct drm_sched_entity *
drm_sched_rq_select_entity(struct drm_sched_rq *rq)
{
	struct drm_sched_entity *entity;

	spin_lock(&rq->lock);

	entity = rq->current_entity;
	if (entity) {
		list_for_each_entry_continue(entity, &rq->entities, list) {
			if (drm_sched_entity_is_ready(entity)) {
				rq->current_entity = entity;
				spin_unlock(&rq->lock);
				return entity;
			}
		}
	}

	list_for_each_entry(entity, &rq->entities, list) {

		if (drm_sched_entity_is_ready(entity)) {
			rq->current_entity = entity;
			spin_unlock(&rq->lock);
			return entity;
		}

		if (entity == rq->current_entity)
			break;
	}

	spin_unlock(&rq->lock);

	return NULL;
}

/**
 * Init a context entity which is used by the scheduler when submitting
 * to a HW ring.
 *
 * @sched	The pointer to the scheduler
 * @entity	The pointer to a valid drm_sched_entity
 * @rq		The run queue this entity belongs to
 * @jobs	The max number of jobs in the job queue
 * @guilty	Atomic flag that is set once this entity is marked guilty of a hang
 *
 * Returns 0 on success, negative error code on failure.
 */
int drm_sched_entity_init(struct drm_gpu_scheduler *sched,
			  struct drm_sched_entity *entity,
			  struct drm_sched_rq *rq,
			  uint32_t jobs, atomic_t *guilty)
{
	if (!(sched && entity && rq))
		return -EINVAL;

	memset(entity, 0, sizeof(struct drm_sched_entity));
	INIT_LIST_HEAD(&entity->list);
	entity->rq = rq;
	entity->sched = sched;
	entity->guilty = guilty;

	spin_lock_init(&entity->rq_lock);
	spin_lock_init(&entity->queue_lock);
	spsc_queue_init(&entity->job_queue);

	atomic_set(&entity->fence_seq, 0);
	entity->fence_context = dma_fence_context_alloc(2);

	return 0;
}
EXPORT_SYMBOL(drm_sched_entity_init);
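
/*
 * A minimal usage sketch (not taken from a real driver): a driver would
 * typically embed struct drm_sched_entity in a per-context object and attach
 * it to one of the scheduler's run queues.  The names "my_ctx" and "my_ring"
 * below are hypothetical.
 *
 *	struct my_ctx {
 *		struct drm_sched_entity entity;
 *	};
 *
 *	int my_ctx_init(struct my_ring *ring, struct my_ctx *ctx)
 *	{
 *		struct drm_gpu_scheduler *sched = &ring->sched;
 *		struct drm_sched_rq *rq =
 *			&sched->sched_rq[DRM_SCHED_PRIORITY_NORMAL];
 *
 *		return drm_sched_entity_init(sched, &ctx->entity, rq, 32, NULL);
 *	}
 *
 * The matching teardown is drm_sched_entity_fini(sched, &ctx->entity), which
 * waits for (or discards) any jobs still queued on the entity.
 */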

/**
 * Query if entity is initialized
 *
 * @sched	Pointer to scheduler instance
 * @entity	The pointer to a valid scheduler entity
 *
 * Returns true if entity is initialized, false otherwise.
 */
static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler *sched,
					    struct drm_sched_entity *entity)
{
	return entity->sched == sched &&
		entity->rq != NULL;
}

/**
 * Check if entity is idle
 *
 * @entity	The pointer to a valid scheduler entity
 *
 * Returns true if the entity doesn't have any unscheduled jobs.
 */
static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity)
{
	/* make sure we see the most recent job_queue state */
	rmb();
	if (spsc_queue_peek(&entity->job_queue) == NULL)
		return true;

	return false;
}

/**
 * Check if entity is ready
 *
 * @entity	The pointer to a valid scheduler entity
 *
 * Returns true if the entity could provide a job.
 */
static bool drm_sched_entity_is_ready(struct drm_sched_entity *entity)
{
	if (spsc_queue_peek(&entity->job_queue) == NULL)
		return false;

	if (READ_ONCE(entity->dependency))
		return false;

	return true;
}

/**
 * Destroy a context entity
 *
 * @sched	Pointer to scheduler instance
 * @entity	The pointer to a valid scheduler entity
 *
 * Cleanup and free the allocated resources.
 */
void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
			   struct drm_sched_entity *entity)
{
	int r;

	if (!drm_sched_entity_is_initialized(sched, entity))
		return;
	/*
	 * The client will not queue more IBs during this fini: consume the
	 * existing queued IBs or discard them on SIGKILL.
	 */
	if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL)
		r = -ERESTARTSYS;
	else
		r = wait_event_killable(sched->job_scheduled,
					drm_sched_entity_is_idle(entity));
	drm_sched_entity_set_rq(entity, NULL);
	if (r) {
		struct drm_sched_job *job;

		/* Park the scheduler thread for a moment to make sure it isn't
		 * processing our entity.
		 */
		kthread_park(sched->thread);
		kthread_unpark(sched->thread);
		if (entity->dependency) {
			dma_fence_remove_callback(entity->dependency,
						  &entity->cb);
			dma_fence_put(entity->dependency);
			entity->dependency = NULL;
		}

		while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
			struct drm_sched_fence *s_fence = job->s_fence;

			drm_sched_fence_scheduled(s_fence);
			dma_fence_set_error(&s_fence->finished, -ESRCH);
			drm_sched_fence_finished(s_fence);
			WARN_ON(s_fence->parent);
			dma_fence_put(&s_fence->finished);
			sched->ops->free_job(job);
		}
	}
}
EXPORT_SYMBOL(drm_sched_entity_fini);

static void drm_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct drm_sched_entity *entity =
		container_of(cb, struct drm_sched_entity, cb);
	entity->dependency = NULL;
	dma_fence_put(f);
	drm_sched_wakeup(entity->sched);
}

static void drm_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct drm_sched_entity *entity =
		container_of(cb, struct drm_sched_entity, cb);
	entity->dependency = NULL;
	dma_fence_put(f);
}

void drm_sched_entity_set_rq(struct drm_sched_entity *entity,
			     struct drm_sched_rq *rq)
{
	if (entity->rq == rq)
		return;

	spin_lock(&entity->rq_lock);

	if (entity->rq)
		drm_sched_rq_remove_entity(entity->rq, entity);

	entity->rq = rq;
	if (rq)
		drm_sched_rq_add_entity(rq, entity);

	spin_unlock(&entity->rq_lock);
}
EXPORT_SYMBOL(drm_sched_entity_set_rq);
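
/*
 * Usage note (a sketch, not from a real driver): since an entity's run queue
 * encodes its priority, a driver can re-prioritize an already initialized
 * entity by moving it to another run queue of the same scheduler, e.g.:
 *
 *	void my_ctx_set_priority(struct drm_gpu_scheduler *sched,
 *				 struct drm_sched_entity *entity,
 *				 enum drm_sched_priority prio)
 *	{
 *		drm_sched_entity_set_rq(entity, &sched->sched_rq[prio]);
 *	}
 *
 * Passing a NULL run queue detaches the entity, which is what
 * drm_sched_entity_fini() does above.
 */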

bool drm_sched_dependency_optimized(struct dma_fence *fence,
				    struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = entity->sched;
	struct drm_sched_fence *s_fence;

	if (!fence || dma_fence_is_signaled(fence))
		return false;
	if (fence->context == entity->fence_context)
		return true;
	s_fence = to_drm_sched_fence(fence);
	if (s_fence && s_fence->sched == sched)
		return true;

	return false;
}
EXPORT_SYMBOL(drm_sched_dependency_optimized);
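
/*
 * A hedged example of what this helper is for: a driver's ->dependency()
 * callback can use it to decide whether an explicit pipeline sync on a fence
 * can be skipped because the scheduler already orders the work.  "my_job",
 * "to_my_job" and "my_job_next_dep" are hypothetical driver-side names.
 *
 *	static struct dma_fence *my_dependency(struct drm_sched_job *sched_job,
 *					       struct drm_sched_entity *entity)
 *	{
 *		struct my_job *job = to_my_job(sched_job);
 *		struct dma_fence *fence = my_job_next_dep(job);
 *
 *		if (fence && !drm_sched_dependency_optimized(fence, entity))
 *			job->need_pipeline_sync = true;
 *
 *		return fence;
 *	}
 */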

static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = entity->sched;
	struct dma_fence *fence = entity->dependency;
	struct drm_sched_fence *s_fence;

	if (fence->context == entity->fence_context) {
		/* We can ignore fences from ourself */
		dma_fence_put(entity->dependency);
		return false;
	}

	s_fence = to_drm_sched_fence(fence);
	if (s_fence && s_fence->sched == sched) {

		/*
		 * Fence is from the same scheduler, only need to wait for
		 * it to be scheduled
		 */
		fence = dma_fence_get(&s_fence->scheduled);
		dma_fence_put(entity->dependency);
		entity->dependency = fence;
		if (!dma_fence_add_callback(fence, &entity->cb,
					    drm_sched_entity_clear_dep))
			return true;

		/* Ignore it when it is already scheduled */
		dma_fence_put(fence);
		return false;
	}

	if (!dma_fence_add_callback(entity->dependency, &entity->cb,
				    drm_sched_entity_wakeup))
		return true;

	dma_fence_put(entity->dependency);
	return false;
}

static struct drm_sched_job *
drm_sched_entity_pop_job(struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = entity->sched;
	struct drm_sched_job *sched_job = to_drm_sched_job(
					spsc_queue_peek(&entity->job_queue));

	if (!sched_job)
		return NULL;

	while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
		if (drm_sched_entity_add_dependency_cb(entity))
			return NULL;

	/* skip jobs from an entity that was marked guilty */
	if (entity->guilty && atomic_read(entity->guilty))
		dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED);

	spsc_queue_pop(&entity->job_queue);
	return sched_job;
}

/**
 * Submit a job to the job queue
 *
 * @sched_job	The pointer to the job to submit
 * @entity	The entity to queue the job on
 *
 * The first job pushed to an idle entity also wakes up the scheduler.
 */
void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
			       struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = sched_job->sched;
	bool first = false;

	trace_drm_sched_job(sched_job, entity);

	spin_lock(&entity->queue_lock);
	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);

	spin_unlock(&entity->queue_lock);

	/* first job wakes up scheduler */
	if (first) {
		/* Add the entity to the run queue */
		spin_lock(&entity->rq_lock);
		drm_sched_rq_add_entity(entity->rq, entity);
		spin_unlock(&entity->rq_lock);
		drm_sched_wakeup(sched);
	}
}
EXPORT_SYMBOL(drm_sched_entity_push_job);
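
/*
 * A minimal submission sketch (hypothetical driver code, not part of this
 * file): a driver job embeds struct drm_sched_job, is bound to a scheduler
 * and entity with drm_sched_job_init() (defined further down), and is then
 * handed over with drm_sched_entity_push_job().  The finished fence is the
 * one to return to userspace; take a reference before pushing, because the
 * job may complete and be freed at any point afterwards.
 *
 *	struct my_job {
 *		struct drm_sched_job base;
 *		... driver-specific command stream data ...
 *	};
 *
 *	int my_submit(struct drm_gpu_scheduler *sched,
 *		      struct drm_sched_entity *entity,
 *		      struct my_job *job, struct dma_fence **out_fence)
 *	{
 *		int r = drm_sched_job_init(&job->base, sched, entity, NULL);
 *
 *		if (r)
 *			return r;
 *
 *		*out_fence = dma_fence_get(&job->base.s_fence->finished);
 *		drm_sched_entity_push_job(&job->base, entity);
 *		return 0;
 *	}
 */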

/* job_finish is called after the hw fence is signaled
 */
static void drm_sched_job_finish(struct work_struct *work)
{
	struct drm_sched_job *s_job = container_of(work, struct drm_sched_job,
						   finish_work);
	struct drm_gpu_scheduler *sched = s_job->sched;

	/* remove job from ring_mirror_list */
	spin_lock(&sched->job_list_lock);
	list_del_init(&s_job->node);
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
		struct drm_sched_job *next;

		spin_unlock(&sched->job_list_lock);
		cancel_delayed_work_sync(&s_job->work_tdr);
		spin_lock(&sched->job_list_lock);

		/* queue TDR for next job */
		next = list_first_entry_or_null(&sched->ring_mirror_list,
						struct drm_sched_job, node);

		if (next)
			schedule_delayed_work(&next->work_tdr, sched->timeout);
	}
	spin_unlock(&sched->job_list_lock);
	dma_fence_put(&s_job->s_fence->finished);
	sched->ops->free_job(s_job);
}

static void drm_sched_job_finish_cb(struct dma_fence *f,
				    struct dma_fence_cb *cb)
{
	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
						 finish_cb);
	schedule_work(&job->finish_work);
}

static void drm_sched_job_begin(struct drm_sched_job *s_job)
{
	struct drm_gpu_scheduler *sched = s_job->sched;

	dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
			       drm_sched_job_finish_cb);

	spin_lock(&sched->job_list_lock);
	list_add_tail(&s_job->node, &sched->ring_mirror_list);
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    list_first_entry_or_null(&sched->ring_mirror_list,
				     struct drm_sched_job, node) == s_job)
		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
	spin_unlock(&sched->job_list_lock);
}

static void drm_sched_job_timedout(struct work_struct *work)
{
	struct drm_sched_job *job = container_of(work, struct drm_sched_job,
						 work_tdr.work);

	job->sched->ops->timedout_job(job);
}

void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
{
	struct drm_sched_job *s_job;
	struct drm_sched_entity *entity, *tmp;
	int i;

	spin_lock(&sched->job_list_lock);
	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
		if (s_job->s_fence->parent &&
		    dma_fence_remove_callback(s_job->s_fence->parent,
					      &s_job->s_fence->cb)) {
			dma_fence_put(s_job->s_fence->parent);
			s_job->s_fence->parent = NULL;
			atomic_dec(&sched->hw_rq_count);
		}
	}
	spin_unlock(&sched->job_list_lock);

	if (bad && bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
		atomic_inc(&bad->karma);
		/* don't increase @bad's karma if it's from the KERNEL RQ,
		 * because sometimes a GPU hang causes kernel jobs (like VM
		 * updating jobs) to be corrupted, but kernel jobs are always
		 * considered good.
		 */
		for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL; i++) {
			struct drm_sched_rq *rq = &sched->sched_rq[i];

			spin_lock(&rq->lock);
			list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
				if (bad->s_fence->scheduled.context == entity->fence_context) {
					if (atomic_read(&bad->karma) > bad->sched->hang_limit)
						if (entity->guilty)
							atomic_set(entity->guilty, 1);
					break;
				}
			}
			spin_unlock(&rq->lock);
			if (&entity->list != &rq->entities)
				break;
		}
	}
}
EXPORT_SYMBOL(drm_sched_hw_job_reset);

void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *s_job, *tmp;
	bool found_guilty = false;
	uint64_t guilty_context = 0;
	int r;

	spin_lock(&sched->job_list_lock);
	s_job = list_first_entry_or_null(&sched->ring_mirror_list,
					 struct drm_sched_job, node);
	if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
		schedule_delayed_work(&s_job->work_tdr, sched->timeout);

	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
		struct drm_sched_fence *s_fence = s_job->s_fence;
		struct dma_fence *fence;

		if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
			found_guilty = true;
			guilty_context = s_job->s_fence->scheduled.context;
		}

		if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
			dma_fence_set_error(&s_fence->finished, -ECANCELED);

		spin_unlock(&sched->job_list_lock);
		fence = sched->ops->run_job(s_job);
		atomic_inc(&sched->hw_rq_count);
		if (fence) {
			s_fence->parent = dma_fence_get(fence);
			r = dma_fence_add_callback(fence, &s_fence->cb,
						   drm_sched_process_job);
			if (r == -ENOENT)
				drm_sched_process_job(fence, &s_fence->cb);
			else if (r)
				DRM_ERROR("fence add callback failed (%d)\n",
					  r);
			dma_fence_put(fence);
		} else {
			drm_sched_process_job(NULL, &s_fence->cb);
		}
		spin_lock(&sched->job_list_lock);
	}
	spin_unlock(&sched->job_list_lock);
}
EXPORT_SYMBOL(drm_sched_job_recovery);
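
/*
 * A sketch of how a driver might combine the reset helpers above, loosely
 * modelled on typical timeout handling (hypothetical code, "my_hw_reset" and
 * "my_timedout_job" are made-up names):
 *
 *	static void my_timedout_job(struct drm_sched_job *bad)
 *	{
 *		struct drm_gpu_scheduler *sched = bad->sched;
 *
 *		kthread_park(sched->thread);
 *		drm_sched_hw_job_reset(sched, bad);
 *		my_hw_reset(sched);
 *		drm_sched_job_recovery(sched);
 *		kthread_unpark(sched->thread);
 *	}
 *
 * drm_sched_hw_job_reset() detaches the HW fences and bumps karma for the
 * bad job; drm_sched_job_recovery() re-submits the surviving jobs after the
 * driver has reset the hardware.
 */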

/* init a sched_job with basic fields */
int drm_sched_job_init(struct drm_sched_job *job,
		       struct drm_gpu_scheduler *sched,
		       struct drm_sched_entity *entity,
		       void *owner)
{
	job->sched = sched;
	job->s_priority = entity->rq - sched->sched_rq;
	job->s_fence = drm_sched_fence_create(entity, owner);
	if (!job->s_fence)
		return -ENOMEM;
	job->id = atomic64_inc_return(&sched->job_id_count);

	INIT_WORK(&job->finish_work, drm_sched_job_finish);
	INIT_LIST_HEAD(&job->node);
	INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);

	return 0;
}
EXPORT_SYMBOL(drm_sched_job_init);

/**
 * Return true if we can push more jobs to the hw.
 */
static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
{
	return atomic_read(&sched->hw_rq_count) <
		sched->hw_submission_limit;
}

/**
 * Wake up the scheduler when it is ready
 */
static void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
{
	if (drm_sched_ready(sched))
		wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * Select next entity to process
 */
static struct drm_sched_entity *
drm_sched_select_entity(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_entity *entity;
	int i;

	if (!drm_sched_ready(sched))
		return NULL;

	/* Kernel run queue has higher priority than normal run queue */
	for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
		entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
		if (entity)
			break;
	}

	return entity;
}

static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct drm_sched_fence *s_fence =
		container_of(cb, struct drm_sched_fence, cb);
	struct drm_gpu_scheduler *sched = s_fence->sched;

	dma_fence_get(&s_fence->finished);
	atomic_dec(&sched->hw_rq_count);
	drm_sched_fence_finished(s_fence);

	trace_drm_sched_process_job(s_fence);
	dma_fence_put(&s_fence->finished);
	wake_up_interruptible(&sched->wake_up_worker);
}

static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
{
	if (kthread_should_park()) {
		kthread_parkme();
		return true;
	}

	return false;
}

static int drm_sched_main(void *param)
{
	struct sched_param sparam = {.sched_priority = 1};
	struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
	int r;

	sched_setscheduler(current, SCHED_FIFO, &sparam);

	while (!kthread_should_stop()) {
		struct drm_sched_entity *entity = NULL;
		struct drm_sched_fence *s_fence;
		struct drm_sched_job *sched_job;
		struct dma_fence *fence;

		wait_event_interruptible(sched->wake_up_worker,
					 (!drm_sched_blocked(sched) &&
					  (entity = drm_sched_select_entity(sched))) ||
					 kthread_should_stop());

		if (!entity)
			continue;

		sched_job = drm_sched_entity_pop_job(entity);
		if (!sched_job)
			continue;

		s_fence = sched_job->s_fence;

		atomic_inc(&sched->hw_rq_count);
		drm_sched_job_begin(sched_job);

		fence = sched->ops->run_job(sched_job);
		drm_sched_fence_scheduled(s_fence);

		if (fence) {
			s_fence->parent = dma_fence_get(fence);
			r = dma_fence_add_callback(fence, &s_fence->cb,
						   drm_sched_process_job);
			if (r == -ENOENT)
				drm_sched_process_job(fence, &s_fence->cb);
			else if (r)
				DRM_ERROR("fence add callback failed (%d)\n",
					  r);
			dma_fence_put(fence);
		} else {
			drm_sched_process_job(NULL, &s_fence->cb);
		}

		wake_up(&sched->job_scheduled);
	}
	return 0;
}

/**
 * Init a gpu scheduler instance
 *
 * @sched		The pointer to the scheduler
 * @ops			The backend operations for this scheduler.
 * @hw_submission	Max number of hw submissions that can be in flight.
 * @hang_limit		Number of times a job may trigger a hang before its
 *			entity is marked guilty.
 * @timeout		Timeout in jiffies before the TDR handler fires for a
 *			submission, or MAX_SCHEDULE_TIMEOUT to disable it.
 * @name		Name used for debugging
 *
 * Return 0 on success, otherwise error code.
 */
int drm_sched_init(struct drm_gpu_scheduler *sched,
		   const struct drm_sched_backend_ops *ops,
		   unsigned hw_submission,
		   unsigned hang_limit,
		   long timeout,
		   const char *name)
{
	int i;

	sched->ops = ops;
	sched->hw_submission_limit = hw_submission;
	sched->name = name;
	sched->timeout = timeout;
	sched->hang_limit = hang_limit;
	for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_MAX; i++)
		drm_sched_rq_init(&sched->sched_rq[i]);

	init_waitqueue_head(&sched->wake_up_worker);
	init_waitqueue_head(&sched->job_scheduled);
	INIT_LIST_HEAD(&sched->ring_mirror_list);
	spin_lock_init(&sched->job_list_lock);
	atomic_set(&sched->hw_rq_count, 0);
	atomic64_set(&sched->job_id_count, 0);

	/* Each scheduler will run on a separate kernel thread */
	sched->thread = kthread_run(drm_sched_main, sched, sched->name);
	if (IS_ERR(sched->thread)) {
		DRM_ERROR("Failed to create scheduler for %s.\n", name);
		return PTR_ERR(sched->thread);
	}

	return 0;
}
EXPORT_SYMBOL(drm_sched_init);
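
/*
 * A minimal bring-up sketch (hypothetical driver code): the driver provides
 * the backend callbacks and creates one scheduler per hardware ring.  The
 * names my_sched_ops, my_ring and the my_* callbacks are made up; the call
 * below uses 16 in-flight submissions, a hang_limit of 0 and a 10 second
 * timeout.
 *
 *	static const struct drm_sched_backend_ops my_sched_ops = {
 *		.dependency	= my_dependency,
 *		.run_job	= my_run_job,
 *		.timedout_job	= my_timedout_job,
 *		.free_job	= my_free_job,
 *	};
 *
 *	int my_ring_init(struct my_ring *ring)
 *	{
 *		return drm_sched_init(&ring->sched, &my_sched_ops,
 *				      16, 0, msecs_to_jiffies(10000),
 *				      ring->name);
 *	}
 *
 * The matching teardown is drm_sched_fini(&ring->sched), defined below.
 */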

/**
 * Destroy a gpu scheduler
 *
 * @sched	The pointer to the scheduler
 */
void drm_sched_fini(struct drm_gpu_scheduler *sched)
{
	if (sched->thread)
		kthread_stop(sched->thread);
}
EXPORT_SYMBOL(drm_sched_fini);