2 * SPDX-License-Identifier: MIT
4 * Copyright © 2018 Intel Corporation
7 #include <linux/prime_numbers.h>
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
24 #define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
26 static struct i915_vma
*create_scratch(struct intel_gt
*gt
)
28 struct drm_i915_gem_object
*obj
;
32 obj
= i915_gem_object_create_internal(gt
->i915
, PAGE_SIZE
);
36 i915_gem_object_set_cache_coherency(obj
, I915_CACHING_CACHED
);
38 vma
= i915_vma_instance(obj
, >
->ggtt
->vm
, NULL
);
40 i915_gem_object_put(obj
);
44 err
= i915_vma_pin(vma
, 0, 0, PIN_GLOBAL
);
46 i915_gem_object_put(obj
);
53 static void engine_heartbeat_disable(struct intel_engine_cs
*engine
,
56 *saved
= engine
->props
.heartbeat_interval_ms
;
57 engine
->props
.heartbeat_interval_ms
= 0;
59 intel_engine_pm_get(engine
);
60 intel_engine_park_heartbeat(engine
);
63 static void engine_heartbeat_enable(struct intel_engine_cs
*engine
,
66 intel_engine_pm_put(engine
);
68 engine
->props
.heartbeat_interval_ms
= saved
;
71 static int live_sanitycheck(void *arg
)
73 struct intel_gt
*gt
= arg
;
74 struct intel_engine_cs
*engine
;
75 enum intel_engine_id id
;
76 struct igt_spinner spin
;
79 if (!HAS_LOGICAL_RING_CONTEXTS(gt
->i915
))
82 if (igt_spinner_init(&spin
, gt
))
85 for_each_engine(engine
, gt
, id
) {
86 struct intel_context
*ce
;
87 struct i915_request
*rq
;
89 ce
= intel_context_create(engine
);
95 rq
= igt_spinner_create_request(&spin
, ce
, MI_NOOP
);
101 i915_request_add(rq
);
102 if (!igt_wait_for_spinner(&spin
, rq
)) {
103 GEM_TRACE("spinner failed to start\n");
105 intel_gt_set_wedged(gt
);
110 igt_spinner_end(&spin
);
111 if (igt_flush_test(gt
->i915
)) {
117 intel_context_put(ce
);
122 igt_spinner_fini(&spin
);
126 static int live_unlite_restore(struct intel_gt
*gt
, int prio
)
128 struct intel_engine_cs
*engine
;
129 enum intel_engine_id id
;
130 struct igt_spinner spin
;
134 * Check that we can correctly context switch between 2 instances
135 * on the same engine from the same parent context.
138 if (igt_spinner_init(&spin
, gt
))
142 for_each_engine(engine
, gt
, id
) {
143 struct intel_context
*ce
[2] = {};
144 struct i915_request
*rq
[2];
145 struct igt_live_test t
;
149 if (prio
&& !intel_engine_has_preemption(engine
))
152 if (!intel_engine_can_store_dword(engine
))
155 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
)) {
159 engine_heartbeat_disable(engine
, &saved
);
161 for (n
= 0; n
< ARRAY_SIZE(ce
); n
++) {
162 struct intel_context
*tmp
;
164 tmp
= intel_context_create(engine
);
170 err
= intel_context_pin(tmp
);
172 intel_context_put(tmp
);
177 * Setup the pair of contexts such that if we
178 * lite-restore using the RING_TAIL from ce[1] it
179 * will execute garbage from ce[0]->ring.
181 memset(tmp
->ring
->vaddr
,
182 POISON_INUSE
, /* IPEHR: 0x5a5a5a5a [hung!] */
183 tmp
->ring
->vma
->size
);
187 GEM_BUG_ON(!ce
[1]->ring
->size
);
188 intel_ring_reset(ce
[1]->ring
, ce
[1]->ring
->size
/ 2);
189 __execlists_update_reg_state(ce
[1], engine
);
191 rq
[0] = igt_spinner_create_request(&spin
, ce
[0], MI_ARB_CHECK
);
193 err
= PTR_ERR(rq
[0]);
197 i915_request_get(rq
[0]);
198 i915_request_add(rq
[0]);
199 GEM_BUG_ON(rq
[0]->postfix
> ce
[1]->ring
->emit
);
201 if (!igt_wait_for_spinner(&spin
, rq
[0])) {
202 i915_request_put(rq
[0]);
206 rq
[1] = i915_request_create(ce
[1]);
208 err
= PTR_ERR(rq
[1]);
209 i915_request_put(rq
[0]);
215 * Ensure we do the switch to ce[1] on completion.
217 * rq[0] is already submitted, so this should reduce
218 * to a no-op (a wait on a request on the same engine
219 * uses the submit fence, not the completion fence),
220 * but it will install a dependency on rq[1] for rq[0]
221 * that will prevent the pair being reordered by
224 i915_request_await_dma_fence(rq
[1], &rq
[0]->fence
);
227 i915_request_get(rq
[1]);
228 i915_request_add(rq
[1]);
229 GEM_BUG_ON(rq
[1]->postfix
<= rq
[0]->postfix
);
230 i915_request_put(rq
[0]);
233 struct i915_sched_attr attr
= {
237 /* Alternatively preempt the spinner with ce[1] */
238 engine
->schedule(rq
[1], &attr
);
241 /* And switch back to ce[0] for good measure */
242 rq
[0] = i915_request_create(ce
[0]);
244 err
= PTR_ERR(rq
[0]);
245 i915_request_put(rq
[1]);
249 i915_request_await_dma_fence(rq
[0], &rq
[1]->fence
);
250 i915_request_get(rq
[0]);
251 i915_request_add(rq
[0]);
252 GEM_BUG_ON(rq
[0]->postfix
> rq
[1]->postfix
);
253 i915_request_put(rq
[1]);
254 i915_request_put(rq
[0]);
257 tasklet_kill(&engine
->execlists
.tasklet
); /* flush submission */
258 igt_spinner_end(&spin
);
259 for (n
= 0; n
< ARRAY_SIZE(ce
); n
++) {
260 if (IS_ERR_OR_NULL(ce
[n
]))
263 intel_context_unpin(ce
[n
]);
264 intel_context_put(ce
[n
]);
267 engine_heartbeat_enable(engine
, saved
);
268 if (igt_live_test_end(&t
))
274 igt_spinner_fini(&spin
);
278 static int live_unlite_switch(void *arg
)
280 return live_unlite_restore(arg
, 0);
283 static int live_unlite_preempt(void *arg
)
285 return live_unlite_restore(arg
, I915_USER_PRIORITY(I915_PRIORITY_MAX
));
289 emit_semaphore_chain(struct i915_request
*rq
, struct i915_vma
*vma
, int idx
)
293 cs
= intel_ring_begin(rq
, 10);
297 *cs
++ = MI_ARB_ON_OFF
| MI_ARB_ENABLE
;
299 *cs
++ = MI_SEMAPHORE_WAIT
|
300 MI_SEMAPHORE_GLOBAL_GTT
|
302 MI_SEMAPHORE_SAD_NEQ_SDD
;
304 *cs
++ = i915_ggtt_offset(vma
) + 4 * idx
;
308 *cs
++ = MI_STORE_DWORD_IMM_GEN4
| MI_USE_GGTT
;
309 *cs
++ = i915_ggtt_offset(vma
) + 4 * (idx
- 1);
319 *cs
++ = MI_ARB_ON_OFF
| MI_ARB_DISABLE
;
321 intel_ring_advance(rq
, cs
);
325 static struct i915_request
*
326 semaphore_queue(struct intel_engine_cs
*engine
, struct i915_vma
*vma
, int idx
)
328 struct intel_context
*ce
;
329 struct i915_request
*rq
;
332 ce
= intel_context_create(engine
);
336 rq
= intel_context_create_request(ce
);
341 if (rq
->engine
->emit_init_breadcrumb
)
342 err
= rq
->engine
->emit_init_breadcrumb(rq
);
344 err
= emit_semaphore_chain(rq
, vma
, idx
);
346 i915_request_get(rq
);
347 i915_request_add(rq
);
352 intel_context_put(ce
);
357 release_queue(struct intel_engine_cs
*engine
,
358 struct i915_vma
*vma
,
361 struct i915_sched_attr attr
= {
364 struct i915_request
*rq
;
367 rq
= intel_engine_create_kernel_request(engine
);
371 cs
= intel_ring_begin(rq
, 4);
373 i915_request_add(rq
);
377 *cs
++ = MI_STORE_DWORD_IMM_GEN4
| MI_USE_GGTT
;
378 *cs
++ = i915_ggtt_offset(vma
) + 4 * (idx
- 1);
382 intel_ring_advance(rq
, cs
);
384 i915_request_get(rq
);
385 i915_request_add(rq
);
388 engine
->schedule(rq
, &attr
);
389 local_bh_enable(); /* kick tasklet */
391 i915_request_put(rq
);
397 slice_semaphore_queue(struct intel_engine_cs
*outer
,
398 struct i915_vma
*vma
,
401 struct intel_engine_cs
*engine
;
402 struct i915_request
*head
;
403 enum intel_engine_id id
;
406 head
= semaphore_queue(outer
, vma
, n
++);
408 return PTR_ERR(head
);
410 for_each_engine(engine
, outer
->gt
, id
) {
411 for (i
= 0; i
< count
; i
++) {
412 struct i915_request
*rq
;
414 rq
= semaphore_queue(engine
, vma
, n
++);
420 i915_request_put(rq
);
424 err
= release_queue(outer
, vma
, n
, INT_MAX
);
428 if (i915_request_wait(head
, 0,
429 2 * RUNTIME_INFO(outer
->i915
)->num_engines
* (count
+ 2) * (count
+ 3)) < 0) {
430 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
433 intel_gt_set_wedged(outer
->gt
);
438 i915_request_put(head
);
442 static int live_timeslice_preempt(void *arg
)
444 struct intel_gt
*gt
= arg
;
445 struct drm_i915_gem_object
*obj
;
446 struct i915_vma
*vma
;
452 * If a request takes too long, we would like to give other users
453 * a fair go on the GPU. In particular, users may create batches
454 * that wait upon external input, where that input may even be
455 * supplied by another GPU job. To avoid blocking forever, we
456 * need to preempt the current task and replace it with another
459 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION
))
462 obj
= i915_gem_object_create_internal(gt
->i915
, PAGE_SIZE
);
466 vma
= i915_vma_instance(obj
, >
->ggtt
->vm
, NULL
);
472 vaddr
= i915_gem_object_pin_map(obj
, I915_MAP_WC
);
474 err
= PTR_ERR(vaddr
);
478 err
= i915_vma_pin(vma
, 0, 0, PIN_GLOBAL
);
482 for_each_prime_number_from(count
, 1, 16) {
483 struct intel_engine_cs
*engine
;
484 enum intel_engine_id id
;
486 for_each_engine(engine
, gt
, id
) {
489 if (!intel_engine_has_preemption(engine
))
492 memset(vaddr
, 0, PAGE_SIZE
);
494 engine_heartbeat_disable(engine
, &saved
);
495 err
= slice_semaphore_queue(engine
, vma
, count
);
496 engine_heartbeat_enable(engine
, saved
);
500 if (igt_flush_test(gt
->i915
)) {
510 i915_gem_object_unpin_map(obj
);
512 i915_gem_object_put(obj
);
516 static struct i915_request
*nop_request(struct intel_engine_cs
*engine
)
518 struct i915_request
*rq
;
520 rq
= intel_engine_create_kernel_request(engine
);
524 i915_request_get(rq
);
525 i915_request_add(rq
);
530 static int wait_for_submit(struct intel_engine_cs
*engine
,
531 struct i915_request
*rq
,
532 unsigned long timeout
)
537 intel_engine_flush_submission(engine
);
538 if (i915_request_is_active(rq
))
540 } while (time_before(jiffies
, timeout
));
545 static long timeslice_threshold(const struct intel_engine_cs
*engine
)
547 return 2 * msecs_to_jiffies_timeout(timeslice(engine
)) + 1;
550 static int live_timeslice_queue(void *arg
)
552 struct intel_gt
*gt
= arg
;
553 struct drm_i915_gem_object
*obj
;
554 struct intel_engine_cs
*engine
;
555 enum intel_engine_id id
;
556 struct i915_vma
*vma
;
561 * Make sure that even if ELSP[0] and ELSP[1] are filled with
562 * timeslicing between them disabled, we *do* enable timeslicing
563 * if the queue demands it. (Normally, we do not submit if
564 * ELSP[1] is already occupied, so must rely on timeslicing to
565 * eject ELSP[0] in favour of the queue.)
567 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION
))
570 obj
= i915_gem_object_create_internal(gt
->i915
, PAGE_SIZE
);
574 vma
= i915_vma_instance(obj
, >
->ggtt
->vm
, NULL
);
580 vaddr
= i915_gem_object_pin_map(obj
, I915_MAP_WC
);
582 err
= PTR_ERR(vaddr
);
586 err
= i915_vma_pin(vma
, 0, 0, PIN_GLOBAL
);
590 for_each_engine(engine
, gt
, id
) {
591 struct i915_sched_attr attr
= {
592 .priority
= I915_USER_PRIORITY(I915_PRIORITY_MAX
),
594 struct i915_request
*rq
, *nop
;
597 if (!intel_engine_has_preemption(engine
))
600 engine_heartbeat_disable(engine
, &saved
);
601 memset(vaddr
, 0, PAGE_SIZE
);
603 /* ELSP[0]: semaphore wait */
604 rq
= semaphore_queue(engine
, vma
, 0);
609 engine
->schedule(rq
, &attr
);
610 err
= wait_for_submit(engine
, rq
, HZ
/ 2);
612 pr_err("%s: Timed out trying to submit semaphores\n",
617 /* ELSP[1]: nop request */
618 nop
= nop_request(engine
);
623 err
= wait_for_submit(engine
, nop
, HZ
/ 2);
624 i915_request_put(nop
);
626 pr_err("%s: Timed out trying to submit nop\n",
631 GEM_BUG_ON(i915_request_completed(rq
));
632 GEM_BUG_ON(execlists_active(&engine
->execlists
) != rq
);
634 /* Queue: semaphore signal, matching priority as semaphore */
635 err
= release_queue(engine
, vma
, 1, effective_prio(rq
));
639 intel_engine_flush_submission(engine
);
640 if (!READ_ONCE(engine
->execlists
.timer
.expires
) &&
641 !i915_request_completed(rq
)) {
642 struct drm_printer p
=
643 drm_info_printer(gt
->i915
->drm
.dev
);
645 GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
647 intel_engine_dump(engine
, &p
,
648 "%s\n", engine
->name
);
651 memset(vaddr
, 0xff, PAGE_SIZE
);
655 /* Timeslice every jiffy, so within 2 we should signal */
656 if (i915_request_wait(rq
, 0, timeslice_threshold(engine
)) < 0) {
657 struct drm_printer p
=
658 drm_info_printer(gt
->i915
->drm
.dev
);
660 pr_err("%s: Failed to timeslice into queue\n",
662 intel_engine_dump(engine
, &p
,
663 "%s\n", engine
->name
);
665 memset(vaddr
, 0xff, PAGE_SIZE
);
669 i915_request_put(rq
);
671 engine_heartbeat_enable(engine
, saved
);
678 i915_gem_object_unpin_map(obj
);
680 i915_gem_object_put(obj
);
684 static int live_busywait_preempt(void *arg
)
686 struct intel_gt
*gt
= arg
;
687 struct i915_gem_context
*ctx_hi
, *ctx_lo
;
688 struct intel_engine_cs
*engine
;
689 struct drm_i915_gem_object
*obj
;
690 struct i915_vma
*vma
;
691 enum intel_engine_id id
;
696 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
697 * preempt the busywaits used to synchronise between rings.
700 ctx_hi
= kernel_context(gt
->i915
);
703 ctx_hi
->sched
.priority
=
704 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY
);
706 ctx_lo
= kernel_context(gt
->i915
);
709 ctx_lo
->sched
.priority
=
710 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY
);
712 obj
= i915_gem_object_create_internal(gt
->i915
, PAGE_SIZE
);
718 map
= i915_gem_object_pin_map(obj
, I915_MAP_WC
);
724 vma
= i915_vma_instance(obj
, >
->ggtt
->vm
, NULL
);
730 err
= i915_vma_pin(vma
, 0, 0, PIN_GLOBAL
);
734 for_each_engine(engine
, gt
, id
) {
735 struct i915_request
*lo
, *hi
;
736 struct igt_live_test t
;
739 if (!intel_engine_has_preemption(engine
))
742 if (!intel_engine_can_store_dword(engine
))
745 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
)) {
751 * We create two requests. The low priority request
752 * busywaits on a semaphore (inside the ringbuffer where
753 * is should be preemptible) and the high priority requests
754 * uses a MI_STORE_DWORD_IMM to update the semaphore value
755 * allowing the first request to complete. If preemption
756 * fails, we hang instead.
759 lo
= igt_request_alloc(ctx_lo
, engine
);
765 cs
= intel_ring_begin(lo
, 8);
768 i915_request_add(lo
);
772 *cs
++ = MI_STORE_DWORD_IMM_GEN4
| MI_USE_GGTT
;
773 *cs
++ = i915_ggtt_offset(vma
);
777 /* XXX Do we need a flush + invalidate here? */
779 *cs
++ = MI_SEMAPHORE_WAIT
|
780 MI_SEMAPHORE_GLOBAL_GTT
|
782 MI_SEMAPHORE_SAD_EQ_SDD
;
784 *cs
++ = i915_ggtt_offset(vma
);
787 intel_ring_advance(lo
, cs
);
789 i915_request_get(lo
);
790 i915_request_add(lo
);
792 if (wait_for(READ_ONCE(*map
), 10)) {
793 i915_request_put(lo
);
798 /* Low priority request should be busywaiting now */
799 if (i915_request_wait(lo
, 0, 1) != -ETIME
) {
800 i915_request_put(lo
);
801 pr_err("%s: Busywaiting request did not!\n",
807 hi
= igt_request_alloc(ctx_hi
, engine
);
810 i915_request_put(lo
);
814 cs
= intel_ring_begin(hi
, 4);
817 i915_request_add(hi
);
818 i915_request_put(lo
);
822 *cs
++ = MI_STORE_DWORD_IMM_GEN4
| MI_USE_GGTT
;
823 *cs
++ = i915_ggtt_offset(vma
);
827 intel_ring_advance(hi
, cs
);
828 i915_request_add(hi
);
830 if (i915_request_wait(lo
, 0, HZ
/ 5) < 0) {
831 struct drm_printer p
= drm_info_printer(gt
->i915
->drm
.dev
);
833 pr_err("%s: Failed to preempt semaphore busywait!\n",
836 intel_engine_dump(engine
, &p
, "%s\n", engine
->name
);
839 i915_request_put(lo
);
840 intel_gt_set_wedged(gt
);
844 GEM_BUG_ON(READ_ONCE(*map
));
845 i915_request_put(lo
);
847 if (igt_live_test_end(&t
)) {
857 i915_gem_object_unpin_map(obj
);
859 i915_gem_object_put(obj
);
861 kernel_context_close(ctx_lo
);
863 kernel_context_close(ctx_hi
);
867 static struct i915_request
*
868 spinner_create_request(struct igt_spinner
*spin
,
869 struct i915_gem_context
*ctx
,
870 struct intel_engine_cs
*engine
,
873 struct intel_context
*ce
;
874 struct i915_request
*rq
;
876 ce
= i915_gem_context_get_engine(ctx
, engine
->legacy_idx
);
880 rq
= igt_spinner_create_request(spin
, ce
, arb
);
881 intel_context_put(ce
);
885 static int live_preempt(void *arg
)
887 struct intel_gt
*gt
= arg
;
888 struct i915_gem_context
*ctx_hi
, *ctx_lo
;
889 struct igt_spinner spin_hi
, spin_lo
;
890 struct intel_engine_cs
*engine
;
891 enum intel_engine_id id
;
894 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
897 if (!(gt
->i915
->caps
.scheduler
& I915_SCHEDULER_CAP_PREEMPTION
))
898 pr_err("Logical preemption supported, but not exposed\n");
900 if (igt_spinner_init(&spin_hi
, gt
))
903 if (igt_spinner_init(&spin_lo
, gt
))
906 ctx_hi
= kernel_context(gt
->i915
);
909 ctx_hi
->sched
.priority
=
910 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY
);
912 ctx_lo
= kernel_context(gt
->i915
);
915 ctx_lo
->sched
.priority
=
916 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY
);
918 for_each_engine(engine
, gt
, id
) {
919 struct igt_live_test t
;
920 struct i915_request
*rq
;
922 if (!intel_engine_has_preemption(engine
))
925 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
)) {
930 rq
= spinner_create_request(&spin_lo
, ctx_lo
, engine
,
937 i915_request_add(rq
);
938 if (!igt_wait_for_spinner(&spin_lo
, rq
)) {
939 GEM_TRACE("lo spinner failed to start\n");
941 intel_gt_set_wedged(gt
);
946 rq
= spinner_create_request(&spin_hi
, ctx_hi
, engine
,
949 igt_spinner_end(&spin_lo
);
954 i915_request_add(rq
);
955 if (!igt_wait_for_spinner(&spin_hi
, rq
)) {
956 GEM_TRACE("hi spinner failed to start\n");
958 intel_gt_set_wedged(gt
);
963 igt_spinner_end(&spin_hi
);
964 igt_spinner_end(&spin_lo
);
966 if (igt_live_test_end(&t
)) {
974 kernel_context_close(ctx_lo
);
976 kernel_context_close(ctx_hi
);
978 igt_spinner_fini(&spin_lo
);
980 igt_spinner_fini(&spin_hi
);
984 static int live_late_preempt(void *arg
)
986 struct intel_gt
*gt
= arg
;
987 struct i915_gem_context
*ctx_hi
, *ctx_lo
;
988 struct igt_spinner spin_hi
, spin_lo
;
989 struct intel_engine_cs
*engine
;
990 struct i915_sched_attr attr
= {};
991 enum intel_engine_id id
;
994 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
997 if (igt_spinner_init(&spin_hi
, gt
))
1000 if (igt_spinner_init(&spin_lo
, gt
))
1003 ctx_hi
= kernel_context(gt
->i915
);
1007 ctx_lo
= kernel_context(gt
->i915
);
1011 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1012 ctx_lo
->sched
.priority
= I915_USER_PRIORITY(1);
1014 for_each_engine(engine
, gt
, id
) {
1015 struct igt_live_test t
;
1016 struct i915_request
*rq
;
1018 if (!intel_engine_has_preemption(engine
))
1021 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
)) {
1026 rq
= spinner_create_request(&spin_lo
, ctx_lo
, engine
,
1033 i915_request_add(rq
);
1034 if (!igt_wait_for_spinner(&spin_lo
, rq
)) {
1035 pr_err("First context failed to start\n");
1039 rq
= spinner_create_request(&spin_hi
, ctx_hi
, engine
,
1042 igt_spinner_end(&spin_lo
);
1047 i915_request_add(rq
);
1048 if (igt_wait_for_spinner(&spin_hi
, rq
)) {
1049 pr_err("Second context overtook first?\n");
1053 attr
.priority
= I915_USER_PRIORITY(I915_PRIORITY_MAX
);
1054 engine
->schedule(rq
, &attr
);
1056 if (!igt_wait_for_spinner(&spin_hi
, rq
)) {
1057 pr_err("High priority context failed to preempt the low priority context\n");
1062 igt_spinner_end(&spin_hi
);
1063 igt_spinner_end(&spin_lo
);
1065 if (igt_live_test_end(&t
)) {
1073 kernel_context_close(ctx_lo
);
1075 kernel_context_close(ctx_hi
);
1077 igt_spinner_fini(&spin_lo
);
1079 igt_spinner_fini(&spin_hi
);
1083 igt_spinner_end(&spin_hi
);
1084 igt_spinner_end(&spin_lo
);
1085 intel_gt_set_wedged(gt
);
1090 struct preempt_client
{
1091 struct igt_spinner spin
;
1092 struct i915_gem_context
*ctx
;
1095 static int preempt_client_init(struct intel_gt
*gt
, struct preempt_client
*c
)
1097 c
->ctx
= kernel_context(gt
->i915
);
1101 if (igt_spinner_init(&c
->spin
, gt
))
1107 kernel_context_close(c
->ctx
);
1111 static void preempt_client_fini(struct preempt_client
*c
)
1113 igt_spinner_fini(&c
->spin
);
1114 kernel_context_close(c
->ctx
);
1117 static int live_nopreempt(void *arg
)
1119 struct intel_gt
*gt
= arg
;
1120 struct intel_engine_cs
*engine
;
1121 struct preempt_client a
, b
;
1122 enum intel_engine_id id
;
1126 * Verify that we can disable preemption for an individual request
1127 * that may be being observed and not want to be interrupted.
1130 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
1133 if (preempt_client_init(gt
, &a
))
1135 if (preempt_client_init(gt
, &b
))
1137 b
.ctx
->sched
.priority
= I915_USER_PRIORITY(I915_PRIORITY_MAX
);
1139 for_each_engine(engine
, gt
, id
) {
1140 struct i915_request
*rq_a
, *rq_b
;
1142 if (!intel_engine_has_preemption(engine
))
1145 engine
->execlists
.preempt_hang
.count
= 0;
1147 rq_a
= spinner_create_request(&a
.spin
,
1151 err
= PTR_ERR(rq_a
);
1155 /* Low priority client, but unpreemptable! */
1156 __set_bit(I915_FENCE_FLAG_NOPREEMPT
, &rq_a
->fence
.flags
);
1158 i915_request_add(rq_a
);
1159 if (!igt_wait_for_spinner(&a
.spin
, rq_a
)) {
1160 pr_err("First client failed to start\n");
1164 rq_b
= spinner_create_request(&b
.spin
,
1168 err
= PTR_ERR(rq_b
);
1172 i915_request_add(rq_b
);
1174 /* B is much more important than A! (But A is unpreemptable.) */
1175 GEM_BUG_ON(rq_prio(rq_b
) <= rq_prio(rq_a
));
1177 /* Wait long enough for preemption and timeslicing */
1178 if (igt_wait_for_spinner(&b
.spin
, rq_b
)) {
1179 pr_err("Second client started too early!\n");
1183 igt_spinner_end(&a
.spin
);
1185 if (!igt_wait_for_spinner(&b
.spin
, rq_b
)) {
1186 pr_err("Second client failed to start\n");
1190 igt_spinner_end(&b
.spin
);
1192 if (engine
->execlists
.preempt_hang
.count
) {
1193 pr_err("Preemption recorded x%d; should have been suppressed!\n",
1194 engine
->execlists
.preempt_hang
.count
);
1199 if (igt_flush_test(gt
->i915
))
1205 preempt_client_fini(&b
);
1207 preempt_client_fini(&a
);
1211 igt_spinner_end(&b
.spin
);
1212 igt_spinner_end(&a
.spin
);
1213 intel_gt_set_wedged(gt
);
1218 struct live_preempt_cancel
{
1219 struct intel_engine_cs
*engine
;
1220 struct preempt_client a
, b
;
1223 static int __cancel_active0(struct live_preempt_cancel
*arg
)
1225 struct i915_request
*rq
;
1226 struct igt_live_test t
;
1229 /* Preempt cancel of ELSP0 */
1230 GEM_TRACE("%s(%s)\n", __func__
, arg
->engine
->name
);
1231 if (igt_live_test_begin(&t
, arg
->engine
->i915
,
1232 __func__
, arg
->engine
->name
))
1235 rq
= spinner_create_request(&arg
->a
.spin
,
1236 arg
->a
.ctx
, arg
->engine
,
1241 clear_bit(CONTEXT_BANNED
, &rq
->context
->flags
);
1242 i915_request_get(rq
);
1243 i915_request_add(rq
);
1244 if (!igt_wait_for_spinner(&arg
->a
.spin
, rq
)) {
1249 intel_context_set_banned(rq
->context
);
1250 err
= intel_engine_pulse(arg
->engine
);
1254 if (i915_request_wait(rq
, 0, HZ
/ 5) < 0) {
1259 if (rq
->fence
.error
!= -EIO
) {
1260 pr_err("Cancelled inflight0 request did not report -EIO\n");
1266 i915_request_put(rq
);
1267 if (igt_live_test_end(&t
))
1272 static int __cancel_active1(struct live_preempt_cancel
*arg
)
1274 struct i915_request
*rq
[2] = {};
1275 struct igt_live_test t
;
1278 /* Preempt cancel of ELSP1 */
1279 GEM_TRACE("%s(%s)\n", __func__
, arg
->engine
->name
);
1280 if (igt_live_test_begin(&t
, arg
->engine
->i915
,
1281 __func__
, arg
->engine
->name
))
1284 rq
[0] = spinner_create_request(&arg
->a
.spin
,
1285 arg
->a
.ctx
, arg
->engine
,
1286 MI_NOOP
); /* no preemption */
1288 return PTR_ERR(rq
[0]);
1290 clear_bit(CONTEXT_BANNED
, &rq
[0]->context
->flags
);
1291 i915_request_get(rq
[0]);
1292 i915_request_add(rq
[0]);
1293 if (!igt_wait_for_spinner(&arg
->a
.spin
, rq
[0])) {
1298 rq
[1] = spinner_create_request(&arg
->b
.spin
,
1299 arg
->b
.ctx
, arg
->engine
,
1301 if (IS_ERR(rq
[1])) {
1302 err
= PTR_ERR(rq
[1]);
1306 clear_bit(CONTEXT_BANNED
, &rq
[1]->context
->flags
);
1307 i915_request_get(rq
[1]);
1308 err
= i915_request_await_dma_fence(rq
[1], &rq
[0]->fence
);
1309 i915_request_add(rq
[1]);
1313 intel_context_set_banned(rq
[1]->context
);
1314 err
= intel_engine_pulse(arg
->engine
);
1318 igt_spinner_end(&arg
->a
.spin
);
1319 if (i915_request_wait(rq
[1], 0, HZ
/ 5) < 0) {
1324 if (rq
[0]->fence
.error
!= 0) {
1325 pr_err("Normal inflight0 request did not complete\n");
1330 if (rq
[1]->fence
.error
!= -EIO
) {
1331 pr_err("Cancelled inflight1 request did not report -EIO\n");
1337 i915_request_put(rq
[1]);
1338 i915_request_put(rq
[0]);
1339 if (igt_live_test_end(&t
))
1344 static int __cancel_queued(struct live_preempt_cancel
*arg
)
1346 struct i915_request
*rq
[3] = {};
1347 struct igt_live_test t
;
1350 /* Full ELSP and one in the wings */
1351 GEM_TRACE("%s(%s)\n", __func__
, arg
->engine
->name
);
1352 if (igt_live_test_begin(&t
, arg
->engine
->i915
,
1353 __func__
, arg
->engine
->name
))
1356 rq
[0] = spinner_create_request(&arg
->a
.spin
,
1357 arg
->a
.ctx
, arg
->engine
,
1360 return PTR_ERR(rq
[0]);
1362 clear_bit(CONTEXT_BANNED
, &rq
[0]->context
->flags
);
1363 i915_request_get(rq
[0]);
1364 i915_request_add(rq
[0]);
1365 if (!igt_wait_for_spinner(&arg
->a
.spin
, rq
[0])) {
1370 rq
[1] = igt_request_alloc(arg
->b
.ctx
, arg
->engine
);
1371 if (IS_ERR(rq
[1])) {
1372 err
= PTR_ERR(rq
[1]);
1376 clear_bit(CONTEXT_BANNED
, &rq
[1]->context
->flags
);
1377 i915_request_get(rq
[1]);
1378 err
= i915_request_await_dma_fence(rq
[1], &rq
[0]->fence
);
1379 i915_request_add(rq
[1]);
1383 rq
[2] = spinner_create_request(&arg
->b
.spin
,
1384 arg
->a
.ctx
, arg
->engine
,
1386 if (IS_ERR(rq
[2])) {
1387 err
= PTR_ERR(rq
[2]);
1391 i915_request_get(rq
[2]);
1392 err
= i915_request_await_dma_fence(rq
[2], &rq
[1]->fence
);
1393 i915_request_add(rq
[2]);
1397 intel_context_set_banned(rq
[2]->context
);
1398 err
= intel_engine_pulse(arg
->engine
);
1402 if (i915_request_wait(rq
[2], 0, HZ
/ 5) < 0) {
1407 if (rq
[0]->fence
.error
!= -EIO
) {
1408 pr_err("Cancelled inflight0 request did not report -EIO\n");
1413 if (rq
[1]->fence
.error
!= 0) {
1414 pr_err("Normal inflight1 request did not complete\n");
1419 if (rq
[2]->fence
.error
!= -EIO
) {
1420 pr_err("Cancelled queued request did not report -EIO\n");
1426 i915_request_put(rq
[2]);
1427 i915_request_put(rq
[1]);
1428 i915_request_put(rq
[0]);
1429 if (igt_live_test_end(&t
))
1434 static int __cancel_hostile(struct live_preempt_cancel
*arg
)
1436 struct i915_request
*rq
;
1439 /* Preempt cancel non-preemptible spinner in ELSP0 */
1440 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT
))
1443 GEM_TRACE("%s(%s)\n", __func__
, arg
->engine
->name
);
1444 rq
= spinner_create_request(&arg
->a
.spin
,
1445 arg
->a
.ctx
, arg
->engine
,
1446 MI_NOOP
); /* preemption disabled */
1450 clear_bit(CONTEXT_BANNED
, &rq
->context
->flags
);
1451 i915_request_get(rq
);
1452 i915_request_add(rq
);
1453 if (!igt_wait_for_spinner(&arg
->a
.spin
, rq
)) {
1458 intel_context_set_banned(rq
->context
);
1459 err
= intel_engine_pulse(arg
->engine
); /* force reset */
1463 if (i915_request_wait(rq
, 0, HZ
/ 5) < 0) {
1468 if (rq
->fence
.error
!= -EIO
) {
1469 pr_err("Cancelled inflight0 request did not report -EIO\n");
1475 i915_request_put(rq
);
1476 if (igt_flush_test(arg
->engine
->i915
))
1481 static int live_preempt_cancel(void *arg
)
1483 struct intel_gt
*gt
= arg
;
1484 struct live_preempt_cancel data
;
1485 enum intel_engine_id id
;
1489 * To cancel an inflight context, we need to first remove it from the
1490 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
1493 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
1496 if (preempt_client_init(gt
, &data
.a
))
1498 if (preempt_client_init(gt
, &data
.b
))
1501 for_each_engine(data
.engine
, gt
, id
) {
1502 if (!intel_engine_has_preemption(data
.engine
))
1505 err
= __cancel_active0(&data
);
1509 err
= __cancel_active1(&data
);
1513 err
= __cancel_queued(&data
);
1517 err
= __cancel_hostile(&data
);
1524 preempt_client_fini(&data
.b
);
1526 preempt_client_fini(&data
.a
);
1531 igt_spinner_end(&data
.b
.spin
);
1532 igt_spinner_end(&data
.a
.spin
);
1533 intel_gt_set_wedged(gt
);
1537 static int live_suppress_self_preempt(void *arg
)
1539 struct intel_gt
*gt
= arg
;
1540 struct intel_engine_cs
*engine
;
1541 struct i915_sched_attr attr
= {
1542 .priority
= I915_USER_PRIORITY(I915_PRIORITY_MAX
)
1544 struct preempt_client a
, b
;
1545 enum intel_engine_id id
;
1549 * Verify that if a preemption request does not cause a change in
1550 * the current execution order, the preempt-to-idle injection is
1551 * skipped and that we do not accidentally apply it after the CS
1555 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
1558 if (USES_GUC_SUBMISSION(gt
->i915
))
1559 return 0; /* presume black blox */
1561 if (intel_vgpu_active(gt
->i915
))
1562 return 0; /* GVT forces single port & request submission */
1564 if (preempt_client_init(gt
, &a
))
1566 if (preempt_client_init(gt
, &b
))
1569 for_each_engine(engine
, gt
, id
) {
1570 struct i915_request
*rq_a
, *rq_b
;
1573 if (!intel_engine_has_preemption(engine
))
1576 if (igt_flush_test(gt
->i915
))
1579 intel_engine_pm_get(engine
);
1580 engine
->execlists
.preempt_hang
.count
= 0;
1582 rq_a
= spinner_create_request(&a
.spin
,
1586 err
= PTR_ERR(rq_a
);
1587 intel_engine_pm_put(engine
);
1591 i915_request_add(rq_a
);
1592 if (!igt_wait_for_spinner(&a
.spin
, rq_a
)) {
1593 pr_err("First client failed to start\n");
1594 intel_engine_pm_put(engine
);
1598 /* Keep postponing the timer to avoid premature slicing */
1599 mod_timer(&engine
->execlists
.timer
, jiffies
+ HZ
);
1600 for (depth
= 0; depth
< 8; depth
++) {
1601 rq_b
= spinner_create_request(&b
.spin
,
1605 err
= PTR_ERR(rq_b
);
1606 intel_engine_pm_put(engine
);
1609 i915_request_add(rq_b
);
1611 GEM_BUG_ON(i915_request_completed(rq_a
));
1612 engine
->schedule(rq_a
, &attr
);
1613 igt_spinner_end(&a
.spin
);
1615 if (!igt_wait_for_spinner(&b
.spin
, rq_b
)) {
1616 pr_err("Second client failed to start\n");
1617 intel_engine_pm_put(engine
);
1624 igt_spinner_end(&a
.spin
);
1626 if (engine
->execlists
.preempt_hang
.count
) {
1627 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
1629 engine
->execlists
.preempt_hang
.count
,
1631 intel_engine_pm_put(engine
);
1636 intel_engine_pm_put(engine
);
1637 if (igt_flush_test(gt
->i915
))
1643 preempt_client_fini(&b
);
1645 preempt_client_fini(&a
);
1649 igt_spinner_end(&b
.spin
);
1650 igt_spinner_end(&a
.spin
);
1651 intel_gt_set_wedged(gt
);
1656 static int __i915_sw_fence_call
1657 dummy_notify(struct i915_sw_fence
*fence
, enum i915_sw_fence_notify state
)
1662 static struct i915_request
*dummy_request(struct intel_engine_cs
*engine
)
1664 struct i915_request
*rq
;
1666 rq
= kzalloc(sizeof(*rq
), GFP_KERNEL
);
1670 rq
->engine
= engine
;
1672 spin_lock_init(&rq
->lock
);
1673 INIT_LIST_HEAD(&rq
->fence
.cb_list
);
1674 rq
->fence
.lock
= &rq
->lock
;
1675 rq
->fence
.ops
= &i915_fence_ops
;
1677 i915_sched_node_init(&rq
->sched
);
1679 /* mark this request as permanently incomplete */
1680 rq
->fence
.seqno
= 1;
1681 BUILD_BUG_ON(sizeof(rq
->fence
.seqno
) != 8); /* upper 32b == 0 */
1682 rq
->hwsp_seqno
= (u32
*)&rq
->fence
.seqno
+ 1;
1683 GEM_BUG_ON(i915_request_completed(rq
));
1685 i915_sw_fence_init(&rq
->submit
, dummy_notify
);
1686 set_bit(I915_FENCE_FLAG_ACTIVE
, &rq
->fence
.flags
);
1688 spin_lock_init(&rq
->lock
);
1689 rq
->fence
.lock
= &rq
->lock
;
1690 INIT_LIST_HEAD(&rq
->fence
.cb_list
);
1695 static void dummy_request_free(struct i915_request
*dummy
)
1697 /* We have to fake the CS interrupt to kick the next request */
1698 i915_sw_fence_commit(&dummy
->submit
);
1700 i915_request_mark_complete(dummy
);
1701 dma_fence_signal(&dummy
->fence
);
1703 i915_sched_node_fini(&dummy
->sched
);
1704 i915_sw_fence_fini(&dummy
->submit
);
1706 dma_fence_free(&dummy
->fence
);
1709 static int live_suppress_wait_preempt(void *arg
)
1711 struct intel_gt
*gt
= arg
;
1712 struct preempt_client client
[4];
1713 struct i915_request
*rq
[ARRAY_SIZE(client
)] = {};
1714 struct intel_engine_cs
*engine
;
1715 enum intel_engine_id id
;
1720 * Waiters are given a little priority nudge, but not enough
1721 * to actually cause any preemption. Double check that we do
1722 * not needlessly generate preempt-to-idle cycles.
1725 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
1728 if (preempt_client_init(gt
, &client
[0])) /* ELSP[0] */
1730 if (preempt_client_init(gt
, &client
[1])) /* ELSP[1] */
1732 if (preempt_client_init(gt
, &client
[2])) /* head of queue */
1734 if (preempt_client_init(gt
, &client
[3])) /* bystander */
1737 for_each_engine(engine
, gt
, id
) {
1740 if (!intel_engine_has_preemption(engine
))
1743 if (!engine
->emit_init_breadcrumb
)
1746 for (depth
= 0; depth
< ARRAY_SIZE(client
); depth
++) {
1747 struct i915_request
*dummy
;
1749 engine
->execlists
.preempt_hang
.count
= 0;
1751 dummy
= dummy_request(engine
);
1755 for (i
= 0; i
< ARRAY_SIZE(client
); i
++) {
1756 struct i915_request
*this;
1758 this = spinner_create_request(&client
[i
].spin
,
1759 client
[i
].ctx
, engine
,
1762 err
= PTR_ERR(this);
1766 /* Disable NEWCLIENT promotion */
1767 __i915_active_fence_set(&i915_request_timeline(this)->last_request
,
1770 rq
[i
] = i915_request_get(this);
1771 i915_request_add(this);
1774 dummy_request_free(dummy
);
1776 GEM_BUG_ON(i915_request_completed(rq
[0]));
1777 if (!igt_wait_for_spinner(&client
[0].spin
, rq
[0])) {
1778 pr_err("%s: First client failed to start\n",
1782 GEM_BUG_ON(!i915_request_started(rq
[0]));
1784 if (i915_request_wait(rq
[depth
],
1787 pr_err("%s: Waiter depth:%d completed!\n",
1788 engine
->name
, depth
);
1792 for (i
= 0; i
< ARRAY_SIZE(client
); i
++) {
1793 igt_spinner_end(&client
[i
].spin
);
1794 i915_request_put(rq
[i
]);
1798 if (igt_flush_test(gt
->i915
))
1801 if (engine
->execlists
.preempt_hang
.count
) {
1802 pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
1804 engine
->execlists
.preempt_hang
.count
,
1814 preempt_client_fini(&client
[3]);
1816 preempt_client_fini(&client
[2]);
1818 preempt_client_fini(&client
[1]);
1820 preempt_client_fini(&client
[0]);
1824 for (i
= 0; i
< ARRAY_SIZE(client
); i
++) {
1825 igt_spinner_end(&client
[i
].spin
);
1826 i915_request_put(rq
[i
]);
1828 intel_gt_set_wedged(gt
);
1833 static int live_chain_preempt(void *arg
)
1835 struct intel_gt
*gt
= arg
;
1836 struct intel_engine_cs
*engine
;
1837 struct preempt_client hi
, lo
;
1838 enum intel_engine_id id
;
1842 * Build a chain AB...BA between two contexts (A, B) and request
1843 * preemption of the last request. It should then complete before
1844 * the previously submitted spinner in B.
1847 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
1850 if (preempt_client_init(gt
, &hi
))
1853 if (preempt_client_init(gt
, &lo
))
1856 for_each_engine(engine
, gt
, id
) {
1857 struct i915_sched_attr attr
= {
1858 .priority
= I915_USER_PRIORITY(I915_PRIORITY_MAX
),
1860 struct igt_live_test t
;
1861 struct i915_request
*rq
;
1862 int ring_size
, count
, i
;
1864 if (!intel_engine_has_preemption(engine
))
1867 rq
= spinner_create_request(&lo
.spin
,
1873 i915_request_get(rq
);
1874 i915_request_add(rq
);
1876 ring_size
= rq
->wa_tail
- rq
->head
;
1878 ring_size
+= rq
->ring
->size
;
1879 ring_size
= rq
->ring
->size
/ ring_size
;
1880 pr_debug("%s(%s): Using maximum of %d requests\n",
1881 __func__
, engine
->name
, ring_size
);
1883 igt_spinner_end(&lo
.spin
);
1884 if (i915_request_wait(rq
, 0, HZ
/ 2) < 0) {
1885 pr_err("Timed out waiting to flush %s\n", engine
->name
);
1886 i915_request_put(rq
);
1889 i915_request_put(rq
);
1891 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
)) {
1896 for_each_prime_number_from(count
, 1, ring_size
) {
1897 rq
= spinner_create_request(&hi
.spin
,
1902 i915_request_add(rq
);
1903 if (!igt_wait_for_spinner(&hi
.spin
, rq
))
1906 rq
= spinner_create_request(&lo
.spin
,
1911 i915_request_add(rq
);
1913 for (i
= 0; i
< count
; i
++) {
1914 rq
= igt_request_alloc(lo
.ctx
, engine
);
1917 i915_request_add(rq
);
1920 rq
= igt_request_alloc(hi
.ctx
, engine
);
1924 i915_request_get(rq
);
1925 i915_request_add(rq
);
1926 engine
->schedule(rq
, &attr
);
1928 igt_spinner_end(&hi
.spin
);
1929 if (i915_request_wait(rq
, 0, HZ
/ 5) < 0) {
1930 struct drm_printer p
=
1931 drm_info_printer(gt
->i915
->drm
.dev
);
1933 pr_err("Failed to preempt over chain of %d\n",
1935 intel_engine_dump(engine
, &p
,
1936 "%s\n", engine
->name
);
1937 i915_request_put(rq
);
1940 igt_spinner_end(&lo
.spin
);
1941 i915_request_put(rq
);
1943 rq
= igt_request_alloc(lo
.ctx
, engine
);
1947 i915_request_get(rq
);
1948 i915_request_add(rq
);
1950 if (i915_request_wait(rq
, 0, HZ
/ 5) < 0) {
1951 struct drm_printer p
=
1952 drm_info_printer(gt
->i915
->drm
.dev
);
1954 pr_err("Failed to flush low priority chain of %d requests\n",
1956 intel_engine_dump(engine
, &p
,
1957 "%s\n", engine
->name
);
1959 i915_request_put(rq
);
1962 i915_request_put(rq
);
1965 if (igt_live_test_end(&t
)) {
1973 preempt_client_fini(&lo
);
1975 preempt_client_fini(&hi
);
1979 igt_spinner_end(&hi
.spin
);
1980 igt_spinner_end(&lo
.spin
);
1981 intel_gt_set_wedged(gt
);
1986 static int create_gang(struct intel_engine_cs
*engine
,
1987 struct i915_request
**prev
)
1989 struct drm_i915_gem_object
*obj
;
1990 struct intel_context
*ce
;
1991 struct i915_request
*rq
;
1992 struct i915_vma
*vma
;
1996 ce
= intel_context_create(engine
);
2000 obj
= i915_gem_object_create_internal(engine
->i915
, 4096);
2006 vma
= i915_vma_instance(obj
, ce
->vm
, NULL
);
2012 err
= i915_vma_pin(vma
, 0, 0, PIN_USER
);
2016 cs
= i915_gem_object_pin_map(obj
, I915_MAP_WC
);
2020 /* Semaphore target: spin until zero */
2021 *cs
++ = MI_ARB_ON_OFF
| MI_ARB_ENABLE
;
2023 *cs
++ = MI_SEMAPHORE_WAIT
|
2025 MI_SEMAPHORE_SAD_EQ_SDD
;
2027 *cs
++ = lower_32_bits(vma
->node
.start
);
2028 *cs
++ = upper_32_bits(vma
->node
.start
);
2031 u64 offset
= (*prev
)->batch
->node
.start
;
2033 /* Terminate the spinner in the next lower priority batch. */
2034 *cs
++ = MI_STORE_DWORD_IMM_GEN4
;
2035 *cs
++ = lower_32_bits(offset
);
2036 *cs
++ = upper_32_bits(offset
);
2040 *cs
++ = MI_BATCH_BUFFER_END
;
2041 i915_gem_object_flush_map(obj
);
2042 i915_gem_object_unpin_map(obj
);
2044 rq
= intel_context_create_request(ce
);
2049 i915_request_get(rq
);
2052 err
= i915_request_await_object(rq
, vma
->obj
, false);
2054 err
= i915_vma_move_to_active(vma
, rq
, 0);
2056 err
= rq
->engine
->emit_bb_start(rq
,
2059 i915_vma_unlock(vma
);
2060 i915_request_add(rq
);
2064 i915_gem_object_put(obj
);
2065 intel_context_put(ce
);
2067 rq
->client_link
.next
= &(*prev
)->client_link
;
2072 i915_request_put(rq
);
2074 i915_gem_object_put(obj
);
2076 intel_context_put(ce
);
2080 static int live_preempt_gang(void *arg
)
2082 struct intel_gt
*gt
= arg
;
2083 struct intel_engine_cs
*engine
;
2084 enum intel_engine_id id
;
2086 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
2090 * Build as long a chain of preempters as we can, with each
2091 * request higher priority than the last. Once we are ready, we release
2092 * the last batch which then precolates down the chain, each releasing
2093 * the next oldest in turn. The intent is to simply push as hard as we
2094 * can with the number of preemptions, trying to exceed narrow HW
2095 * limits. At a minimum, we insist that we can sort all the user
2096 * high priority levels into execution order.
2099 for_each_engine(engine
, gt
, id
) {
2100 struct i915_request
*rq
= NULL
;
2101 struct igt_live_test t
;
2102 IGT_TIMEOUT(end_time
);
2107 if (!intel_engine_has_preemption(engine
))
2110 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
))
2114 struct i915_sched_attr attr
= {
2115 .priority
= I915_USER_PRIORITY(prio
++),
2118 err
= create_gang(engine
, &rq
);
2122 /* Submit each spinner at increasing priority */
2123 engine
->schedule(rq
, &attr
);
2125 if (prio
<= I915_PRIORITY_MAX
)
2128 if (prio
> (INT_MAX
>> I915_USER_PRIORITY_SHIFT
))
2131 if (__igt_timeout(end_time
, NULL
))
2134 pr_debug("%s: Preempt chain of %d requests\n",
2135 engine
->name
, prio
);
2138 * Such that the last spinner is the highest priority and
2139 * should execute first. When that spinner completes,
2140 * it will terminate the next lowest spinner until there
2141 * are no more spinners and the gang is complete.
2143 cs
= i915_gem_object_pin_map(rq
->batch
->obj
, I915_MAP_WC
);
2146 i915_gem_object_unpin_map(rq
->batch
->obj
);
2149 intel_gt_set_wedged(gt
);
2152 while (rq
) { /* wait for each rq from highest to lowest prio */
2153 struct i915_request
*n
=
2154 list_next_entry(rq
, client_link
);
2156 if (err
== 0 && i915_request_wait(rq
, 0, HZ
/ 5) < 0) {
2157 struct drm_printer p
=
2158 drm_info_printer(engine
->i915
->drm
.dev
);
2160 pr_err("Failed to flush chain of %d requests, at %d\n",
2161 prio
, rq_prio(rq
) >> I915_USER_PRIORITY_SHIFT
);
2162 intel_engine_dump(engine
, &p
,
2163 "%s\n", engine
->name
);
2168 i915_request_put(rq
);
2172 if (igt_live_test_end(&t
))
2181 static int live_preempt_hang(void *arg
)
2183 struct intel_gt
*gt
= arg
;
2184 struct i915_gem_context
*ctx_hi
, *ctx_lo
;
2185 struct igt_spinner spin_hi
, spin_lo
;
2186 struct intel_engine_cs
*engine
;
2187 enum intel_engine_id id
;
2190 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
2193 if (!intel_has_reset_engine(gt
))
2196 if (igt_spinner_init(&spin_hi
, gt
))
2199 if (igt_spinner_init(&spin_lo
, gt
))
2202 ctx_hi
= kernel_context(gt
->i915
);
2205 ctx_hi
->sched
.priority
=
2206 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY
);
2208 ctx_lo
= kernel_context(gt
->i915
);
2211 ctx_lo
->sched
.priority
=
2212 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY
);
2214 for_each_engine(engine
, gt
, id
) {
2215 struct i915_request
*rq
;
2217 if (!intel_engine_has_preemption(engine
))
2220 rq
= spinner_create_request(&spin_lo
, ctx_lo
, engine
,
2227 i915_request_add(rq
);
2228 if (!igt_wait_for_spinner(&spin_lo
, rq
)) {
2229 GEM_TRACE("lo spinner failed to start\n");
2231 intel_gt_set_wedged(gt
);
2236 rq
= spinner_create_request(&spin_hi
, ctx_hi
, engine
,
2239 igt_spinner_end(&spin_lo
);
2244 init_completion(&engine
->execlists
.preempt_hang
.completion
);
2245 engine
->execlists
.preempt_hang
.inject_hang
= true;
2247 i915_request_add(rq
);
2249 if (!wait_for_completion_timeout(&engine
->execlists
.preempt_hang
.completion
,
2251 pr_err("Preemption did not occur within timeout!");
2253 intel_gt_set_wedged(gt
);
2258 set_bit(I915_RESET_ENGINE
+ id
, >
->reset
.flags
);
2259 intel_engine_reset(engine
, NULL
);
2260 clear_bit(I915_RESET_ENGINE
+ id
, >
->reset
.flags
);
2262 engine
->execlists
.preempt_hang
.inject_hang
= false;
2264 if (!igt_wait_for_spinner(&spin_hi
, rq
)) {
2265 GEM_TRACE("hi spinner failed to start\n");
2267 intel_gt_set_wedged(gt
);
2272 igt_spinner_end(&spin_hi
);
2273 igt_spinner_end(&spin_lo
);
2274 if (igt_flush_test(gt
->i915
)) {
2282 kernel_context_close(ctx_lo
);
2284 kernel_context_close(ctx_hi
);
2286 igt_spinner_fini(&spin_lo
);
2288 igt_spinner_fini(&spin_hi
);
2292 static int live_preempt_timeout(void *arg
)
2294 struct intel_gt
*gt
= arg
;
2295 struct i915_gem_context
*ctx_hi
, *ctx_lo
;
2296 struct igt_spinner spin_lo
;
2297 struct intel_engine_cs
*engine
;
2298 enum intel_engine_id id
;
2302 * Check that we force preemption to occur by cancelling the previous
2303 * context if it refuses to yield the GPU.
2305 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT
))
2308 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
2311 if (!intel_has_reset_engine(gt
))
2314 if (igt_spinner_init(&spin_lo
, gt
))
2317 ctx_hi
= kernel_context(gt
->i915
);
2320 ctx_hi
->sched
.priority
=
2321 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY
);
2323 ctx_lo
= kernel_context(gt
->i915
);
2326 ctx_lo
->sched
.priority
=
2327 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY
);
2329 for_each_engine(engine
, gt
, id
) {
2330 unsigned long saved_timeout
;
2331 struct i915_request
*rq
;
2333 if (!intel_engine_has_preemption(engine
))
2336 rq
= spinner_create_request(&spin_lo
, ctx_lo
, engine
,
2337 MI_NOOP
); /* preemption disabled */
2343 i915_request_add(rq
);
2344 if (!igt_wait_for_spinner(&spin_lo
, rq
)) {
2345 intel_gt_set_wedged(gt
);
2350 rq
= igt_request_alloc(ctx_hi
, engine
);
2352 igt_spinner_end(&spin_lo
);
2357 /* Flush the previous CS ack before changing timeouts */
2358 while (READ_ONCE(engine
->execlists
.pending
[0]))
2361 saved_timeout
= engine
->props
.preempt_timeout_ms
;
2362 engine
->props
.preempt_timeout_ms
= 1; /* in ms, -> 1 jiffie */
2364 i915_request_get(rq
);
2365 i915_request_add(rq
);
2367 intel_engine_flush_submission(engine
);
2368 engine
->props
.preempt_timeout_ms
= saved_timeout
;
2370 if (i915_request_wait(rq
, 0, HZ
/ 10) < 0) {
2371 intel_gt_set_wedged(gt
);
2372 i915_request_put(rq
);
2377 igt_spinner_end(&spin_lo
);
2378 i915_request_put(rq
);
2383 kernel_context_close(ctx_lo
);
2385 kernel_context_close(ctx_hi
);
2387 igt_spinner_fini(&spin_lo
);
2391 static int random_range(struct rnd_state
*rnd
, int min
, int max
)
2393 return i915_prandom_u32_max_state(max
- min
, rnd
) + min
;
2396 static int random_priority(struct rnd_state
*rnd
)
2398 return random_range(rnd
, I915_PRIORITY_MIN
, I915_PRIORITY_MAX
);
2401 struct preempt_smoke
{
2402 struct intel_gt
*gt
;
2403 struct i915_gem_context
**contexts
;
2404 struct intel_engine_cs
*engine
;
2405 struct drm_i915_gem_object
*batch
;
2406 unsigned int ncontext
;
2407 struct rnd_state prng
;
2408 unsigned long count
;
2411 static struct i915_gem_context
*smoke_context(struct preempt_smoke
*smoke
)
2413 return smoke
->contexts
[i915_prandom_u32_max_state(smoke
->ncontext
,
2417 static int smoke_submit(struct preempt_smoke
*smoke
,
2418 struct i915_gem_context
*ctx
, int prio
,
2419 struct drm_i915_gem_object
*batch
)
2421 struct i915_request
*rq
;
2422 struct i915_vma
*vma
= NULL
;
2426 struct i915_address_space
*vm
;
2428 vm
= i915_gem_context_get_vm_rcu(ctx
);
2429 vma
= i915_vma_instance(batch
, vm
, NULL
);
2432 return PTR_ERR(vma
);
2434 err
= i915_vma_pin(vma
, 0, 0, PIN_USER
);
2439 ctx
->sched
.priority
= prio
;
2441 rq
= igt_request_alloc(ctx
, smoke
->engine
);
2449 err
= i915_request_await_object(rq
, vma
->obj
, false);
2451 err
= i915_vma_move_to_active(vma
, rq
, 0);
2453 err
= rq
->engine
->emit_bb_start(rq
,
2456 i915_vma_unlock(vma
);
2459 i915_request_add(rq
);
2463 i915_vma_unpin(vma
);
2468 static int smoke_crescendo_thread(void *arg
)
2470 struct preempt_smoke
*smoke
= arg
;
2471 IGT_TIMEOUT(end_time
);
2472 unsigned long count
;
2476 struct i915_gem_context
*ctx
= smoke_context(smoke
);
2479 err
= smoke_submit(smoke
,
2480 ctx
, count
% I915_PRIORITY_MAX
,
2486 } while (!__igt_timeout(end_time
, NULL
));
2488 smoke
->count
= count
;
2492 static int smoke_crescendo(struct preempt_smoke
*smoke
, unsigned int flags
)
2493 #define BATCH BIT(0)
2495 struct task_struct
*tsk
[I915_NUM_ENGINES
] = {};
2496 struct preempt_smoke arg
[I915_NUM_ENGINES
];
2497 struct intel_engine_cs
*engine
;
2498 enum intel_engine_id id
;
2499 unsigned long count
;
2502 for_each_engine(engine
, smoke
->gt
, id
) {
2504 arg
[id
].engine
= engine
;
2505 if (!(flags
& BATCH
))
2506 arg
[id
].batch
= NULL
;
2509 tsk
[id
] = kthread_run(smoke_crescendo_thread
, &arg
,
2510 "igt/smoke:%d", id
);
2511 if (IS_ERR(tsk
[id
])) {
2512 err
= PTR_ERR(tsk
[id
]);
2515 get_task_struct(tsk
[id
]);
2518 yield(); /* start all threads before we kthread_stop() */
2521 for_each_engine(engine
, smoke
->gt
, id
) {
2524 if (IS_ERR_OR_NULL(tsk
[id
]))
2527 status
= kthread_stop(tsk
[id
]);
2531 count
+= arg
[id
].count
;
2533 put_task_struct(tsk
[id
]);
2536 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
2538 RUNTIME_INFO(smoke
->gt
->i915
)->num_engines
, smoke
->ncontext
);
2542 static int smoke_random(struct preempt_smoke
*smoke
, unsigned int flags
)
2544 enum intel_engine_id id
;
2545 IGT_TIMEOUT(end_time
);
2546 unsigned long count
;
2550 for_each_engine(smoke
->engine
, smoke
->gt
, id
) {
2551 struct i915_gem_context
*ctx
= smoke_context(smoke
);
2554 err
= smoke_submit(smoke
,
2555 ctx
, random_priority(&smoke
->prng
),
2556 flags
& BATCH
? smoke
->batch
: NULL
);
2562 } while (!__igt_timeout(end_time
, NULL
));
2564 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
2566 RUNTIME_INFO(smoke
->gt
->i915
)->num_engines
, smoke
->ncontext
);
2570 static int live_preempt_smoke(void *arg
)
2572 struct preempt_smoke smoke
= {
2574 .prng
= I915_RND_STATE_INITIALIZER(i915_selftest
.random_seed
),
2577 const unsigned int phase
[] = { 0, BATCH
};
2578 struct igt_live_test t
;
2583 if (!HAS_LOGICAL_RING_PREEMPTION(smoke
.gt
->i915
))
2586 smoke
.contexts
= kmalloc_array(smoke
.ncontext
,
2587 sizeof(*smoke
.contexts
),
2589 if (!smoke
.contexts
)
2593 i915_gem_object_create_internal(smoke
.gt
->i915
, PAGE_SIZE
);
2594 if (IS_ERR(smoke
.batch
)) {
2595 err
= PTR_ERR(smoke
.batch
);
2599 cs
= i915_gem_object_pin_map(smoke
.batch
, I915_MAP_WB
);
2604 for (n
= 0; n
< PAGE_SIZE
/ sizeof(*cs
) - 1; n
++)
2605 cs
[n
] = MI_ARB_CHECK
;
2606 cs
[n
] = MI_BATCH_BUFFER_END
;
2607 i915_gem_object_flush_map(smoke
.batch
);
2608 i915_gem_object_unpin_map(smoke
.batch
);
2610 if (igt_live_test_begin(&t
, smoke
.gt
->i915
, __func__
, "all")) {
2615 for (n
= 0; n
< smoke
.ncontext
; n
++) {
2616 smoke
.contexts
[n
] = kernel_context(smoke
.gt
->i915
);
2617 if (!smoke
.contexts
[n
])
2621 for (n
= 0; n
< ARRAY_SIZE(phase
); n
++) {
2622 err
= smoke_crescendo(&smoke
, phase
[n
]);
2626 err
= smoke_random(&smoke
, phase
[n
]);
2632 if (igt_live_test_end(&t
))
2635 for (n
= 0; n
< smoke
.ncontext
; n
++) {
2636 if (!smoke
.contexts
[n
])
2638 kernel_context_close(smoke
.contexts
[n
]);
2642 i915_gem_object_put(smoke
.batch
);
2644 kfree(smoke
.contexts
);
2649 static int nop_virtual_engine(struct intel_gt
*gt
,
2650 struct intel_engine_cs
**siblings
,
2651 unsigned int nsibling
,
2654 #define CHAIN BIT(0)
2656 IGT_TIMEOUT(end_time
);
2657 struct i915_request
*request
[16] = {};
2658 struct intel_context
*ve
[16];
2659 unsigned long n
, prime
, nc
;
2660 struct igt_live_test t
;
2661 ktime_t times
[2] = {};
2664 GEM_BUG_ON(!nctx
|| nctx
> ARRAY_SIZE(ve
));
2666 for (n
= 0; n
< nctx
; n
++) {
2667 ve
[n
] = intel_execlists_create_virtual(siblings
, nsibling
);
2668 if (IS_ERR(ve
[n
])) {
2669 err
= PTR_ERR(ve
[n
]);
2674 err
= intel_context_pin(ve
[n
]);
2676 intel_context_put(ve
[n
]);
2682 err
= igt_live_test_begin(&t
, gt
->i915
, __func__
, ve
[0]->engine
->name
);
2686 for_each_prime_number_from(prime
, 1, 8192) {
2687 times
[1] = ktime_get_raw();
2689 if (flags
& CHAIN
) {
2690 for (nc
= 0; nc
< nctx
; nc
++) {
2691 for (n
= 0; n
< prime
; n
++) {
2692 struct i915_request
*rq
;
2694 rq
= i915_request_create(ve
[nc
]);
2701 i915_request_put(request
[nc
]);
2702 request
[nc
] = i915_request_get(rq
);
2703 i915_request_add(rq
);
2707 for (n
= 0; n
< prime
; n
++) {
2708 for (nc
= 0; nc
< nctx
; nc
++) {
2709 struct i915_request
*rq
;
2711 rq
= i915_request_create(ve
[nc
]);
2718 i915_request_put(request
[nc
]);
2719 request
[nc
] = i915_request_get(rq
);
2720 i915_request_add(rq
);
2725 for (nc
= 0; nc
< nctx
; nc
++) {
2726 if (i915_request_wait(request
[nc
], 0, HZ
/ 10) < 0) {
2727 pr_err("%s(%s): wait for %llx:%lld timed out\n",
2728 __func__
, ve
[0]->engine
->name
,
2729 request
[nc
]->fence
.context
,
2730 request
[nc
]->fence
.seqno
);
2732 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
2733 __func__
, ve
[0]->engine
->name
,
2734 request
[nc
]->fence
.context
,
2735 request
[nc
]->fence
.seqno
);
2737 intel_gt_set_wedged(gt
);
2742 times
[1] = ktime_sub(ktime_get_raw(), times
[1]);
2744 times
[0] = times
[1];
2746 for (nc
= 0; nc
< nctx
; nc
++) {
2747 i915_request_put(request
[nc
]);
2751 if (__igt_timeout(end_time
, NULL
))
2755 err
= igt_live_test_end(&t
);
2759 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
2760 nctx
, ve
[0]->engine
->name
, ktime_to_ns(times
[0]),
2761 prime
, div64_u64(ktime_to_ns(times
[1]), prime
));
2764 if (igt_flush_test(gt
->i915
))
2767 for (nc
= 0; nc
< nctx
; nc
++) {
2768 i915_request_put(request
[nc
]);
2769 intel_context_unpin(ve
[nc
]);
2770 intel_context_put(ve
[nc
]);
2775 static int live_virtual_engine(void *arg
)
2777 struct intel_gt
*gt
= arg
;
2778 struct intel_engine_cs
*siblings
[MAX_ENGINE_INSTANCE
+ 1];
2779 struct intel_engine_cs
*engine
;
2780 enum intel_engine_id id
;
2781 unsigned int class, inst
;
2784 if (USES_GUC_SUBMISSION(gt
->i915
))
2787 for_each_engine(engine
, gt
, id
) {
2788 err
= nop_virtual_engine(gt
, &engine
, 1, 1, 0);
2790 pr_err("Failed to wrap engine %s: err=%d\n",
2796 for (class = 0; class <= MAX_ENGINE_CLASS
; class++) {
2800 for (inst
= 0; inst
<= MAX_ENGINE_INSTANCE
; inst
++) {
2801 if (!gt
->engine_class
[class][inst
])
2804 siblings
[nsibling
++] = gt
->engine_class
[class][inst
];
2809 for (n
= 1; n
<= nsibling
+ 1; n
++) {
2810 err
= nop_virtual_engine(gt
, siblings
, nsibling
,
2816 err
= nop_virtual_engine(gt
, siblings
, nsibling
, n
, CHAIN
);
2824 static int mask_virtual_engine(struct intel_gt
*gt
,
2825 struct intel_engine_cs
**siblings
,
2826 unsigned int nsibling
)
2828 struct i915_request
*request
[MAX_ENGINE_INSTANCE
+ 1];
2829 struct intel_context
*ve
;
2830 struct igt_live_test t
;
2835 * Check that by setting the execution mask on a request, we can
2836 * restrict it to our desired engine within the virtual engine.
2839 ve
= intel_execlists_create_virtual(siblings
, nsibling
);
2845 err
= intel_context_pin(ve
);
2849 err
= igt_live_test_begin(&t
, gt
->i915
, __func__
, ve
->engine
->name
);
2853 for (n
= 0; n
< nsibling
; n
++) {
2854 request
[n
] = i915_request_create(ve
);
2855 if (IS_ERR(request
[n
])) {
2856 err
= PTR_ERR(request
[n
]);
2861 /* Reverse order as it's more likely to be unnatural */
2862 request
[n
]->execution_mask
= siblings
[nsibling
- n
- 1]->mask
;
2864 i915_request_get(request
[n
]);
2865 i915_request_add(request
[n
]);
2868 for (n
= 0; n
< nsibling
; n
++) {
2869 if (i915_request_wait(request
[n
], 0, HZ
/ 10) < 0) {
2870 pr_err("%s(%s): wait for %llx:%lld timed out\n",
2871 __func__
, ve
->engine
->name
,
2872 request
[n
]->fence
.context
,
2873 request
[n
]->fence
.seqno
);
2875 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
2876 __func__
, ve
->engine
->name
,
2877 request
[n
]->fence
.context
,
2878 request
[n
]->fence
.seqno
);
2880 intel_gt_set_wedged(gt
);
2885 if (request
[n
]->engine
!= siblings
[nsibling
- n
- 1]) {
2886 pr_err("Executed on wrong sibling '%s', expected '%s'\n",
2887 request
[n
]->engine
->name
,
2888 siblings
[nsibling
- n
- 1]->name
);
2894 err
= igt_live_test_end(&t
);
2896 if (igt_flush_test(gt
->i915
))
2899 for (n
= 0; n
< nsibling
; n
++)
2900 i915_request_put(request
[n
]);
2903 intel_context_unpin(ve
);
2905 intel_context_put(ve
);
2910 static int live_virtual_mask(void *arg
)
2912 struct intel_gt
*gt
= arg
;
2913 struct intel_engine_cs
*siblings
[MAX_ENGINE_INSTANCE
+ 1];
2914 unsigned int class, inst
;
2917 if (USES_GUC_SUBMISSION(gt
->i915
))
2920 for (class = 0; class <= MAX_ENGINE_CLASS
; class++) {
2921 unsigned int nsibling
;
2924 for (inst
= 0; inst
<= MAX_ENGINE_INSTANCE
; inst
++) {
2925 if (!gt
->engine_class
[class][inst
])
2928 siblings
[nsibling
++] = gt
->engine_class
[class][inst
];
2933 err
= mask_virtual_engine(gt
, siblings
, nsibling
);
2941 static int preserved_virtual_engine(struct intel_gt
*gt
,
2942 struct intel_engine_cs
**siblings
,
2943 unsigned int nsibling
)
2945 struct i915_request
*last
= NULL
;
2946 struct intel_context
*ve
;
2947 struct i915_vma
*scratch
;
2948 struct igt_live_test t
;
2953 scratch
= create_scratch(siblings
[0]->gt
);
2954 if (IS_ERR(scratch
))
2955 return PTR_ERR(scratch
);
2957 ve
= intel_execlists_create_virtual(siblings
, nsibling
);
2963 err
= intel_context_pin(ve
);
2967 err
= igt_live_test_begin(&t
, gt
->i915
, __func__
, ve
->engine
->name
);
2971 for (n
= 0; n
< NUM_GPR_DW
; n
++) {
2972 struct intel_engine_cs
*engine
= siblings
[n
% nsibling
];
2973 struct i915_request
*rq
;
2975 rq
= i915_request_create(ve
);
2981 i915_request_put(last
);
2982 last
= i915_request_get(rq
);
2984 cs
= intel_ring_begin(rq
, 8);
2986 i915_request_add(rq
);
2991 *cs
++ = MI_STORE_REGISTER_MEM_GEN8
| MI_USE_GGTT
;
2992 *cs
++ = CS_GPR(engine
, n
);
2993 *cs
++ = i915_ggtt_offset(scratch
) + n
* sizeof(u32
);
2996 *cs
++ = MI_LOAD_REGISTER_IMM(1);
2997 *cs
++ = CS_GPR(engine
, (n
+ 1) % NUM_GPR_DW
);
3001 intel_ring_advance(rq
, cs
);
3003 /* Restrict this request to run on a particular engine */
3004 rq
->execution_mask
= engine
->mask
;
3005 i915_request_add(rq
);
3008 if (i915_request_wait(last
, 0, HZ
/ 5) < 0) {
3013 cs
= i915_gem_object_pin_map(scratch
->obj
, I915_MAP_WB
);
3019 for (n
= 0; n
< NUM_GPR_DW
; n
++) {
3021 pr_err("Incorrect value[%d] found for GPR[%d]\n",
3028 i915_gem_object_unpin_map(scratch
->obj
);
3031 if (igt_live_test_end(&t
))
3033 i915_request_put(last
);
3035 intel_context_unpin(ve
);
3037 intel_context_put(ve
);
3039 i915_vma_unpin_and_release(&scratch
, 0);
static int live_virtual_preserved(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
	unsigned int class, inst;

	/*
	 * Check that the context image retains non-privileged (user) registers
	 * from one engine to the next. For this we check that the CS_GPR
	 * registers are preserved.
	 */

	if (USES_GUC_SUBMISSION(gt->i915))
		return 0;

	/* As we use CS_GPR we cannot run before they existed on all engines. */
	if (INTEL_GEN(gt->i915) < 9)
		return 0;

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		int nsibling, err;

		nsibling = 0;
		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
			if (!gt->engine_class[class][inst])
				continue;

			siblings[nsibling++] = gt->engine_class[class][inst];
		}
		if (nsibling < 2)
			continue;

		err = preserved_virtual_engine(gt, siblings, nsibling);
		if (err)
			return err;
	}

	return 0;
}

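/*
 * bond_virtual_engine: for each potential master engine outside the target
 * class, start a spinner on the master and then create one bonded request
 * per sibling on a fresh virtual engine. Each bonded request awaits the
 * master's execution via the submit fence (ve->engine->bond_execute) and
 * must end up executing on its designated sibling.
 */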
static int bond_virtual_engine(struct intel_gt *gt,
			       unsigned int class,
			       struct intel_engine_cs **siblings,
			       unsigned int nsibling,
			       unsigned int flags)
#define BOND_SCHEDULE BIT(0)
{
	struct intel_engine_cs *master;
	struct i915_request *rq[16];
	enum intel_engine_id id;
	struct igt_spinner spin;
	unsigned long n;
	int err;

	/*
	 * A set of bonded requests is intended to be run concurrently
	 * across a number of engines. We use one request per-engine
	 * and a magic fence to schedule each of the bonded requests
	 * at the same time. A consequence of our current scheduler is that
	 * we only move requests to the HW ready queue when the request
	 * becomes ready, that is when all of its prerequisite fences have
	 * been signaled. As one of those fences is the master submit fence,
	 * there is a delay on all secondary fences as the HW may be
	 * currently busy. Equally, as all the requests are independent,
	 * they may have other fences that delay individual request
	 * submission to HW. Ergo, we do not guarantee that all requests are
	 * immediately submitted to HW at the same time, just that if the
	 * rules are abided by, they are ready at the same time as the
	 * first is submitted. Userspace can embed semaphores in its batch
	 * to ensure parallel execution of its phases as it requires.
	 * Though naturally it gets requested that perhaps the scheduler should
	 * take care of parallel execution, even across preemption events on
	 * different HW. (The proper answer is of course "lalalala".)
	 *
	 * With the submit-fence, we have identified three possible phases
	 * of synchronisation depending on the master fence: queued (not
	 * ready), executing, and signaled. The first two are quite simple
	 * and checked below. However, the signaled master fence handling is
	 * contentious. Currently we do not distinguish between a signaled
	 * fence and an expired fence, as once signaled it does not convey
	 * any information about the previous execution. It may even be freed
	 * and hence checking later it may not exist at all. Ergo we currently
	 * do not apply the bonding constraint for an already signaled fence,
	 * as our expectation is that it should not constrain the secondaries
	 * and is outside of the scope of the bonded request API (i.e. all
	 * userspace requests are meant to be running in parallel). As
	 * it imposes no constraint, and is effectively a no-op, we do not
	 * check below as normal execution flows are checked extensively above.
	 *
	 * XXX Is the degenerate handling of signaled submit fences the
	 * expected behaviour for userspace?
	 */

	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	err = 0;
	rq[0] = ERR_PTR(-ENOMEM);
	for_each_engine(master, gt, id) {
		struct i915_sw_fence fence = {};

		if (master->class == class)
			continue;

		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));

		rq[0] = igt_spinner_create_request(&spin,
						   master->kernel_context,
						   MI_NOOP);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			goto out;
		}
		i915_request_get(rq[0]);

		if (flags & BOND_SCHEDULE) {
			onstack_fence_init(&fence);
			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
							       &fence,
							       GFP_KERNEL);
		}

		i915_request_add(rq[0]);
		if (err < 0)
			goto out;

		if (!(flags & BOND_SCHEDULE) &&
		    !igt_wait_for_spinner(&spin, rq[0])) {
			err = -EIO;
			goto out;
		}

		for (n = 0; n < nsibling; n++) {
			struct intel_context *ve;

			ve = intel_execlists_create_virtual(siblings, nsibling);
			if (IS_ERR(ve)) {
				err = PTR_ERR(ve);
				onstack_fence_fini(&fence);
				goto out;
			}

			err = intel_virtual_engine_attach_bond(ve->engine,
							       master,
							       siblings[n]);
			if (err) {
				intel_context_put(ve);
				onstack_fence_fini(&fence);
				goto out;
			}

			err = intel_context_pin(ve);
			intel_context_put(ve);
			if (err) {
				onstack_fence_fini(&fence);
				goto out;
			}

			rq[n + 1] = i915_request_create(ve);
			intel_context_unpin(ve);
			if (IS_ERR(rq[n + 1])) {
				err = PTR_ERR(rq[n + 1]);
				onstack_fence_fini(&fence);
				goto out;
			}
			i915_request_get(rq[n + 1]);

			err = i915_request_await_execution(rq[n + 1],
							   &rq[0]->fence,
							   ve->engine->bond_execute);
			i915_request_add(rq[n + 1]);
			if (err < 0) {
				onstack_fence_fini(&fence);
				goto out;
			}
		}
		onstack_fence_fini(&fence);
		intel_engine_flush_submission(master);
		igt_spinner_end(&spin);

		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
			pr_err("Master request did not execute (on %s)!\n",
			       rq[0]->engine->name);
			err = -EIO;
			goto out;
		}

		for (n = 0; n < nsibling; n++) {
			if (i915_request_wait(rq[n + 1], 0,
					      MAX_SCHEDULE_TIMEOUT) < 0) {
				err = -EIO;
				goto out;
			}

			if (rq[n + 1]->engine != siblings[n]) {
				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
				       siblings[n]->name,
				       rq[n + 1]->engine->name,
				       rq[0]->engine->name);
				err = -EINVAL;
				goto out;
			}
		}

		for (n = 0; !IS_ERR(rq[n]); n++)
			i915_request_put(rq[n]);
		rq[0] = ERR_PTR(-ENOMEM);
	}

out:
	for (n = 0; !IS_ERR(rq[n]); n++)
		i915_request_put(rq[n]);
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);
	return err;
}

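/*
 * live_virtual_bond: exercise bond_virtual_engine() in two phases, once
 * with the master allowed to run immediately and once (BOND_SCHEDULE)
 * with the master held back behind an onstack fence so that submission
 * of the whole bonded group is deferred until the fence is released.
 */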
static int live_virtual_bond(void *arg)
{
	static const struct phase {
		const char *name;
		unsigned int flags;
	} phases[] = {
		{ "", 0 },
		{ "schedule", BOND_SCHEDULE },
		{ },
	};
	struct intel_gt *gt = arg;
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
	unsigned int class, inst;
	int err;

	if (USES_GUC_SUBMISSION(gt->i915))
		return 0;

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		const struct phase *p;
		int nsibling;

		nsibling = 0;
		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
			if (!gt->engine_class[class][inst])
				break;

			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
			siblings[nsibling++] = gt->engine_class[class][inst];
		}
		if (nsibling < 2)
			continue;

		for (p = phases; p->name; p++) {
			err = bond_virtual_engine(gt,
						  class, siblings, nsibling,
						  p->flags);
			if (err) {
				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
				       __func__, p->name, class, nsibling, err);
				return err;
			}
		}
	}

	return 0;
}

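/*
 * Entry point for the execlists live selftests. The tests are skipped
 * entirely when execlists are not supported or the GT is already wedged.
 */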
int intel_execlists_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_sanitycheck),
		SUBTEST(live_unlite_switch),
		SUBTEST(live_unlite_preempt),
		SUBTEST(live_timeslice_preempt),
		SUBTEST(live_timeslice_queue),
		SUBTEST(live_busywait_preempt),
		SUBTEST(live_preempt),
		SUBTEST(live_late_preempt),
		SUBTEST(live_nopreempt),
		SUBTEST(live_preempt_cancel),
		SUBTEST(live_suppress_self_preempt),
		SUBTEST(live_suppress_wait_preempt),
		SUBTEST(live_chain_preempt),
		SUBTEST(live_preempt_gang),
		SUBTEST(live_preempt_hang),
		SUBTEST(live_preempt_timeout),
		SUBTEST(live_preempt_smoke),
		SUBTEST(live_virtual_engine),
		SUBTEST(live_virtual_mask),
		SUBTEST(live_virtual_preserved),
		SUBTEST(live_virtual_bond),
	};

	if (!HAS_EXECLISTS(i915))
		return 0;

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}

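/*
 * hexdump: pr_info() a buffer as rows of 8 dwords, collapsing repeated
 * rows so that large identical regions of the context image do not
 * flood the log.
 */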
static void hexdump(const void *buf, size_t len)
{
	const size_t rowsize = 8 * sizeof(u32);
	const void *prev = NULL;
	bool skip = false;
	size_t pos;

	for (pos = 0; pos < len; pos += rowsize) {
		char line[128];

		if (prev && !memcmp(prev, buf + pos, rowsize)) {
			if (!skip) {
				pr_info("*\n");
				skip = true;
			}
			continue;
		}

		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
						rowsize, sizeof(u32),
						line, sizeof(line),
						false) >= sizeof(line));
		pr_info("[%04zx] %s\n", pos, line);

		prev = buf + pos;
		skip = false;
	}
}

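/*
 * live_lrc_layout: walk the LRI packets in the HW-saved default context
 * image and compare them dword by dword against the image we build in
 * execlists_init_reg_state(), dumping both images on any mismatch.
 */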
static int live_lrc_layout(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 *lrc;
	int err;

	/*
	 * Check the registers offsets we use to create the initial reg state
	 * match the layout saved by HW.
	 */

	lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!lrc)
		return -ENOMEM;

	err = 0;
	for_each_engine(engine, gt, id) {
		u32 *hw;
		int dw;

		if (!engine->default_state)
			continue;

		hw = i915_gem_object_pin_map(engine->default_state,
					     I915_MAP_WB);
		if (IS_ERR(hw)) {
			err = PTR_ERR(hw);
			break;
		}
		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);

		execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
					 engine->kernel_context,
					 engine,
					 engine->kernel_context->ring,
					 true);

		dw = 0;
		do {
			u32 lri = hw[dw];

			if (lri == 0) {
				dw++;
				continue;
			}

			if (lrc[dw] == 0) {
				pr_debug("%s: skipped instruction %x at dword %d\n",
					 engine->name, lri, dw);
				dw++;
				continue;
			}

			if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
				       engine->name, dw, lri);
				err = -EINVAL;
				break;
			}

			if (lrc[dw] != lri) {
				pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
				       engine->name, dw, lri, lrc[dw]);
				err = -EINVAL;
				break;
			}

			lri &= 0x7f;
			lri++;
			dw++;

			while (lri) {
				if (hw[dw] != lrc[dw]) {
					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
					       engine->name, dw, hw[dw], lrc[dw]);
					err = -EINVAL;
					break;
				}

				/*
				 * Skip over the actual register value as we
				 * expect that to differ.
				 */
				dw += 2;
				lri -= 2;
			}
		} while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

		if (err) {
			pr_info("%s: HW register image:\n", engine->name);
			hexdump(hw, PAGE_SIZE);

			pr_info("%s: SW register image:\n", engine->name);
			hexdump(lrc, PAGE_SIZE);
		}

		i915_gem_object_unpin_map(engine->default_state);
		if (err)
			break;
	}

	kfree(lrc);
	return err;
}

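/*
 * find_offset: return the dword index at which @offset appears within a
 * context image, or -1 if it is not found.
 */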
static int find_offset(const u32 *lri, u32 offset)
{
	int i;

	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
		if (lri[i] == offset)
			return i;

	return -1;
}

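/*
 * live_lrc_fixed: verify that the handful of register offsets we hardcode
 * (RING_START, RING_CTL, RING_HEAD, RING_TAIL, RING_MI_MODE, BB_STATE)
 * appear at the expected dwords of the HW default context image.
 */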
static int live_lrc_fixed(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check the assumed register offsets match the actual locations in
	 * the context image.
	 */

	for_each_engine(engine, gt, id) {
		const struct {
			u32 reg;
			u32 offset;
			const char *name;
		} tbl[] = {
			{
				i915_mmio_reg_offset(RING_START(engine->mmio_base)),
				CTX_RING_START - 1,
				"RING_START"
			},
			{
				i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
				CTX_RING_CTL - 1,
				"RING_CTL"
			},
			{
				i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
				CTX_RING_HEAD - 1,
				"RING_HEAD"
			},
			{
				i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
				CTX_RING_TAIL - 1,
				"RING_TAIL"
			},
			{
				i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
				lrc_ring_mi_mode(engine),
				"RING_MI_MODE"
			},
			{
				i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
				CTX_BB_STATE - 1,
				"BB_STATE"
			},
			{ },
		}, *t;
		u32 *hw;

		if (!engine->default_state)
			continue;

		hw = i915_gem_object_pin_map(engine->default_state,
					     I915_MAP_WB);
		if (IS_ERR(hw)) {
			err = PTR_ERR(hw);
			break;
		}
		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);

		for (t = tbl; t->name; t++) {
			int dw = find_offset(hw, t->reg);

			if (dw != t->offset) {
				pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
				       engine->name,
				       t->name,
				       t->reg,
				       dw,
				       t->offset);
				err = -EINVAL;
			}
		}

		i915_gem_object_unpin_map(engine->default_state);
	}

	return err;
}

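/*
 * __live_lrc_state: from within a request, use SRM to copy RING_START and
 * RING_TAIL out of the live context and compare the stored values with
 * what we believe the context should be using.
 */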
static int __live_lrc_state(struct intel_engine_cs *engine,
			    struct i915_vma *scratch)
{
	struct intel_context *ce;
	struct i915_request *rq;
	enum {
		RING_START_IDX = 0,
		RING_TAIL_IDX,
		MAX_IDX
	};
	u32 expected[MAX_IDX];
	u32 *cs;
	int err;
	int n;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err)
		goto err_put;

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	cs = intel_ring_begin(rq, 4 * MAX_IDX);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		i915_request_add(rq);
		goto err_unpin;
	}

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
	*cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
	*cs++ = 0;

	expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
	*cs++ = 0;

	i915_request_get(rq);
	i915_request_add(rq);

	intel_engine_flush_submission(engine);
	expected[RING_TAIL_IDX] = ce->ring->tail;

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	for (n = 0; n < MAX_IDX; n++) {
		if (cs[n] != expected[n]) {
			pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
			       engine->name, n, cs[n], expected[n]);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

err_rq:
	i915_request_put(rq);
err_unpin:
	intel_context_unpin(ce);
err_put:
	intel_context_put(ce);
	return err;
}

static int live_lrc_state(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_vma *scratch;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check the live register state matches what we expect for this
	 * intel_context.
	 */

	scratch = create_scratch(gt);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	for_each_engine(engine, gt, id) {
		err = __live_lrc_state(engine, scratch);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	i915_vma_unpin_and_release(&scratch, 0);
	return err;
}

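/*
 * gpr_make_dirty: from the kernel context, load every CS_GPR with a
 * non-zero magic value so that a subsequent context can check it does
 * not inherit them.
 */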
static int gpr_make_dirty(struct intel_engine_cs *engine)
{
	struct i915_request *rq;
	u32 *cs;
	int n;

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
	for (n = 0; n < NUM_GPR_DW; n++) {
		*cs++ = CS_GPR(engine, n);
		*cs++ = STACK_MAGIC;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);
	i915_request_add(rq);

	return 0;
}

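/*
 * __live_gpr_clear: after dirtying the GPRs from another context, read
 * them back via SRM from a brand new context; every dword must be zero.
 */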
static int __live_gpr_clear(struct intel_engine_cs *engine,
			    struct i915_vma *scratch)
{
	struct intel_context *ce;
	struct i915_request *rq;
	u32 *cs;
	int err;
	int n;

	if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
		return 0; /* GPR only on rcs0 for gen8 */

	err = gpr_make_dirty(engine);
	if (err)
		return err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_put;
	}

	cs = intel_ring_begin(rq, 4 * NUM_GPR_DW);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		i915_request_add(rq);
		goto err_put;
	}

	for (n = 0; n < NUM_GPR_DW; n++) {
		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
		*cs++ = CS_GPR(engine, n);
		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
		*cs++ = 0;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	for (n = 0; n < NUM_GPR_DW; n++) {
		if (cs[n]) {
			pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
			       engine->name,
			       n / 2, n & 1 ? "udw" : "ldw",
			       cs[n]);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

err_rq:
	i915_request_put(rq);
err_put:
	intel_context_put(ce);
	return err;
}

static int live_gpr_clear(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_vma *scratch;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that GPR registers are cleared in new contexts as we need
	 * to avoid leaking any information from previous contexts.
	 */

	scratch = create_scratch(gt);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	for_each_engine(engine, gt, id) {
		err = __live_gpr_clear(engine, scratch);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	i915_vma_unpin_and_release(&scratch, 0);
	return err;
}

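/*
 * Entry point for the LRC (context image) live selftests; skipped on
 * platforms without logical ring contexts.
 */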
int intel_lrc_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_lrc_layout),
		SUBTEST(live_lrc_fixed),
		SUBTEST(live_lrc_state),
		SUBTEST(live_gpr_clear),
	};

	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}