/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_reset.h"
#include "gt/selftest_engine_heartbeat.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/lib_sw_fence.h"

#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"

#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)

#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
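/*
 * CS_GPR(engine, n) is the MMIO offset of the engine's nth command-streamer
 * general purpose register (one 32-bit slot every 4 bytes, starting at
 * mmio_base + 0x600). NUM_GPR_DW counts those registers in dwords, since
 * each GPR is 64 bits (2 dwords) wide.
 */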
static struct i915_vma *create_scratch(struct intel_gt *gt)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err) {
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return vma;
}
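/*
 * is_active(): heuristic for "the request has reached the HW" -- either it
 * is currently in the ELSP, it is parked on the hold list, or it has already
 * executed past its initial breadcrumb.
 */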
static bool is_active(struct i915_request *rq)
{
	if (i915_request_is_active(rq))
		return true;

	if (i915_request_on_hold(rq))
		return true;

	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
		return true;

	return false;
}
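/*
 * wait_for_submit(): poll, up to the given timeout, until the HW has
 * acknowledged the submission (nothing left pending in the ELSP and the
 * request is active), or until the request has already completed.
 */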
static int wait_for_submit(struct intel_engine_cs *engine,
			   struct i915_request *rq,
			   unsigned long timeout)
{
	timeout += jiffies;
	do {
		bool done = time_after(jiffies, timeout);

		if (i915_request_completed(rq)) /* that was quick! */
			return 0;

		/* Wait until the HW has acknowledged the submission (or err) */
		intel_engine_flush_submission(engine);
		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
			return 0;

		if (done)
			return -ETIME;

		cond_resched();
	} while (1);
}
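/*
 * wait_for_reset(): after provoking an engine reset, wait for the hung
 * request to be marked with -EIO and to complete, flushing the reset worker
 * along the way.
 */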
static int wait_for_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq,
			  unsigned long timeout)
{
	timeout += jiffies;

	do {
		cond_resched();
		intel_engine_flush_submission(engine);

		if (READ_ONCE(engine->execlists.pending[0]))
			continue;

		if (i915_request_completed(rq))
			break;

		if (READ_ONCE(rq->fence.error))
			break;
	} while (time_before(jiffies, timeout));

	flush_scheduled_work();

	if (rq->fence.error != -EIO) {
		pr_err("%s: hanging request %llx:%lld not reset\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -EINVAL;
	}

	/* Give the request a jiffie to complete after flushing the worker */
	if (i915_request_wait(rq, 0,
			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
		pr_err("%s: hanging request %llx:%lld did not complete\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -ETIME;
	}

	return 0;
}
static int live_sanitycheck(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err;

	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ctx;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin, rq)) {
			GEM_TRACE("spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto out_ctx;
		}

		igt_spinner_end(&spin);
		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			goto out_ctx;
		}
		err = 0;
out_ctx:
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}
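/*
 * A "lite restore" is when the HW resubmits the context it is already
 * running, only moving RING_TAIL forward instead of performing a full
 * context switch. The unlite tests below poison the rings so that an
 * incorrectly chosen lite-restore executes garbage and hangs, which the
 * live test then detects.
 */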
static int live_unlite_restore(struct intel_gt *gt, int prio)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = -ENOMEM;

	/*
	 * Check that we can correctly context switch between 2 instances
	 * on the same engine from the same parent context.
	 */

	if (igt_spinner_init(&spin, gt))
		return err;

	err = 0;
	for_each_engine(engine, gt, id) {
		struct intel_context *ce[2] = {};
		struct i915_request *rq[2];
		struct igt_live_test t;
		int n;

		if (prio && !intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}
		st_engine_heartbeat_disable(engine);

		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			struct intel_context *tmp;

			tmp = intel_context_create(engine);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				goto err_ce;
			}

			err = intel_context_pin(tmp);
			if (err) {
				intel_context_put(tmp);
				goto err_ce;
			}

			/*
			 * Setup the pair of contexts such that if we
			 * lite-restore using the RING_TAIL from ce[1] it
			 * will execute garbage from ce[0]->ring.
			 */
			memset(tmp->ring->vaddr,
			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
			       tmp->ring->vma->size);

			ce[n] = tmp;
		}
		GEM_BUG_ON(!ce[1]->ring->size);
		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
		__execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);

		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			goto err_ce;
		}

		i915_request_get(rq[0]);
		i915_request_add(rq[0]);
		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);

		if (!igt_wait_for_spinner(&spin, rq[0])) {
			i915_request_put(rq[0]);
			goto err_ce;
		}

		rq[1] = i915_request_create(ce[1]);
		if (IS_ERR(rq[1])) {
			err = PTR_ERR(rq[1]);
			i915_request_put(rq[0]);
			goto err_ce;
		}

		if (!prio) {
			/*
			 * Ensure we do the switch to ce[1] on completion.
			 *
			 * rq[0] is already submitted, so this should reduce
			 * to a no-op (a wait on a request on the same engine
			 * uses the submit fence, not the completion fence),
			 * but it will install a dependency on rq[1] for rq[0]
			 * that will prevent the pair being reordered by
			 * timeslicing.
			 */
			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
		}

		i915_request_get(rq[1]);
		i915_request_add(rq[1]);
		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
		i915_request_put(rq[0]);

		if (prio) {
			struct i915_sched_attr attr = {
				.priority = prio,
			};

			/* Alternatively preempt the spinner with ce[1] */
			engine->schedule(rq[1], &attr);
		}

		/* And switch back to ce[0] for good measure */
		rq[0] = i915_request_create(ce[0]);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			i915_request_put(rq[1]);
			goto err_ce;
		}

		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
		i915_request_get(rq[0]);
		i915_request_add(rq[0]);
		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
		i915_request_put(rq[1]);
		i915_request_put(rq[0]);

err_ce:
		intel_engine_flush_submission(engine);
		igt_spinner_end(&spin);
		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			if (IS_ERR_OR_NULL(ce[n]))
				break;

			intel_context_unpin(ce[n]);
			intel_context_put(ce[n]);
		}

		st_engine_heartbeat_enable(engine);
		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}
static int live_unlite_switch(void *arg)
{
	return live_unlite_restore(arg, 0);
}

static int live_unlite_preempt(void *arg)
{
	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
}
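/*
 * Both wrappers exercise the same lite-restore scenario: the switch variant
 * relies on ordinary completion ordering (prio == 0), while the preempt
 * variant bumps ce[1] to maximum user priority so the switch is driven by
 * preemption instead.
 */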
351 static int live_unlite_ring(void *arg
)
353 struct intel_gt
*gt
= arg
;
354 struct intel_engine_cs
*engine
;
355 struct igt_spinner spin
;
356 enum intel_engine_id id
;
360 * Setup a preemption event that will cause almost the entire ring
361 * to be unwound, potentially fooling our intel_ring_direction()
362 * into emitting a forward lite-restore instead of the rollback.
365 if (igt_spinner_init(&spin
, gt
))
368 for_each_engine(engine
, gt
, id
) {
369 struct intel_context
*ce
[2] = {};
370 struct i915_request
*rq
;
371 struct igt_live_test t
;
374 if (!intel_engine_has_preemption(engine
))
377 if (!intel_engine_can_store_dword(engine
))
380 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
)) {
384 st_engine_heartbeat_disable(engine
);
386 for (n
= 0; n
< ARRAY_SIZE(ce
); n
++) {
387 struct intel_context
*tmp
;
389 tmp
= intel_context_create(engine
);
395 err
= intel_context_pin(tmp
);
397 intel_context_put(tmp
);
401 memset32(tmp
->ring
->vaddr
,
402 0xdeadbeef, /* trigger a hang if executed */
403 tmp
->ring
->vma
->size
/ sizeof(u32
));
408 /* Create max prio spinner, followed by N low prio nops */
409 rq
= igt_spinner_create_request(&spin
, ce
[0], MI_ARB_CHECK
);
415 i915_request_get(rq
);
416 rq
->sched
.attr
.priority
= I915_PRIORITY_BARRIER
;
417 i915_request_add(rq
);
419 if (!igt_wait_for_spinner(&spin
, rq
)) {
420 intel_gt_set_wedged(gt
);
421 i915_request_put(rq
);
426 /* Fill the ring, until we will cause a wrap */
428 while (intel_ring_direction(ce
[0]->ring
,
430 ce
[0]->ring
->tail
) <= 0) {
431 struct i915_request
*tmp
;
433 tmp
= intel_context_create_request(ce
[0]);
436 i915_request_put(rq
);
440 i915_request_add(tmp
);
441 intel_engine_flush_submission(engine
);
444 intel_engine_flush_submission(engine
);
445 pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
451 GEM_BUG_ON(intel_ring_direction(ce
[0]->ring
,
453 ce
[0]->ring
->tail
) <= 0);
454 i915_request_put(rq
);
456 /* Create a second ring to preempt the first ring after rq[0] */
457 rq
= intel_context_create_request(ce
[1]);
463 rq
->sched
.attr
.priority
= I915_PRIORITY_BARRIER
;
464 i915_request_get(rq
);
465 i915_request_add(rq
);
467 err
= wait_for_submit(engine
, rq
, HZ
/ 2);
468 i915_request_put(rq
);
470 pr_err("%s: preemption request was not submitted\n",
475 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
477 ce
[0]->ring
->tail
, ce
[0]->ring
->emit
,
478 ce
[1]->ring
->tail
, ce
[1]->ring
->emit
);
481 intel_engine_flush_submission(engine
);
482 igt_spinner_end(&spin
);
483 for (n
= 0; n
< ARRAY_SIZE(ce
); n
++) {
484 if (IS_ERR_OR_NULL(ce
[n
]))
487 intel_context_unpin(ce
[n
]);
488 intel_context_put(ce
[n
]);
490 st_engine_heartbeat_enable(engine
);
491 if (igt_live_test_end(&t
))
497 igt_spinner_fini(&spin
);
501 static int live_pin_rewind(void *arg
)
503 struct intel_gt
*gt
= arg
;
504 struct intel_engine_cs
*engine
;
505 enum intel_engine_id id
;
509 * We have to be careful not to trust intel_ring too much, for example
510 * ring->head is updated upon retire which is out of sync with pinning
511 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
512 * or else we risk writing an older, stale value.
514 * To simulate this, let's apply a bit of deliberate sabotage.
517 for_each_engine(engine
, gt
, id
) {
518 struct intel_context
*ce
;
519 struct i915_request
*rq
;
520 struct intel_ring
*ring
;
521 struct igt_live_test t
;
523 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
)) {
528 ce
= intel_context_create(engine
);
534 err
= intel_context_pin(ce
);
536 intel_context_put(ce
);
540 /* Keep the context awake while we play games */
541 err
= i915_active_acquire(&ce
->active
);
543 intel_context_unpin(ce
);
544 intel_context_put(ce
);
549 /* Poison the ring, and offset the next request from HEAD */
550 memset32(ring
->vaddr
, STACK_MAGIC
, ring
->size
/ sizeof(u32
));
551 ring
->emit
= ring
->size
/ 2;
552 ring
->tail
= ring
->emit
;
553 GEM_BUG_ON(ring
->head
);
555 intel_context_unpin(ce
);
557 /* Submit a simple nop request */
558 GEM_BUG_ON(intel_context_is_pinned(ce
));
559 rq
= intel_context_create_request(ce
);
560 i915_active_release(&ce
->active
); /* e.g. async retire */
561 intel_context_put(ce
);
566 GEM_BUG_ON(!rq
->head
);
567 i915_request_add(rq
);
569 /* Expect not to hang! */
570 if (igt_live_test_end(&t
)) {
579 static int live_hold_reset(void *arg
)
581 struct intel_gt
*gt
= arg
;
582 struct intel_engine_cs
*engine
;
583 enum intel_engine_id id
;
584 struct igt_spinner spin
;
588 * In order to support offline error capture for fast preempt reset,
589 * we need to decouple the guilty request and ensure that it and its
590 * descendants are not executed while the capture is in progress.
593 if (!intel_has_reset_engine(gt
))
596 if (igt_spinner_init(&spin
, gt
))
599 for_each_engine(engine
, gt
, id
) {
600 struct intel_context
*ce
;
601 struct i915_request
*rq
;
603 ce
= intel_context_create(engine
);
609 st_engine_heartbeat_disable(engine
);
611 rq
= igt_spinner_create_request(&spin
, ce
, MI_ARB_CHECK
);
616 i915_request_add(rq
);
618 if (!igt_wait_for_spinner(&spin
, rq
)) {
619 intel_gt_set_wedged(gt
);
624 /* We have our request executing, now remove it and reset */
626 if (test_and_set_bit(I915_RESET_ENGINE
+ id
,
628 intel_gt_set_wedged(gt
);
632 tasklet_disable(&engine
->execlists
.tasklet
);
634 engine
->execlists
.tasklet
.func(engine
->execlists
.tasklet
.data
);
635 GEM_BUG_ON(execlists_active(&engine
->execlists
) != rq
);
637 i915_request_get(rq
);
638 execlists_hold(engine
, rq
);
639 GEM_BUG_ON(!i915_request_on_hold(rq
));
641 intel_engine_reset(engine
, NULL
);
642 GEM_BUG_ON(rq
->fence
.error
!= -EIO
);
644 tasklet_enable(&engine
->execlists
.tasklet
);
645 clear_and_wake_up_bit(I915_RESET_ENGINE
+ id
,
648 /* Check that we do not resubmit the held request */
649 if (!i915_request_wait(rq
, 0, HZ
/ 5)) {
650 pr_err("%s: on hold request completed!\n",
652 i915_request_put(rq
);
656 GEM_BUG_ON(!i915_request_on_hold(rq
));
658 /* But is resubmitted on release */
659 execlists_unhold(engine
, rq
);
660 if (i915_request_wait(rq
, 0, HZ
/ 5) < 0) {
661 pr_err("%s: held request did not complete!\n",
663 intel_gt_set_wedged(gt
);
666 i915_request_put(rq
);
669 st_engine_heartbeat_enable(engine
);
670 intel_context_put(ce
);
675 igt_spinner_fini(&spin
);
679 static const char *error_repr(int err
)
681 return err
? "bad" : "good";
684 static int live_error_interrupt(void *arg
)
686 static const struct error_phase
{
687 enum { GOOD
= 0, BAD
= -EIO
} error
[2];
692 { { GOOD
, GOOD
} }, /* sentinel */
694 struct intel_gt
*gt
= arg
;
695 struct intel_engine_cs
*engine
;
696 enum intel_engine_id id
;
699 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
700 * of invalid commands in user batches that will cause a GPU hang.
701 * This is a faster mechanism than using hangcheck/heartbeats, but
702 * only detects problems the HW knows about -- it will not warn when
705 * To verify our detection and reset, we throw some invalid commands
706 * at the HW and wait for the interrupt.
709 if (!intel_has_reset_engine(gt
))
712 for_each_engine(engine
, gt
, id
) {
713 const struct error_phase
*p
;
716 st_engine_heartbeat_disable(engine
);
718 for (p
= phases
; p
->error
[0] != GOOD
; p
++) {
719 struct i915_request
*client
[ARRAY_SIZE(phases
->error
)];
723 memset(client
, 0, sizeof(*client
));
724 for (i
= 0; i
< ARRAY_SIZE(client
); i
++) {
725 struct intel_context
*ce
;
726 struct i915_request
*rq
;
728 ce
= intel_context_create(engine
);
734 rq
= intel_context_create_request(ce
);
735 intel_context_put(ce
);
741 if (rq
->engine
->emit_init_breadcrumb
) {
742 err
= rq
->engine
->emit_init_breadcrumb(rq
);
744 i915_request_add(rq
);
749 cs
= intel_ring_begin(rq
, 2);
751 i915_request_add(rq
);
764 client
[i
] = i915_request_get(rq
);
765 i915_request_add(rq
);
768 err
= wait_for_submit(engine
, client
[0], HZ
/ 2);
770 pr_err("%s: first request did not start within time!\n",
776 for (i
= 0; i
< ARRAY_SIZE(client
); i
++) {
777 if (i915_request_wait(client
[i
], 0, HZ
/ 5) < 0)
778 pr_debug("%s: %s request incomplete!\n",
780 error_repr(p
->error
[i
]));
782 if (!i915_request_started(client
[i
])) {
783 pr_err("%s: %s request not started!\n",
785 error_repr(p
->error
[i
]));
790 /* Kick the tasklet to process the error */
791 intel_engine_flush_submission(engine
);
792 if (client
[i
]->fence
.error
!= p
->error
[i
]) {
793 pr_err("%s: %s request (%s) with wrong error code: %d\n",
795 error_repr(p
->error
[i
]),
796 i915_request_completed(client
[i
]) ? "completed" : "running",
797 client
[i
]->fence
.error
);
804 for (i
= 0; i
< ARRAY_SIZE(client
); i
++)
806 i915_request_put(client
[i
]);
808 pr_err("%s: failed at phase[%zd] { %d, %d }\n",
809 engine
->name
, p
- phases
,
810 p
->error
[0], p
->error
[1]);
815 st_engine_heartbeat_enable(engine
);
817 intel_gt_set_wedged(gt
);
826 emit_semaphore_chain(struct i915_request
*rq
, struct i915_vma
*vma
, int idx
)
830 cs
= intel_ring_begin(rq
, 10);
834 *cs
++ = MI_ARB_ON_OFF
| MI_ARB_ENABLE
;
836 *cs
++ = MI_SEMAPHORE_WAIT
|
837 MI_SEMAPHORE_GLOBAL_GTT
|
839 MI_SEMAPHORE_SAD_NEQ_SDD
;
841 *cs
++ = i915_ggtt_offset(vma
) + 4 * idx
;
845 *cs
++ = MI_STORE_DWORD_IMM_GEN4
| MI_USE_GGTT
;
846 *cs
++ = i915_ggtt_offset(vma
) + 4 * (idx
- 1);
856 *cs
++ = MI_ARB_ON_OFF
| MI_ARB_DISABLE
;
858 intel_ring_advance(rq
, cs
);
862 static struct i915_request
*
863 semaphore_queue(struct intel_engine_cs
*engine
, struct i915_vma
*vma
, int idx
)
865 struct intel_context
*ce
;
866 struct i915_request
*rq
;
869 ce
= intel_context_create(engine
);
873 rq
= intel_context_create_request(ce
);
878 if (rq
->engine
->emit_init_breadcrumb
)
879 err
= rq
->engine
->emit_init_breadcrumb(rq
);
881 err
= emit_semaphore_chain(rq
, vma
, idx
);
883 i915_request_get(rq
);
884 i915_request_add(rq
);
889 intel_context_put(ce
);
894 release_queue(struct intel_engine_cs
*engine
,
895 struct i915_vma
*vma
,
898 struct i915_sched_attr attr
= {
901 struct i915_request
*rq
;
904 rq
= intel_engine_create_kernel_request(engine
);
908 cs
= intel_ring_begin(rq
, 4);
910 i915_request_add(rq
);
914 *cs
++ = MI_STORE_DWORD_IMM_GEN4
| MI_USE_GGTT
;
915 *cs
++ = i915_ggtt_offset(vma
) + 4 * (idx
- 1);
919 intel_ring_advance(rq
, cs
);
921 i915_request_get(rq
);
922 i915_request_add(rq
);
925 engine
->schedule(rq
, &attr
);
926 local_bh_enable(); /* kick tasklet */
928 i915_request_put(rq
);
934 slice_semaphore_queue(struct intel_engine_cs
*outer
,
935 struct i915_vma
*vma
,
938 struct intel_engine_cs
*engine
;
939 struct i915_request
*head
;
940 enum intel_engine_id id
;
943 head
= semaphore_queue(outer
, vma
, n
++);
945 return PTR_ERR(head
);
947 for_each_engine(engine
, outer
->gt
, id
) {
948 for (i
= 0; i
< count
; i
++) {
949 struct i915_request
*rq
;
951 rq
= semaphore_queue(engine
, vma
, n
++);
957 i915_request_put(rq
);
961 err
= release_queue(outer
, vma
, n
, I915_PRIORITY_BARRIER
);
965 if (i915_request_wait(head
, 0,
966 2 * outer
->gt
->info
.num_engines
* (count
+ 2) * (count
+ 3)) < 0) {
967 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
970 intel_gt_set_wedged(outer
->gt
);
975 i915_request_put(head
);
979 static int live_timeslice_preempt(void *arg
)
981 struct intel_gt
*gt
= arg
;
982 struct drm_i915_gem_object
*obj
;
983 struct intel_engine_cs
*engine
;
984 enum intel_engine_id id
;
985 struct i915_vma
*vma
;
990 * If a request takes too long, we would like to give other users
991 * a fair go on the GPU. In particular, users may create batches
992 * that wait upon external input, where that input may even be
993 * supplied by another GPU job. To avoid blocking forever, we
994 * need to preempt the current task and replace it with another
997 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION
))
1000 obj
= i915_gem_object_create_internal(gt
->i915
, PAGE_SIZE
);
1002 return PTR_ERR(obj
);
1004 vma
= i915_vma_instance(obj
, >
->ggtt
->vm
, NULL
);
1010 vaddr
= i915_gem_object_pin_map(obj
, I915_MAP_WC
);
1011 if (IS_ERR(vaddr
)) {
1012 err
= PTR_ERR(vaddr
);
1016 err
= i915_vma_pin(vma
, 0, 0, PIN_GLOBAL
);
1020 err
= i915_vma_sync(vma
);
1024 for_each_engine(engine
, gt
, id
) {
1025 if (!intel_engine_has_preemption(engine
))
1028 memset(vaddr
, 0, PAGE_SIZE
);
1030 st_engine_heartbeat_disable(engine
);
1031 err
= slice_semaphore_queue(engine
, vma
, 5);
1032 st_engine_heartbeat_enable(engine
);
1036 if (igt_flush_test(gt
->i915
)) {
1043 i915_vma_unpin(vma
);
1045 i915_gem_object_unpin_map(obj
);
1047 i915_gem_object_put(obj
);
1051 static struct i915_request
*
1052 create_rewinder(struct intel_context
*ce
,
1053 struct i915_request
*wait
,
1054 void *slot
, int idx
)
1057 i915_ggtt_offset(ce
->engine
->status_page
.vma
) +
1058 offset_in_page(slot
);
1059 struct i915_request
*rq
;
1063 rq
= intel_context_create_request(ce
);
1068 err
= i915_request_await_dma_fence(rq
, &wait
->fence
);
1073 cs
= intel_ring_begin(rq
, 14);
1079 *cs
++ = MI_ARB_ON_OFF
| MI_ARB_ENABLE
;
1082 *cs
++ = MI_SEMAPHORE_WAIT
|
1083 MI_SEMAPHORE_GLOBAL_GTT
|
1085 MI_SEMAPHORE_SAD_GTE_SDD
;
1090 *cs
++ = MI_STORE_REGISTER_MEM_GEN8
| MI_USE_GGTT
;
1091 *cs
++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq
->engine
->mmio_base
));
1092 *cs
++ = offset
+ idx
* sizeof(u32
);
1095 *cs
++ = MI_STORE_DWORD_IMM_GEN4
| MI_USE_GGTT
;
1100 intel_ring_advance(rq
, cs
);
1102 rq
->sched
.attr
.priority
= I915_PRIORITY_MASK
;
1105 i915_request_get(rq
);
1106 i915_request_add(rq
);
1108 i915_request_put(rq
);
1109 return ERR_PTR(err
);
1115 static int live_timeslice_rewind(void *arg
)
1117 struct intel_gt
*gt
= arg
;
1118 struct intel_engine_cs
*engine
;
1119 enum intel_engine_id id
;
1122 * The usual presumption on timeslice expiration is that we replace
1123 * the active context with another. However, given a chain of
1124 * dependencies we may end up with replacing the context with itself,
1125 * but only a few of those requests, forcing us to rewind the
1126 * RING_TAIL of the original request.
1128 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION
))
1131 for_each_engine(engine
, gt
, id
) {
1132 enum { A1
, A2
, B1
};
1133 enum { X
= 1, Z
, Y
};
1134 struct i915_request
*rq
[3] = {};
1135 struct intel_context
*ce
;
1136 unsigned long timeslice
;
1140 if (!intel_engine_has_timeslices(engine
))
1144 * A:rq1 -- semaphore wait, timestamp X
1145 * A:rq2 -- write timestamp Y
1147 * B:rq1 [await A:rq1] -- write timestamp Z
1149 * Force timeslice, release semaphore.
1151 * Expect execution/evaluation order XZY
1154 st_engine_heartbeat_disable(engine
);
1155 timeslice
= xchg(&engine
->props
.timeslice_duration_ms
, 1);
1157 slot
= memset32(engine
->status_page
.addr
+ 1000, 0, 4);
1159 ce
= intel_context_create(engine
);
1165 rq
[A1
] = create_rewinder(ce
, NULL
, slot
, X
);
1166 if (IS_ERR(rq
[A1
])) {
1167 intel_context_put(ce
);
1171 rq
[A2
] = create_rewinder(ce
, NULL
, slot
, Y
);
1172 intel_context_put(ce
);
1176 err
= wait_for_submit(engine
, rq
[A2
], HZ
/ 2);
1178 pr_err("%s: failed to submit first context\n",
1183 ce
= intel_context_create(engine
);
1189 rq
[B1
] = create_rewinder(ce
, rq
[A1
], slot
, Z
);
1190 intel_context_put(ce
);
1194 err
= wait_for_submit(engine
, rq
[B1
], HZ
/ 2);
1196 pr_err("%s: failed to submit second context\n",
1201 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1202 ENGINE_TRACE(engine
, "forcing tasklet for rewind\n");
1203 if (i915_request_is_active(rq
[A2
])) { /* semaphore yielded! */
1204 /* Wait for the timeslice to kick in */
1205 del_timer(&engine
->execlists
.timer
);
1206 tasklet_hi_schedule(&engine
->execlists
.tasklet
);
1207 intel_engine_flush_submission(engine
);
1209 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1210 GEM_BUG_ON(!i915_request_is_active(rq
[A1
]));
1211 GEM_BUG_ON(!i915_request_is_active(rq
[B1
]));
1212 GEM_BUG_ON(i915_request_is_active(rq
[A2
]));
1214 /* Release the hounds! */
1216 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1218 for (i
= 1; i
<= 3; i
++) {
1219 unsigned long timeout
= jiffies
+ HZ
/ 2;
1221 while (!READ_ONCE(slot
[i
]) &&
1222 time_before(jiffies
, timeout
))
1225 if (!time_before(jiffies
, timeout
)) {
1226 pr_err("%s: rq[%d] timed out\n",
1227 engine
->name
, i
- 1);
1232 pr_debug("%s: slot[%d]:%x\n", engine
->name
, i
, slot
[i
]);
1236 if (slot
[Z
] - slot
[X
] >= slot
[Y
] - slot
[X
]) {
1237 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1245 memset32(&slot
[0], -1, 4);
1248 engine
->props
.timeslice_duration_ms
= timeslice
;
1249 st_engine_heartbeat_enable(engine
);
1250 for (i
= 0; i
< 3; i
++)
1251 i915_request_put(rq
[i
]);
1252 if (igt_flush_test(gt
->i915
))
1261 static struct i915_request
*nop_request(struct intel_engine_cs
*engine
)
1263 struct i915_request
*rq
;
1265 rq
= intel_engine_create_kernel_request(engine
);
1269 i915_request_get(rq
);
1270 i915_request_add(rq
);
1275 static long slice_timeout(struct intel_engine_cs
*engine
)
1279 /* Enough time for a timeslice to kick in, and kick out */
1280 timeout
= 2 * msecs_to_jiffies_timeout(timeslice(engine
));
1282 /* Enough time for the nop request to complete */
1288 static int live_timeslice_queue(void *arg
)
1290 struct intel_gt
*gt
= arg
;
1291 struct drm_i915_gem_object
*obj
;
1292 struct intel_engine_cs
*engine
;
1293 enum intel_engine_id id
;
1294 struct i915_vma
*vma
;
1299 * Make sure that even if ELSP[0] and ELSP[1] are filled with
1300 * timeslicing between them disabled, we *do* enable timeslicing
1301 * if the queue demands it. (Normally, we do not submit if
1302 * ELSP[1] is already occupied, so must rely on timeslicing to
1303 * eject ELSP[0] in favour of the queue.)
1305 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION
))
1308 obj
= i915_gem_object_create_internal(gt
->i915
, PAGE_SIZE
);
1310 return PTR_ERR(obj
);
1312 vma
= i915_vma_instance(obj
, >
->ggtt
->vm
, NULL
);
1318 vaddr
= i915_gem_object_pin_map(obj
, I915_MAP_WC
);
1319 if (IS_ERR(vaddr
)) {
1320 err
= PTR_ERR(vaddr
);
1324 err
= i915_vma_pin(vma
, 0, 0, PIN_GLOBAL
);
1328 err
= i915_vma_sync(vma
);
1332 for_each_engine(engine
, gt
, id
) {
1333 struct i915_sched_attr attr
= {
1334 .priority
= I915_USER_PRIORITY(I915_PRIORITY_MAX
),
1336 struct i915_request
*rq
, *nop
;
1338 if (!intel_engine_has_preemption(engine
))
1341 st_engine_heartbeat_disable(engine
);
1342 memset(vaddr
, 0, PAGE_SIZE
);
1344 /* ELSP[0]: semaphore wait */
1345 rq
= semaphore_queue(engine
, vma
, 0);
1350 engine
->schedule(rq
, &attr
);
1351 err
= wait_for_submit(engine
, rq
, HZ
/ 2);
1353 pr_err("%s: Timed out trying to submit semaphores\n",
1358 /* ELSP[1]: nop request */
1359 nop
= nop_request(engine
);
1364 err
= wait_for_submit(engine
, nop
, HZ
/ 2);
1365 i915_request_put(nop
);
1367 pr_err("%s: Timed out trying to submit nop\n",
1372 GEM_BUG_ON(i915_request_completed(rq
));
1373 GEM_BUG_ON(execlists_active(&engine
->execlists
) != rq
);
1375 /* Queue: semaphore signal, matching priority as semaphore */
1376 err
= release_queue(engine
, vma
, 1, effective_prio(rq
));
1380 /* Wait until we ack the release_queue and start timeslicing */
1383 intel_engine_flush_submission(engine
);
1384 } while (READ_ONCE(engine
->execlists
.pending
[0]));
1386 /* Timeslice every jiffy, so within 2 we should signal */
1387 if (i915_request_wait(rq
, 0, slice_timeout(engine
)) < 0) {
1388 struct drm_printer p
=
1389 drm_info_printer(gt
->i915
->drm
.dev
);
1391 pr_err("%s: Failed to timeslice into queue\n",
1393 intel_engine_dump(engine
, &p
,
1394 "%s\n", engine
->name
);
1396 memset(vaddr
, 0xff, PAGE_SIZE
);
1400 i915_request_put(rq
);
1402 st_engine_heartbeat_enable(engine
);
1408 i915_vma_unpin(vma
);
1410 i915_gem_object_unpin_map(obj
);
1412 i915_gem_object_put(obj
);
1416 static int live_timeslice_nopreempt(void *arg
)
1418 struct intel_gt
*gt
= arg
;
1419 struct intel_engine_cs
*engine
;
1420 enum intel_engine_id id
;
1421 struct igt_spinner spin
;
1425 * We should not timeslice into a request that is marked with
1426 * I915_REQUEST_NOPREEMPT.
1428 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION
))
1431 if (igt_spinner_init(&spin
, gt
))
1434 for_each_engine(engine
, gt
, id
) {
1435 struct intel_context
*ce
;
1436 struct i915_request
*rq
;
1437 unsigned long timeslice
;
1439 if (!intel_engine_has_preemption(engine
))
1442 ce
= intel_context_create(engine
);
1448 st_engine_heartbeat_disable(engine
);
1449 timeslice
= xchg(&engine
->props
.timeslice_duration_ms
, 1);
1451 /* Create an unpreemptible spinner */
1453 rq
= igt_spinner_create_request(&spin
, ce
, MI_ARB_CHECK
);
1454 intel_context_put(ce
);
1460 i915_request_get(rq
);
1461 i915_request_add(rq
);
1463 if (!igt_wait_for_spinner(&spin
, rq
)) {
1464 i915_request_put(rq
);
1469 set_bit(I915_FENCE_FLAG_NOPREEMPT
, &rq
->fence
.flags
);
1470 i915_request_put(rq
);
1472 /* Followed by a maximum priority barrier (heartbeat) */
1474 ce
= intel_context_create(engine
);
1480 rq
= intel_context_create_request(ce
);
1481 intel_context_put(ce
);
1487 rq
->sched
.attr
.priority
= I915_PRIORITY_BARRIER
;
1488 i915_request_get(rq
);
1489 i915_request_add(rq
);
1492 * Wait until the barrier is in ELSP, and we know timeslicing
1493 * will have been activated.
1495 if (wait_for_submit(engine
, rq
, HZ
/ 2)) {
1496 i915_request_put(rq
);
1502 * Since the ELSP[0] request is unpreemptible, it should not
1503 * allow the maximum priority barrier through. Wait long
1504 * enough to see if it is timesliced in by mistake.
1506 if (i915_request_wait(rq
, 0, slice_timeout(engine
)) >= 0) {
1507 pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1511 i915_request_put(rq
);
1514 igt_spinner_end(&spin
);
1516 xchg(&engine
->props
.timeslice_duration_ms
, timeslice
);
1517 st_engine_heartbeat_enable(engine
);
1521 if (igt_flush_test(gt
->i915
)) {
1527 igt_spinner_fini(&spin
);
1531 static int live_busywait_preempt(void *arg
)
1533 struct intel_gt
*gt
= arg
;
1534 struct i915_gem_context
*ctx_hi
, *ctx_lo
;
1535 struct intel_engine_cs
*engine
;
1536 struct drm_i915_gem_object
*obj
;
1537 struct i915_vma
*vma
;
1538 enum intel_engine_id id
;
1543 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1544 * preempt the busywaits used to synchronise between rings.
1547 ctx_hi
= kernel_context(gt
->i915
);
1550 ctx_hi
->sched
.priority
=
1551 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY
);
1553 ctx_lo
= kernel_context(gt
->i915
);
1556 ctx_lo
->sched
.priority
=
1557 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY
);
1559 obj
= i915_gem_object_create_internal(gt
->i915
, PAGE_SIZE
);
1565 map
= i915_gem_object_pin_map(obj
, I915_MAP_WC
);
1571 vma
= i915_vma_instance(obj
, >
->ggtt
->vm
, NULL
);
1577 err
= i915_vma_pin(vma
, 0, 0, PIN_GLOBAL
);
1581 err
= i915_vma_sync(vma
);
1585 for_each_engine(engine
, gt
, id
) {
1586 struct i915_request
*lo
, *hi
;
1587 struct igt_live_test t
;
1590 if (!intel_engine_has_preemption(engine
))
1593 if (!intel_engine_can_store_dword(engine
))
1596 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
)) {
1602 * We create two requests. The low priority request
1603 * busywaits on a semaphore (inside the ringbuffer where
1604 * it should be preemptible) and the high priority request
1605 * uses a MI_STORE_DWORD_IMM to update the semaphore value
1606 * allowing the first request to complete. If preemption
1607 * fails, we hang instead.
1610 lo
= igt_request_alloc(ctx_lo
, engine
);
1616 cs
= intel_ring_begin(lo
, 8);
1619 i915_request_add(lo
);
1623 *cs
++ = MI_STORE_DWORD_IMM_GEN4
| MI_USE_GGTT
;
1624 *cs
++ = i915_ggtt_offset(vma
);
1628 /* XXX Do we need a flush + invalidate here? */
1630 *cs
++ = MI_SEMAPHORE_WAIT
|
1631 MI_SEMAPHORE_GLOBAL_GTT
|
1633 MI_SEMAPHORE_SAD_EQ_SDD
;
1635 *cs
++ = i915_ggtt_offset(vma
);
1638 intel_ring_advance(lo
, cs
);
1640 i915_request_get(lo
);
1641 i915_request_add(lo
);
1643 if (wait_for(READ_ONCE(*map
), 10)) {
1644 i915_request_put(lo
);
1649 /* Low priority request should be busywaiting now */
1650 if (i915_request_wait(lo
, 0, 1) != -ETIME
) {
1651 i915_request_put(lo
);
1652 pr_err("%s: Busywaiting request did not!\n",
1658 hi
= igt_request_alloc(ctx_hi
, engine
);
1661 i915_request_put(lo
);
1665 cs
= intel_ring_begin(hi
, 4);
1668 i915_request_add(hi
);
1669 i915_request_put(lo
);
1673 *cs
++ = MI_STORE_DWORD_IMM_GEN4
| MI_USE_GGTT
;
1674 *cs
++ = i915_ggtt_offset(vma
);
1678 intel_ring_advance(hi
, cs
);
1679 i915_request_add(hi
);
1681 if (i915_request_wait(lo
, 0, HZ
/ 5) < 0) {
1682 struct drm_printer p
= drm_info_printer(gt
->i915
->drm
.dev
);
1684 pr_err("%s: Failed to preempt semaphore busywait!\n",
1687 intel_engine_dump(engine
, &p
, "%s\n", engine
->name
);
1690 i915_request_put(lo
);
1691 intel_gt_set_wedged(gt
);
1695 GEM_BUG_ON(READ_ONCE(*map
));
1696 i915_request_put(lo
);
1698 if (igt_live_test_end(&t
)) {
1706 i915_vma_unpin(vma
);
1708 i915_gem_object_unpin_map(obj
);
1710 i915_gem_object_put(obj
);
1712 kernel_context_close(ctx_lo
);
1714 kernel_context_close(ctx_hi
);
1718 static struct i915_request
*
1719 spinner_create_request(struct igt_spinner
*spin
,
1720 struct i915_gem_context
*ctx
,
1721 struct intel_engine_cs
*engine
,
1724 struct intel_context
*ce
;
1725 struct i915_request
*rq
;
1727 ce
= i915_gem_context_get_engine(ctx
, engine
->legacy_idx
);
1729 return ERR_CAST(ce
);
1731 rq
= igt_spinner_create_request(spin
, ce
, arb
);
1732 intel_context_put(ce
);
1736 static int live_preempt(void *arg
)
1738 struct intel_gt
*gt
= arg
;
1739 struct i915_gem_context
*ctx_hi
, *ctx_lo
;
1740 struct igt_spinner spin_hi
, spin_lo
;
1741 struct intel_engine_cs
*engine
;
1742 enum intel_engine_id id
;
1745 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
1748 if (!(gt
->i915
->caps
.scheduler
& I915_SCHEDULER_CAP_PREEMPTION
))
1749 pr_err("Logical preemption supported, but not exposed\n");
1751 if (igt_spinner_init(&spin_hi
, gt
))
1754 if (igt_spinner_init(&spin_lo
, gt
))
1757 ctx_hi
= kernel_context(gt
->i915
);
1760 ctx_hi
->sched
.priority
=
1761 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY
);
1763 ctx_lo
= kernel_context(gt
->i915
);
1766 ctx_lo
->sched
.priority
=
1767 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY
);
1769 for_each_engine(engine
, gt
, id
) {
1770 struct igt_live_test t
;
1771 struct i915_request
*rq
;
1773 if (!intel_engine_has_preemption(engine
))
1776 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
)) {
1781 rq
= spinner_create_request(&spin_lo
, ctx_lo
, engine
,
1788 i915_request_add(rq
);
1789 if (!igt_wait_for_spinner(&spin_lo
, rq
)) {
1790 GEM_TRACE("lo spinner failed to start\n");
1792 intel_gt_set_wedged(gt
);
1797 rq
= spinner_create_request(&spin_hi
, ctx_hi
, engine
,
1800 igt_spinner_end(&spin_lo
);
1805 i915_request_add(rq
);
1806 if (!igt_wait_for_spinner(&spin_hi
, rq
)) {
1807 GEM_TRACE("hi spinner failed to start\n");
1809 intel_gt_set_wedged(gt
);
1814 igt_spinner_end(&spin_hi
);
1815 igt_spinner_end(&spin_lo
);
1817 if (igt_live_test_end(&t
)) {
1825 kernel_context_close(ctx_lo
);
1827 kernel_context_close(ctx_hi
);
1829 igt_spinner_fini(&spin_lo
);
1831 igt_spinner_fini(&spin_hi
);
1835 static int live_late_preempt(void *arg
)
1837 struct intel_gt
*gt
= arg
;
1838 struct i915_gem_context
*ctx_hi
, *ctx_lo
;
1839 struct igt_spinner spin_hi
, spin_lo
;
1840 struct intel_engine_cs
*engine
;
1841 struct i915_sched_attr attr
= {};
1842 enum intel_engine_id id
;
1845 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
1848 if (igt_spinner_init(&spin_hi
, gt
))
1851 if (igt_spinner_init(&spin_lo
, gt
))
1854 ctx_hi
= kernel_context(gt
->i915
);
1858 ctx_lo
= kernel_context(gt
->i915
);
1862 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1863 ctx_lo
->sched
.priority
= I915_USER_PRIORITY(1);
1865 for_each_engine(engine
, gt
, id
) {
1866 struct igt_live_test t
;
1867 struct i915_request
*rq
;
1869 if (!intel_engine_has_preemption(engine
))
1872 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
)) {
1877 rq
= spinner_create_request(&spin_lo
, ctx_lo
, engine
,
1884 i915_request_add(rq
);
1885 if (!igt_wait_for_spinner(&spin_lo
, rq
)) {
1886 pr_err("First context failed to start\n");
1890 rq
= spinner_create_request(&spin_hi
, ctx_hi
, engine
,
1893 igt_spinner_end(&spin_lo
);
1898 i915_request_add(rq
);
1899 if (igt_wait_for_spinner(&spin_hi
, rq
)) {
1900 pr_err("Second context overtook first?\n");
1904 attr
.priority
= I915_USER_PRIORITY(I915_PRIORITY_MAX
);
1905 engine
->schedule(rq
, &attr
);
1907 if (!igt_wait_for_spinner(&spin_hi
, rq
)) {
1908 pr_err("High priority context failed to preempt the low priority context\n");
1913 igt_spinner_end(&spin_hi
);
1914 igt_spinner_end(&spin_lo
);
1916 if (igt_live_test_end(&t
)) {
1924 kernel_context_close(ctx_lo
);
1926 kernel_context_close(ctx_hi
);
1928 igt_spinner_fini(&spin_lo
);
1930 igt_spinner_fini(&spin_hi
);
1934 igt_spinner_end(&spin_hi
);
1935 igt_spinner_end(&spin_lo
);
1936 intel_gt_set_wedged(gt
);
1941 struct preempt_client
{
1942 struct igt_spinner spin
;
1943 struct i915_gem_context
*ctx
;
1946 static int preempt_client_init(struct intel_gt
*gt
, struct preempt_client
*c
)
1948 c
->ctx
= kernel_context(gt
->i915
);
1952 if (igt_spinner_init(&c
->spin
, gt
))
1958 kernel_context_close(c
->ctx
);
1962 static void preempt_client_fini(struct preempt_client
*c
)
1964 igt_spinner_fini(&c
->spin
);
1965 kernel_context_close(c
->ctx
);
1968 static int live_nopreempt(void *arg
)
1970 struct intel_gt
*gt
= arg
;
1971 struct intel_engine_cs
*engine
;
1972 struct preempt_client a
, b
;
1973 enum intel_engine_id id
;
1977 * Verify that we can disable preemption for an individual request
1978 * that may be being observed and not want to be interrupted.
1981 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
1984 if (preempt_client_init(gt
, &a
))
1986 if (preempt_client_init(gt
, &b
))
1988 b
.ctx
->sched
.priority
= I915_USER_PRIORITY(I915_PRIORITY_MAX
);
1990 for_each_engine(engine
, gt
, id
) {
1991 struct i915_request
*rq_a
, *rq_b
;
1993 if (!intel_engine_has_preemption(engine
))
1996 engine
->execlists
.preempt_hang
.count
= 0;
1998 rq_a
= spinner_create_request(&a
.spin
,
2002 err
= PTR_ERR(rq_a
);
2006 /* Low priority client, but unpreemptable! */
2007 __set_bit(I915_FENCE_FLAG_NOPREEMPT
, &rq_a
->fence
.flags
);
2009 i915_request_add(rq_a
);
2010 if (!igt_wait_for_spinner(&a
.spin
, rq_a
)) {
2011 pr_err("First client failed to start\n");
2015 rq_b
= spinner_create_request(&b
.spin
,
2019 err
= PTR_ERR(rq_b
);
2023 i915_request_add(rq_b
);
2025 /* B is much more important than A! (But A is unpreemptable.) */
2026 GEM_BUG_ON(rq_prio(rq_b
) <= rq_prio(rq_a
));
2028 /* Wait long enough for preemption and timeslicing */
2029 if (igt_wait_for_spinner(&b
.spin
, rq_b
)) {
2030 pr_err("Second client started too early!\n");
2034 igt_spinner_end(&a
.spin
);
2036 if (!igt_wait_for_spinner(&b
.spin
, rq_b
)) {
2037 pr_err("Second client failed to start\n");
2041 igt_spinner_end(&b
.spin
);
2043 if (engine
->execlists
.preempt_hang
.count
) {
2044 pr_err("Preemption recorded x%d; should have been suppressed!\n",
2045 engine
->execlists
.preempt_hang
.count
);
2050 if (igt_flush_test(gt
->i915
))
2056 preempt_client_fini(&b
);
2058 preempt_client_fini(&a
);
2062 igt_spinner_end(&b
.spin
);
2063 igt_spinner_end(&a
.spin
);
2064 intel_gt_set_wedged(gt
);
2069 struct live_preempt_cancel
{
2070 struct intel_engine_cs
*engine
;
2071 struct preempt_client a
, b
;
2074 static int __cancel_active0(struct live_preempt_cancel
*arg
)
2076 struct i915_request
*rq
;
2077 struct igt_live_test t
;
2080 /* Preempt cancel of ELSP0 */
2081 GEM_TRACE("%s(%s)\n", __func__
, arg
->engine
->name
);
2082 if (igt_live_test_begin(&t
, arg
->engine
->i915
,
2083 __func__
, arg
->engine
->name
))
2086 rq
= spinner_create_request(&arg
->a
.spin
,
2087 arg
->a
.ctx
, arg
->engine
,
2092 clear_bit(CONTEXT_BANNED
, &rq
->context
->flags
);
2093 i915_request_get(rq
);
2094 i915_request_add(rq
);
2095 if (!igt_wait_for_spinner(&arg
->a
.spin
, rq
)) {
2100 intel_context_set_banned(rq
->context
);
2101 err
= intel_engine_pulse(arg
->engine
);
2105 err
= wait_for_reset(arg
->engine
, rq
, HZ
/ 2);
2107 pr_err("Cancelled inflight0 request did not reset\n");
2112 i915_request_put(rq
);
2113 if (igt_live_test_end(&t
))
2118 static int __cancel_active1(struct live_preempt_cancel
*arg
)
2120 struct i915_request
*rq
[2] = {};
2121 struct igt_live_test t
;
2124 /* Preempt cancel of ELSP1 */
2125 GEM_TRACE("%s(%s)\n", __func__
, arg
->engine
->name
);
2126 if (igt_live_test_begin(&t
, arg
->engine
->i915
,
2127 __func__
, arg
->engine
->name
))
2130 rq
[0] = spinner_create_request(&arg
->a
.spin
,
2131 arg
->a
.ctx
, arg
->engine
,
2132 MI_NOOP
); /* no preemption */
2134 return PTR_ERR(rq
[0]);
2136 clear_bit(CONTEXT_BANNED
, &rq
[0]->context
->flags
);
2137 i915_request_get(rq
[0]);
2138 i915_request_add(rq
[0]);
2139 if (!igt_wait_for_spinner(&arg
->a
.spin
, rq
[0])) {
2144 rq
[1] = spinner_create_request(&arg
->b
.spin
,
2145 arg
->b
.ctx
, arg
->engine
,
2147 if (IS_ERR(rq
[1])) {
2148 err
= PTR_ERR(rq
[1]);
2152 clear_bit(CONTEXT_BANNED
, &rq
[1]->context
->flags
);
2153 i915_request_get(rq
[1]);
2154 err
= i915_request_await_dma_fence(rq
[1], &rq
[0]->fence
);
2155 i915_request_add(rq
[1]);
2159 intel_context_set_banned(rq
[1]->context
);
2160 err
= intel_engine_pulse(arg
->engine
);
2164 igt_spinner_end(&arg
->a
.spin
);
2165 err
= wait_for_reset(arg
->engine
, rq
[1], HZ
/ 2);
2169 if (rq
[0]->fence
.error
!= 0) {
2170 pr_err("Normal inflight0 request did not complete\n");
2175 if (rq
[1]->fence
.error
!= -EIO
) {
2176 pr_err("Cancelled inflight1 request did not report -EIO\n");
2182 i915_request_put(rq
[1]);
2183 i915_request_put(rq
[0]);
2184 if (igt_live_test_end(&t
))
2189 static int __cancel_queued(struct live_preempt_cancel
*arg
)
2191 struct i915_request
*rq
[3] = {};
2192 struct igt_live_test t
;
2195 /* Full ELSP and one in the wings */
2196 GEM_TRACE("%s(%s)\n", __func__
, arg
->engine
->name
);
2197 if (igt_live_test_begin(&t
, arg
->engine
->i915
,
2198 __func__
, arg
->engine
->name
))
2201 rq
[0] = spinner_create_request(&arg
->a
.spin
,
2202 arg
->a
.ctx
, arg
->engine
,
2205 return PTR_ERR(rq
[0]);
2207 clear_bit(CONTEXT_BANNED
, &rq
[0]->context
->flags
);
2208 i915_request_get(rq
[0]);
2209 i915_request_add(rq
[0]);
2210 if (!igt_wait_for_spinner(&arg
->a
.spin
, rq
[0])) {
2215 rq
[1] = igt_request_alloc(arg
->b
.ctx
, arg
->engine
);
2216 if (IS_ERR(rq
[1])) {
2217 err
= PTR_ERR(rq
[1]);
2221 clear_bit(CONTEXT_BANNED
, &rq
[1]->context
->flags
);
2222 i915_request_get(rq
[1]);
2223 err
= i915_request_await_dma_fence(rq
[1], &rq
[0]->fence
);
2224 i915_request_add(rq
[1]);
2228 rq
[2] = spinner_create_request(&arg
->b
.spin
,
2229 arg
->a
.ctx
, arg
->engine
,
2231 if (IS_ERR(rq
[2])) {
2232 err
= PTR_ERR(rq
[2]);
2236 i915_request_get(rq
[2]);
2237 err
= i915_request_await_dma_fence(rq
[2], &rq
[1]->fence
);
2238 i915_request_add(rq
[2]);
2242 intel_context_set_banned(rq
[2]->context
);
2243 err
= intel_engine_pulse(arg
->engine
);
2247 err
= wait_for_reset(arg
->engine
, rq
[2], HZ
/ 2);
2251 if (rq
[0]->fence
.error
!= -EIO
) {
2252 pr_err("Cancelled inflight0 request did not report -EIO\n");
2257 if (rq
[1]->fence
.error
!= 0) {
2258 pr_err("Normal inflight1 request did not complete\n");
2263 if (rq
[2]->fence
.error
!= -EIO
) {
2264 pr_err("Cancelled queued request did not report -EIO\n");
2270 i915_request_put(rq
[2]);
2271 i915_request_put(rq
[1]);
2272 i915_request_put(rq
[0]);
2273 if (igt_live_test_end(&t
))
2278 static int __cancel_hostile(struct live_preempt_cancel
*arg
)
2280 struct i915_request
*rq
;
2283 /* Preempt cancel non-preemptible spinner in ELSP0 */
2284 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT
))
2287 if (!intel_has_reset_engine(arg
->engine
->gt
))
2290 GEM_TRACE("%s(%s)\n", __func__
, arg
->engine
->name
);
2291 rq
= spinner_create_request(&arg
->a
.spin
,
2292 arg
->a
.ctx
, arg
->engine
,
2293 MI_NOOP
); /* preemption disabled */
2297 clear_bit(CONTEXT_BANNED
, &rq
->context
->flags
);
2298 i915_request_get(rq
);
2299 i915_request_add(rq
);
2300 if (!igt_wait_for_spinner(&arg
->a
.spin
, rq
)) {
2305 intel_context_set_banned(rq
->context
);
2306 err
= intel_engine_pulse(arg
->engine
); /* force reset */
2310 err
= wait_for_reset(arg
->engine
, rq
, HZ
/ 2);
2312 pr_err("Cancelled inflight0 request did not reset\n");
2317 i915_request_put(rq
);
2318 if (igt_flush_test(arg
->engine
->i915
))
2323 static int live_preempt_cancel(void *arg
)
2325 struct intel_gt
*gt
= arg
;
2326 struct live_preempt_cancel data
;
2327 enum intel_engine_id id
;
2331 * To cancel an inflight context, we need to first remove it from the
2332 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2335 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
2338 if (preempt_client_init(gt
, &data
.a
))
2340 if (preempt_client_init(gt
, &data
.b
))
2343 for_each_engine(data
.engine
, gt
, id
) {
2344 if (!intel_engine_has_preemption(data
.engine
))
2347 err
= __cancel_active0(&data
);
2351 err
= __cancel_active1(&data
);
2355 err
= __cancel_queued(&data
);
2359 err
= __cancel_hostile(&data
);
2366 preempt_client_fini(&data
.b
);
2368 preempt_client_fini(&data
.a
);
2373 igt_spinner_end(&data
.b
.spin
);
2374 igt_spinner_end(&data
.a
.spin
);
2375 intel_gt_set_wedged(gt
);
2379 static int live_suppress_self_preempt(void *arg
)
2381 struct intel_gt
*gt
= arg
;
2382 struct intel_engine_cs
*engine
;
2383 struct i915_sched_attr attr
= {
2384 .priority
= I915_USER_PRIORITY(I915_PRIORITY_MAX
)
2386 struct preempt_client a
, b
;
2387 enum intel_engine_id id
;
2391 * Verify that if a preemption request does not cause a change in
2392 * the current execution order, the preempt-to-idle injection is
2393 * skipped and that we do not accidentally apply it after the CS
2397 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
2400 if (intel_uc_uses_guc_submission(>
->uc
))
2401 return 0; /* presume black box */
2403 if (intel_vgpu_active(gt
->i915
))
2404 return 0; /* GVT forces single port & request submission */
2406 if (preempt_client_init(gt
, &a
))
2408 if (preempt_client_init(gt
, &b
))
2411 for_each_engine(engine
, gt
, id
) {
2412 struct i915_request
*rq_a
, *rq_b
;
2415 if (!intel_engine_has_preemption(engine
))
2418 if (igt_flush_test(gt
->i915
))
2421 st_engine_heartbeat_disable(engine
);
2422 engine
->execlists
.preempt_hang
.count
= 0;
2424 rq_a
= spinner_create_request(&a
.spin
,
2428 err
= PTR_ERR(rq_a
);
2429 st_engine_heartbeat_enable(engine
);
2433 i915_request_add(rq_a
);
2434 if (!igt_wait_for_spinner(&a
.spin
, rq_a
)) {
2435 pr_err("First client failed to start\n");
2436 st_engine_heartbeat_enable(engine
);
2440 /* Keep postponing the timer to avoid premature slicing */
2441 mod_timer(&engine
->execlists
.timer
, jiffies
+ HZ
);
2442 for (depth
= 0; depth
< 8; depth
++) {
2443 rq_b
= spinner_create_request(&b
.spin
,
2447 err
= PTR_ERR(rq_b
);
2448 st_engine_heartbeat_enable(engine
);
2451 i915_request_add(rq_b
);
2453 GEM_BUG_ON(i915_request_completed(rq_a
));
2454 engine
->schedule(rq_a
, &attr
);
2455 igt_spinner_end(&a
.spin
);
2457 if (!igt_wait_for_spinner(&b
.spin
, rq_b
)) {
2458 pr_err("Second client failed to start\n");
2459 st_engine_heartbeat_enable(engine
);
2466 igt_spinner_end(&a
.spin
);
2468 if (engine
->execlists
.preempt_hang
.count
) {
2469 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2471 engine
->execlists
.preempt_hang
.count
,
2473 st_engine_heartbeat_enable(engine
);
2478 st_engine_heartbeat_enable(engine
);
2479 if (igt_flush_test(gt
->i915
))
2485 preempt_client_fini(&b
);
2487 preempt_client_fini(&a
);
2491 igt_spinner_end(&b
.spin
);
2492 igt_spinner_end(&a
.spin
);
2493 intel_gt_set_wedged(gt
);
2498 static int live_chain_preempt(void *arg
)
2500 struct intel_gt
*gt
= arg
;
2501 struct intel_engine_cs
*engine
;
2502 struct preempt_client hi
, lo
;
2503 enum intel_engine_id id
;
2507 * Build a chain AB...BA between two contexts (A, B) and request
2508 * preemption of the last request. It should then complete before
2509 * the previously submitted spinner in B.
2512 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
2515 if (preempt_client_init(gt
, &hi
))
2518 if (preempt_client_init(gt
, &lo
))
2521 for_each_engine(engine
, gt
, id
) {
2522 struct i915_sched_attr attr
= {
2523 .priority
= I915_USER_PRIORITY(I915_PRIORITY_MAX
),
2525 struct igt_live_test t
;
2526 struct i915_request
*rq
;
2527 int ring_size
, count
, i
;
2529 if (!intel_engine_has_preemption(engine
))
2532 rq
= spinner_create_request(&lo
.spin
,
2538 i915_request_get(rq
);
2539 i915_request_add(rq
);
2541 ring_size
= rq
->wa_tail
- rq
->head
;
2543 ring_size
+= rq
->ring
->size
;
2544 ring_size
= rq
->ring
->size
/ ring_size
;
2545 pr_debug("%s(%s): Using maximum of %d requests\n",
2546 __func__
, engine
->name
, ring_size
);
2548 igt_spinner_end(&lo
.spin
);
2549 if (i915_request_wait(rq
, 0, HZ
/ 2) < 0) {
2550 pr_err("Timed out waiting to flush %s\n", engine
->name
);
2551 i915_request_put(rq
);
2554 i915_request_put(rq
);
2556 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
)) {
2561 for_each_prime_number_from(count
, 1, ring_size
) {
2562 rq
= spinner_create_request(&hi
.spin
,
2567 i915_request_add(rq
);
2568 if (!igt_wait_for_spinner(&hi
.spin
, rq
))
2571 rq
= spinner_create_request(&lo
.spin
,
2576 i915_request_add(rq
);
2578 for (i
= 0; i
< count
; i
++) {
2579 rq
= igt_request_alloc(lo
.ctx
, engine
);
2582 i915_request_add(rq
);
2585 rq
= igt_request_alloc(hi
.ctx
, engine
);
2589 i915_request_get(rq
);
2590 i915_request_add(rq
);
2591 engine
->schedule(rq
, &attr
);
2593 igt_spinner_end(&hi
.spin
);
2594 if (i915_request_wait(rq
, 0, HZ
/ 5) < 0) {
2595 struct drm_printer p
=
2596 drm_info_printer(gt
->i915
->drm
.dev
);
2598 pr_err("Failed to preempt over chain of %d\n",
2600 intel_engine_dump(engine
, &p
,
2601 "%s\n", engine
->name
);
2602 i915_request_put(rq
);
2605 igt_spinner_end(&lo
.spin
);
2606 i915_request_put(rq
);
2608 rq
= igt_request_alloc(lo
.ctx
, engine
);
2612 i915_request_get(rq
);
2613 i915_request_add(rq
);
2615 if (i915_request_wait(rq
, 0, HZ
/ 5) < 0) {
2616 struct drm_printer p
=
2617 drm_info_printer(gt
->i915
->drm
.dev
);
2619 pr_err("Failed to flush low priority chain of %d requests\n",
2621 intel_engine_dump(engine
, &p
,
2622 "%s\n", engine
->name
);
2624 i915_request_put(rq
);
2627 i915_request_put(rq
);
2630 if (igt_live_test_end(&t
)) {
2638 preempt_client_fini(&lo
);
2640 preempt_client_fini(&hi
);
2644 igt_spinner_end(&hi
.spin
);
2645 igt_spinner_end(&lo
.spin
);
2646 intel_gt_set_wedged(gt
);
2651 static int create_gang(struct intel_engine_cs
*engine
,
2652 struct i915_request
**prev
)
2654 struct drm_i915_gem_object
*obj
;
2655 struct intel_context
*ce
;
2656 struct i915_request
*rq
;
2657 struct i915_vma
*vma
;
2661 ce
= intel_context_create(engine
);
2665 obj
= i915_gem_object_create_internal(engine
->i915
, 4096);
2671 vma
= i915_vma_instance(obj
, ce
->vm
, NULL
);
2677 err
= i915_vma_pin(vma
, 0, 0, PIN_USER
);
2681 cs
= i915_gem_object_pin_map(obj
, I915_MAP_WC
);
2685 /* Semaphore target: spin until zero */
2686 *cs
++ = MI_ARB_ON_OFF
| MI_ARB_ENABLE
;
2688 *cs
++ = MI_SEMAPHORE_WAIT
|
2690 MI_SEMAPHORE_SAD_EQ_SDD
;
2692 *cs
++ = lower_32_bits(vma
->node
.start
);
2693 *cs
++ = upper_32_bits(vma
->node
.start
);
2696 u64 offset
= (*prev
)->batch
->node
.start
;
2698 /* Terminate the spinner in the next lower priority batch. */
2699 *cs
++ = MI_STORE_DWORD_IMM_GEN4
;
2700 *cs
++ = lower_32_bits(offset
);
2701 *cs
++ = upper_32_bits(offset
);
2705 *cs
++ = MI_BATCH_BUFFER_END
;
2706 i915_gem_object_flush_map(obj
);
2707 i915_gem_object_unpin_map(obj
);
2709 rq
= intel_context_create_request(ce
);
2713 rq
->batch
= i915_vma_get(vma
);
2714 i915_request_get(rq
);
2717 err
= i915_request_await_object(rq
, vma
->obj
, false);
2719 err
= i915_vma_move_to_active(vma
, rq
, 0);
2721 err
= rq
->engine
->emit_bb_start(rq
,
2724 i915_vma_unlock(vma
);
2725 i915_request_add(rq
);
2729 i915_gem_object_put(obj
);
2730 intel_context_put(ce
);
2732 rq
->mock
.link
.next
= &(*prev
)->mock
.link
;
2737 i915_vma_put(rq
->batch
);
2738 i915_request_put(rq
);
2740 i915_gem_object_put(obj
);
2742 intel_context_put(ce
);
2746 static int __live_preempt_ring(struct intel_engine_cs
*engine
,
2747 struct igt_spinner
*spin
,
2748 int queue_sz
, int ring_sz
)
2750 struct intel_context
*ce
[2] = {};
2751 struct i915_request
*rq
;
2752 struct igt_live_test t
;
2756 if (igt_live_test_begin(&t
, engine
->i915
, __func__
, engine
->name
))
2759 for (n
= 0; n
< ARRAY_SIZE(ce
); n
++) {
2760 struct intel_context
*tmp
;
2762 tmp
= intel_context_create(engine
);
2768 tmp
->ring
= __intel_context_ring_size(ring_sz
);
2770 err
= intel_context_pin(tmp
);
2772 intel_context_put(tmp
);
2776 memset32(tmp
->ring
->vaddr
,
2777 0xdeadbeef, /* trigger a hang if executed */
2778 tmp
->ring
->vma
->size
/ sizeof(u32
));
2783 rq
= igt_spinner_create_request(spin
, ce
[0], MI_ARB_CHECK
);
2789 i915_request_get(rq
);
2790 rq
->sched
.attr
.priority
= I915_PRIORITY_BARRIER
;
2791 i915_request_add(rq
);
2793 if (!igt_wait_for_spinner(spin
, rq
)) {
2794 intel_gt_set_wedged(engine
->gt
);
2795 i915_request_put(rq
);
2800 /* Fill the ring, until we will cause a wrap */
2802 while (ce
[0]->ring
->tail
- rq
->wa_tail
<= queue_sz
) {
2803 struct i915_request
*tmp
;
2805 tmp
= intel_context_create_request(ce
[0]);
2808 i915_request_put(rq
);
2812 i915_request_add(tmp
);
2813 intel_engine_flush_submission(engine
);
2816 intel_engine_flush_submission(engine
);
2817 pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
2818 engine
->name
, queue_sz
, n
,
2823 i915_request_put(rq
);
2825 /* Create a second request to preempt the first ring */
2826 rq
= intel_context_create_request(ce
[1]);
2832 rq
->sched
.attr
.priority
= I915_PRIORITY_BARRIER
;
2833 i915_request_get(rq
);
2834 i915_request_add(rq
);
2836 err
= wait_for_submit(engine
, rq
, HZ
/ 2);
2837 i915_request_put(rq
);
2839 pr_err("%s: preemption request was not submited\n",
2844 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
2846 ce
[0]->ring
->tail
, ce
[0]->ring
->emit
,
2847 ce
[1]->ring
->tail
, ce
[1]->ring
->emit
);
2850 intel_engine_flush_submission(engine
);
2851 igt_spinner_end(spin
);
2852 for (n
= 0; n
< ARRAY_SIZE(ce
); n
++) {
2853 if (IS_ERR_OR_NULL(ce
[n
]))
2856 intel_context_unpin(ce
[n
]);
2857 intel_context_put(ce
[n
]);
2859 if (igt_live_test_end(&t
))
2864 static int live_preempt_ring(void *arg
)
2866 struct intel_gt
*gt
= arg
;
2867 struct intel_engine_cs
*engine
;
2868 struct igt_spinner spin
;
2869 enum intel_engine_id id
;
2873 * Check that we rollback large chunks of a ring in order to do a
2874 * preemption event. Similar to live_unlite_ring, but looking at
2875 * ring size rather than the impact of intel_ring_direction().
2878 if (igt_spinner_init(&spin
, gt
))
2881 for_each_engine(engine
, gt
, id
) {
2884 if (!intel_engine_has_preemption(engine
))
2887 if (!intel_engine_can_store_dword(engine
))
2890 st_engine_heartbeat_disable(engine
);
2892 for (n
= 0; n
<= 3; n
++) {
2893 err
= __live_preempt_ring(engine
, &spin
,
2894 n
* SZ_4K
/ 4, SZ_4K
);
2899 st_engine_heartbeat_enable(engine
);
2904 igt_spinner_fini(&spin
);
2908 static int live_preempt_gang(void *arg
)
2910 struct intel_gt
*gt
= arg
;
2911 struct intel_engine_cs
*engine
;
2912 enum intel_engine_id id
;
2914 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
2918 * Build as long a chain of preempters as we can, with each
2919 * request higher priority than the last. Once we are ready, we release
2920 * the last batch which then percolates down the chain, each releasing
2921 * the next oldest in turn. The intent is to simply push as hard as we
2922 * can with the number of preemptions, trying to exceed narrow HW
2923 * limits. At a minimum, we insist that we can sort all the user
2924 * high priority levels into execution order.
2927 for_each_engine(engine
, gt
, id
) {
2928 struct i915_request
*rq
= NULL
;
2929 struct igt_live_test t
;
2930 IGT_TIMEOUT(end_time
);
2935 if (!intel_engine_has_preemption(engine
))
2938 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
))
2942 struct i915_sched_attr attr
= {
2943 .priority
= I915_USER_PRIORITY(prio
++),
2946 err
= create_gang(engine
, &rq
);
2950 /* Submit each spinner at increasing priority */
2951 engine
->schedule(rq
, &attr
);
2952 } while (prio
<= I915_PRIORITY_MAX
&&
2953 !__igt_timeout(end_time
, NULL
));
2954 pr_debug("%s: Preempt chain of %d requests\n",
2955 engine
->name
, prio
);
2958 * Such that the last spinner is the highest priority and
2959 * should execute first. When that spinner completes,
2960 * it will terminate the next lowest spinner until there
2961 * are no more spinners and the gang is complete.
2963 cs
= i915_gem_object_pin_map(rq
->batch
->obj
, I915_MAP_WC
);
2966 i915_gem_object_unpin_map(rq
->batch
->obj
);
2969 intel_gt_set_wedged(gt
);
2972 while (rq
) { /* wait for each rq from highest to lowest prio */
2973 struct i915_request
*n
= list_next_entry(rq
, mock
.link
);
2975 if (err
== 0 && i915_request_wait(rq
, 0, HZ
/ 5) < 0) {
2976 struct drm_printer p
=
2977 drm_info_printer(engine
->i915
->drm
.dev
);
2979 pr_err("Failed to flush chain of %d requests, at %d\n",
2980 prio
, rq_prio(rq
) >> I915_USER_PRIORITY_SHIFT
);
2981 intel_engine_dump(engine
, &p
,
2982 "%s\n", engine
->name
);
2987 i915_vma_put(rq
->batch
);
2988 i915_request_put(rq
);
2992 if (igt_live_test_end(&t
))
3001 static struct i915_vma
*
3002 create_gpr_user(struct intel_engine_cs
*engine
,
3003 struct i915_vma
*result
,
3004 unsigned int offset
)
3006 struct drm_i915_gem_object
*obj
;
3007 struct i915_vma
*vma
;
3012 obj
= i915_gem_object_create_internal(engine
->i915
, 4096);
3014 return ERR_CAST(obj
);
3016 vma
= i915_vma_instance(obj
, result
->vm
, NULL
);
3018 i915_gem_object_put(obj
);
3022 err
= i915_vma_pin(vma
, 0, 0, PIN_USER
);
3025 return ERR_PTR(err
);
3028 cs
= i915_gem_object_pin_map(obj
, I915_MAP_WC
);
3031 return ERR_CAST(cs
);
3034 /* All GPR are clear for new contexts. We use GPR(0) as a constant */
3035 *cs
++ = MI_LOAD_REGISTER_IMM(1);
3036 *cs
++ = CS_GPR(engine
, 0);
3039 for (i
= 1; i
< NUM_GPR
; i
++) {
3045 * As we read and write into the context saved GPR[i], if
3046 * we restart this batch buffer from an earlier point, we
3047 * will repeat the increment and store a value > 1.
3050 *cs
++ = MI_MATH_LOAD(MI_MATH_REG_SRCA
, MI_MATH_REG(i
));
3051 *cs
++ = MI_MATH_LOAD(MI_MATH_REG_SRCB
, MI_MATH_REG(0));
3052 *cs
++ = MI_MATH_ADD
;
3053 *cs
++ = MI_MATH_STORE(MI_MATH_REG(i
), MI_MATH_REG_ACCU
);
3055 addr
= result
->node
.start
+ offset
+ i
* sizeof(*cs
);
3056 *cs
++ = MI_STORE_REGISTER_MEM_GEN8
;
3057 *cs
++ = CS_GPR(engine
, 2 * i
);
3058 *cs
++ = lower_32_bits(addr
);
3059 *cs
++ = upper_32_bits(addr
);
3061 *cs
++ = MI_SEMAPHORE_WAIT
|
3063 MI_SEMAPHORE_SAD_GTE_SDD
;
3065 *cs
++ = lower_32_bits(result
->node
.start
);
3066 *cs
++ = upper_32_bits(result
->node
.start
);
3069 *cs
++ = MI_BATCH_BUFFER_END
;
3070 i915_gem_object_flush_map(obj
);
3071 i915_gem_object_unpin_map(obj
);
3076 static struct i915_vma
*create_global(struct intel_gt
*gt
, size_t sz
)
3078 struct drm_i915_gem_object
*obj
;
3079 struct i915_vma
*vma
;
3082 obj
= i915_gem_object_create_internal(gt
->i915
, sz
);
3084 return ERR_CAST(obj
);
3086 vma
= i915_vma_instance(obj
, >
->ggtt
->vm
, NULL
);
3088 i915_gem_object_put(obj
);
3092 err
= i915_ggtt_pin(vma
, NULL
, 0, 0);
3095 return ERR_PTR(err
);
3101 static struct i915_request
*
3102 create_gpr_client(struct intel_engine_cs
*engine
,
3103 struct i915_vma
*global
,
3104 unsigned int offset
)
3106 struct i915_vma
*batch
, *vma
;
3107 struct intel_context
*ce
;
3108 struct i915_request
*rq
;
3111 ce
= intel_context_create(engine
);
3113 return ERR_CAST(ce
);
3115 vma
= i915_vma_instance(global
->obj
, ce
->vm
, NULL
);
3121 err
= i915_vma_pin(vma
, 0, 0, PIN_USER
);
3125 batch
= create_gpr_user(engine
, vma
, offset
);
3126 if (IS_ERR(batch
)) {
3127 err
= PTR_ERR(batch
);
3131 rq
= intel_context_create_request(ce
);
3138 err
= i915_request_await_object(rq
, vma
->obj
, false);
3140 err
= i915_vma_move_to_active(vma
, rq
, 0);
3141 i915_vma_unlock(vma
);
3143 i915_vma_lock(batch
);
3145 err
= i915_request_await_object(rq
, batch
->obj
, false);
3147 err
= i915_vma_move_to_active(batch
, rq
, 0);
3149 err
= rq
->engine
->emit_bb_start(rq
,
3152 i915_vma_unlock(batch
);
3153 i915_vma_unpin(batch
);
3156 i915_request_get(rq
);
3157 i915_request_add(rq
);
3160 i915_vma_put(batch
);
3162 i915_vma_unpin(vma
);
3164 intel_context_put(ce
);
3165 return err
? ERR_PTR(err
) : rq
;
3168 static int preempt_user(struct intel_engine_cs
*engine
,
3169 struct i915_vma
*global
,
3172 struct i915_sched_attr attr
= {
3173 .priority
= I915_PRIORITY_MAX
3175 struct i915_request
*rq
;
3179 rq
= intel_engine_create_kernel_request(engine
);
3183 cs
= intel_ring_begin(rq
, 4);
3185 i915_request_add(rq
);
3189 *cs
++ = MI_STORE_DWORD_IMM_GEN4
| MI_USE_GGTT
;
3190 *cs
++ = i915_ggtt_offset(global
);
3194 intel_ring_advance(rq
, cs
);
3196 i915_request_get(rq
);
3197 i915_request_add(rq
);
3199 engine
->schedule(rq
, &attr
);
3201 if (i915_request_wait(rq
, 0, HZ
/ 2) < 0)
3203 i915_request_put(rq
);
static int live_preempt_user(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_vma *global;
	enum intel_engine_id id;
	u32 *result;
	int err = 0;

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	/*
	 * In our other tests, we look at preemption in carefully
	 * controlled conditions in the ringbuffer. Since most of the
	 * time is spent in user batches, most of our preemptions naturally
	 * occur there. We want to verify that when we preempt inside a batch
	 * we continue on from the current instruction and do not roll back
	 * to the start, or another earlier arbitration point.
	 *
	 * To verify this, we create a batch which is a mixture of
	 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
	 * a few preempting contexts thrown into the mix, we look for any
	 * repeated instructions (which show up as incorrect values).
	 */

	global = create_global(gt, 4096);
	if (IS_ERR(global))
		return PTR_ERR(global);

	result = i915_gem_object_pin_map(global->obj, I915_MAP_WC);
	if (IS_ERR(result)) {
		i915_vma_unpin_and_release(&global, 0);
		return PTR_ERR(result);
	}
3244 for_each_engine(engine
, gt
, id
) {
3245 struct i915_request
*client
[3] = {};
3246 struct igt_live_test t
;
3249 if (!intel_engine_has_preemption(engine
))
3252 if (IS_GEN(gt
->i915
, 8) && engine
->class != RENDER_CLASS
)
3253 continue; /* we need per-context GPR */
3255 if (igt_live_test_begin(&t
, gt
->i915
, __func__
, engine
->name
)) {
3260 memset(result
, 0, 4096);
3262 for (i
= 0; i
< ARRAY_SIZE(client
); i
++) {
3263 struct i915_request
*rq
;
3265 rq
= create_gpr_client(engine
, global
,
3266 NUM_GPR
* i
* sizeof(u32
));
3273 /* Continuously preempt the set of 3 running contexts */
3274 for (i
= 1; i
<= NUM_GPR
; i
++) {
3275 err
= preempt_user(engine
, global
, i
);
3280 if (READ_ONCE(result
[0]) != NUM_GPR
) {
3281 pr_err("%s: Failed to release semaphore\n",
3287 for (i
= 0; i
< ARRAY_SIZE(client
); i
++) {
3290 if (i915_request_wait(client
[i
], 0, HZ
/ 2) < 0) {
3295 for (gpr
= 1; gpr
< NUM_GPR
; gpr
++) {
3296 if (result
[NUM_GPR
* i
+ gpr
] != 1) {
3297 pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3299 i
, gpr
, result
[NUM_GPR
* i
+ gpr
]);
3307 for (i
= 0; i
< ARRAY_SIZE(client
); i
++) {
3311 i915_request_put(client
[i
]);
3314 /* Flush the semaphores on error */
3315 smp_store_mb(result
[0], -1);
3316 if (igt_live_test_end(&t
))
3322 i915_vma_unpin_and_release(&global
, I915_VMA_RELEASE_MAP
);
3326 static int live_preempt_timeout(void *arg
)
3328 struct intel_gt
*gt
= arg
;
3329 struct i915_gem_context
*ctx_hi
, *ctx_lo
;
3330 struct igt_spinner spin_lo
;
3331 struct intel_engine_cs
*engine
;
3332 enum intel_engine_id id
;
3336 * Check that we force preemption to occur by cancelling the previous
3337 * context if it refuses to yield the GPU.
3339 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT
))
3342 if (!HAS_LOGICAL_RING_PREEMPTION(gt
->i915
))
3345 if (!intel_has_reset_engine(gt
))
3348 if (igt_spinner_init(&spin_lo
, gt
))
3351 ctx_hi
= kernel_context(gt
->i915
);
3354 ctx_hi
->sched
.priority
=
3355 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY
);
3357 ctx_lo
= kernel_context(gt
->i915
);
3360 ctx_lo
->sched
.priority
=
3361 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY
);
3363 for_each_engine(engine
, gt
, id
) {
3364 unsigned long saved_timeout
;
3365 struct i915_request
*rq
;
3367 if (!intel_engine_has_preemption(engine
))
3370 rq
= spinner_create_request(&spin_lo
, ctx_lo
, engine
,
3371 MI_NOOP
); /* preemption disabled */
3377 i915_request_add(rq
);
3378 if (!igt_wait_for_spinner(&spin_lo
, rq
)) {
3379 intel_gt_set_wedged(gt
);
3384 rq
= igt_request_alloc(ctx_hi
, engine
);
3386 igt_spinner_end(&spin_lo
);
3391 /* Flush the previous CS ack before changing timeouts */
3392 while (READ_ONCE(engine
->execlists
.pending
[0]))
3395 saved_timeout
= engine
->props
.preempt_timeout_ms
;
3396 engine
->props
.preempt_timeout_ms
= 1; /* in ms, -> 1 jiffie */
3398 i915_request_get(rq
);
3399 i915_request_add(rq
);
3401 intel_engine_flush_submission(engine
);
3402 engine
->props
.preempt_timeout_ms
= saved_timeout
;
3404 if (i915_request_wait(rq
, 0, HZ
/ 10) < 0) {
3405 intel_gt_set_wedged(gt
);
3406 i915_request_put(rq
);
3411 igt_spinner_end(&spin_lo
);
3412 i915_request_put(rq
);
3417 kernel_context_close(ctx_lo
);
3419 kernel_context_close(ctx_hi
);
3421 igt_spinner_fini(&spin_lo
);
static int random_range(struct rnd_state *rnd, int min, int max)
{
	return i915_prandom_u32_max_state(max - min, rnd) + min;
}

static int random_priority(struct rnd_state *rnd)
{
	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
}
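
/*
 * A minimal, hypothetical usage sketch (not called by any selftest): seed a
 * PRNG state the same way live_preempt_smoke() seeds smoke.prng below, then
 * draw a priority from it. The seed value here is illustrative only.
 */
static inline int __maybe_unused example_random_priority(void)
{
	struct rnd_state prng = I915_RND_STATE_INITIALIZER(0x12345678);

	/* Uniform over [I915_PRIORITY_MIN, I915_PRIORITY_MAX) */
	return random_priority(&prng);
}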
struct preempt_smoke {
	struct intel_gt *gt;
	struct i915_gem_context **contexts;
	struct intel_engine_cs *engine;
	struct drm_i915_gem_object *batch;
	unsigned int ncontext;
	struct rnd_state prng;
	unsigned long count;
};

static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
{
	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
							  &smoke->prng)];
}
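
/*
 * Hypothetical initialisation sketch (mirrors what live_preempt_smoke() does
 * further below; the context count here is illustrative only): the smoke
 * state bundles the GT, a pool of contexts and the PRNG used by
 * smoke_context() and random_priority() to pick contexts and priorities.
 */
static void __maybe_unused example_smoke_init(struct preempt_smoke *smoke,
					      struct intel_gt *gt)
{
	memset(smoke, 0, sizeof(*smoke));
	smoke->gt = gt;
	smoke->ncontext = 256; /* illustrative only */
	smoke->prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);
}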
3451 static int smoke_submit(struct preempt_smoke
*smoke
,
3452 struct i915_gem_context
*ctx
, int prio
,
3453 struct drm_i915_gem_object
*batch
)
3455 struct i915_request
*rq
;
3456 struct i915_vma
*vma
= NULL
;
3460 struct i915_address_space
*vm
;
3462 vm
= i915_gem_context_get_vm_rcu(ctx
);
3463 vma
= i915_vma_instance(batch
, vm
, NULL
);
3466 return PTR_ERR(vma
);
3468 err
= i915_vma_pin(vma
, 0, 0, PIN_USER
);
3473 ctx
->sched
.priority
= prio
;
3475 rq
= igt_request_alloc(ctx
, smoke
->engine
);
3483 err
= i915_request_await_object(rq
, vma
->obj
, false);
3485 err
= i915_vma_move_to_active(vma
, rq
, 0);
3487 err
= rq
->engine
->emit_bb_start(rq
,
3490 i915_vma_unlock(vma
);
3493 i915_request_add(rq
);
3497 i915_vma_unpin(vma
);
3502 static int smoke_crescendo_thread(void *arg
)
3504 struct preempt_smoke
*smoke
= arg
;
3505 IGT_TIMEOUT(end_time
);
3506 unsigned long count
;
3510 struct i915_gem_context
*ctx
= smoke_context(smoke
);
3513 err
= smoke_submit(smoke
,
3514 ctx
, count
% I915_PRIORITY_MAX
,
3520 } while (count
< smoke
->ncontext
&& !__igt_timeout(end_time
, NULL
));
3522 smoke
->count
= count
;
3526 static int smoke_crescendo(struct preempt_smoke
*smoke
, unsigned int flags
)
3527 #define BATCH BIT(0)
3529 struct task_struct
*tsk
[I915_NUM_ENGINES
] = {};
3530 struct preempt_smoke arg
[I915_NUM_ENGINES
];
3531 struct intel_engine_cs
*engine
;
3532 enum intel_engine_id id
;
3533 unsigned long count
;
3536 for_each_engine(engine
, smoke
->gt
, id
) {
3538 arg
[id
].engine
= engine
;
3539 if (!(flags
& BATCH
))
3540 arg
[id
].batch
= NULL
;
3543 tsk
[id
] = kthread_run(smoke_crescendo_thread
, &arg
,
3544 "igt/smoke:%d", id
);
3545 if (IS_ERR(tsk
[id
])) {
3546 err
= PTR_ERR(tsk
[id
]);
3549 get_task_struct(tsk
[id
]);
3552 yield(); /* start all threads before we kthread_stop() */
3555 for_each_engine(engine
, smoke
->gt
, id
) {
3558 if (IS_ERR_OR_NULL(tsk
[id
]))
3561 status
= kthread_stop(tsk
[id
]);
3565 count
+= arg
[id
].count
;
3567 put_task_struct(tsk
[id
]);
3570 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3571 count
, flags
, smoke
->gt
->info
.num_engines
, smoke
->ncontext
);
3575 static int smoke_random(struct preempt_smoke
*smoke
, unsigned int flags
)
3577 enum intel_engine_id id
;
3578 IGT_TIMEOUT(end_time
);
3579 unsigned long count
;
3583 for_each_engine(smoke
->engine
, smoke
->gt
, id
) {
3584 struct i915_gem_context
*ctx
= smoke_context(smoke
);
3587 err
= smoke_submit(smoke
,
3588 ctx
, random_priority(&smoke
->prng
),
3589 flags
& BATCH
? smoke
->batch
: NULL
);
3595 } while (count
< smoke
->ncontext
&& !__igt_timeout(end_time
, NULL
));
3597 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3598 count
, flags
, smoke
->gt
->info
.num_engines
, smoke
->ncontext
);
3602 static int live_preempt_smoke(void *arg
)
3604 struct preempt_smoke smoke
= {
3606 .prng
= I915_RND_STATE_INITIALIZER(i915_selftest
.random_seed
),
3609 const unsigned int phase
[] = { 0, BATCH
};
3610 struct igt_live_test t
;
3615 if (!HAS_LOGICAL_RING_PREEMPTION(smoke
.gt
->i915
))
3618 smoke
.contexts
= kmalloc_array(smoke
.ncontext
,
3619 sizeof(*smoke
.contexts
),
3621 if (!smoke
.contexts
)
3625 i915_gem_object_create_internal(smoke
.gt
->i915
, PAGE_SIZE
);
3626 if (IS_ERR(smoke
.batch
)) {
3627 err
= PTR_ERR(smoke
.batch
);
3631 cs
= i915_gem_object_pin_map(smoke
.batch
, I915_MAP_WB
);
3636 for (n
= 0; n
< PAGE_SIZE
/ sizeof(*cs
) - 1; n
++)
3637 cs
[n
] = MI_ARB_CHECK
;
3638 cs
[n
] = MI_BATCH_BUFFER_END
;
3639 i915_gem_object_flush_map(smoke
.batch
);
3640 i915_gem_object_unpin_map(smoke
.batch
);
3642 if (igt_live_test_begin(&t
, smoke
.gt
->i915
, __func__
, "all")) {
3647 for (n
= 0; n
< smoke
.ncontext
; n
++) {
3648 smoke
.contexts
[n
] = kernel_context(smoke
.gt
->i915
);
3649 if (!smoke
.contexts
[n
])
3653 for (n
= 0; n
< ARRAY_SIZE(phase
); n
++) {
3654 err
= smoke_crescendo(&smoke
, phase
[n
]);
3658 err
= smoke_random(&smoke
, phase
[n
]);
3664 if (igt_live_test_end(&t
))
3667 for (n
= 0; n
< smoke
.ncontext
; n
++) {
3668 if (!smoke
.contexts
[n
])
3670 kernel_context_close(smoke
.contexts
[n
]);
3674 i915_gem_object_put(smoke
.batch
);
3676 kfree(smoke
.contexts
);
3681 static int nop_virtual_engine(struct intel_gt
*gt
,
3682 struct intel_engine_cs
**siblings
,
3683 unsigned int nsibling
,
3686 #define CHAIN BIT(0)
3688 IGT_TIMEOUT(end_time
);
3689 struct i915_request
*request
[16] = {};
3690 struct intel_context
*ve
[16];
3691 unsigned long n
, prime
, nc
;
3692 struct igt_live_test t
;
3693 ktime_t times
[2] = {};
3696 GEM_BUG_ON(!nctx
|| nctx
> ARRAY_SIZE(ve
));
3698 for (n
= 0; n
< nctx
; n
++) {
3699 ve
[n
] = intel_execlists_create_virtual(siblings
, nsibling
);
3700 if (IS_ERR(ve
[n
])) {
3701 err
= PTR_ERR(ve
[n
]);
3706 err
= intel_context_pin(ve
[n
]);
3708 intel_context_put(ve
[n
]);
3714 err
= igt_live_test_begin(&t
, gt
->i915
, __func__
, ve
[0]->engine
->name
);
3718 for_each_prime_number_from(prime
, 1, 8192) {
3719 times
[1] = ktime_get_raw();
3721 if (flags
& CHAIN
) {
3722 for (nc
= 0; nc
< nctx
; nc
++) {
3723 for (n
= 0; n
< prime
; n
++) {
3724 struct i915_request
*rq
;
3726 rq
= i915_request_create(ve
[nc
]);
3733 i915_request_put(request
[nc
]);
3734 request
[nc
] = i915_request_get(rq
);
3735 i915_request_add(rq
);
3739 for (n
= 0; n
< prime
; n
++) {
3740 for (nc
= 0; nc
< nctx
; nc
++) {
3741 struct i915_request
*rq
;
3743 rq
= i915_request_create(ve
[nc
]);
3750 i915_request_put(request
[nc
]);
3751 request
[nc
] = i915_request_get(rq
);
3752 i915_request_add(rq
);
3757 for (nc
= 0; nc
< nctx
; nc
++) {
3758 if (i915_request_wait(request
[nc
], 0, HZ
/ 10) < 0) {
3759 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3760 __func__
, ve
[0]->engine
->name
,
3761 request
[nc
]->fence
.context
,
3762 request
[nc
]->fence
.seqno
);
3764 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3765 __func__
, ve
[0]->engine
->name
,
3766 request
[nc
]->fence
.context
,
3767 request
[nc
]->fence
.seqno
);
3769 intel_gt_set_wedged(gt
);
3774 times
[1] = ktime_sub(ktime_get_raw(), times
[1]);
3776 times
[0] = times
[1];
3778 for (nc
= 0; nc
< nctx
; nc
++) {
3779 i915_request_put(request
[nc
]);
3783 if (__igt_timeout(end_time
, NULL
))
3787 err
= igt_live_test_end(&t
);
3791 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3792 nctx
, ve
[0]->engine
->name
, ktime_to_ns(times
[0]),
3793 prime
, div64_u64(ktime_to_ns(times
[1]), prime
));
3796 if (igt_flush_test(gt
->i915
))
3799 for (nc
= 0; nc
< nctx
; nc
++) {
3800 i915_request_put(request
[nc
]);
3801 intel_context_unpin(ve
[nc
]);
3802 intel_context_put(ve
[nc
]);
static unsigned int
__select_siblings(struct intel_gt *gt,
		  unsigned int class,
		  struct intel_engine_cs **siblings,
		  bool (*filter)(const struct intel_engine_cs *))
{
	unsigned int n = 0;
	unsigned int inst;

	for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
		if (!gt->engine_class[class][inst])
			continue;

		if (filter && !filter(gt->engine_class[class][inst]))
			continue;

		siblings[n++] = gt->engine_class[class][inst];
	}

	return n;
}

static unsigned int
select_siblings(struct intel_gt *gt,
		unsigned int class,
		struct intel_engine_cs **siblings)
{
	return __select_siblings(gt, class, siblings, NULL);
}
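
/*
 * A minimal, hypothetical usage sketch (mirroring the loops in
 * live_virtual_engine() and friends below): gather every physical engine of
 * one class into a sibling array, ready to be wrapped in a virtual engine.
 * The choice of COPY_ENGINE_CLASS here is illustrative only; the return
 * value is the number of siblings found.
 */
static unsigned int __maybe_unused example_gather_copy_engines(struct intel_gt *gt)
{
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];

	return select_siblings(gt, COPY_ENGINE_CLASS, siblings);
}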
3837 static int live_virtual_engine(void *arg
)
3839 struct intel_gt
*gt
= arg
;
3840 struct intel_engine_cs
*siblings
[MAX_ENGINE_INSTANCE
+ 1];
3841 struct intel_engine_cs
*engine
;
3842 enum intel_engine_id id
;
3846 if (intel_uc_uses_guc_submission(>
->uc
))
3849 for_each_engine(engine
, gt
, id
) {
3850 err
= nop_virtual_engine(gt
, &engine
, 1, 1, 0);
3852 pr_err("Failed to wrap engine %s: err=%d\n",
3858 for (class = 0; class <= MAX_ENGINE_CLASS
; class++) {
3861 nsibling
= select_siblings(gt
, class, siblings
);
3865 for (n
= 1; n
<= nsibling
+ 1; n
++) {
3866 err
= nop_virtual_engine(gt
, siblings
, nsibling
,
3872 err
= nop_virtual_engine(gt
, siblings
, nsibling
, n
, CHAIN
);
3880 static int mask_virtual_engine(struct intel_gt
*gt
,
3881 struct intel_engine_cs
**siblings
,
3882 unsigned int nsibling
)
3884 struct i915_request
*request
[MAX_ENGINE_INSTANCE
+ 1];
3885 struct intel_context
*ve
;
3886 struct igt_live_test t
;
3891 * Check that by setting the execution mask on a request, we can
3892 * restrict it to our desired engine within the virtual engine.
3895 ve
= intel_execlists_create_virtual(siblings
, nsibling
);
3901 err
= intel_context_pin(ve
);
3905 err
= igt_live_test_begin(&t
, gt
->i915
, __func__
, ve
->engine
->name
);
3909 for (n
= 0; n
< nsibling
; n
++) {
3910 request
[n
] = i915_request_create(ve
);
3911 if (IS_ERR(request
[n
])) {
3912 err
= PTR_ERR(request
[n
]);
3917 /* Reverse order as it's more likely to be unnatural */
3918 request
[n
]->execution_mask
= siblings
[nsibling
- n
- 1]->mask
;
3920 i915_request_get(request
[n
]);
3921 i915_request_add(request
[n
]);
3924 for (n
= 0; n
< nsibling
; n
++) {
3925 if (i915_request_wait(request
[n
], 0, HZ
/ 10) < 0) {
3926 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3927 __func__
, ve
->engine
->name
,
3928 request
[n
]->fence
.context
,
3929 request
[n
]->fence
.seqno
);
3931 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3932 __func__
, ve
->engine
->name
,
3933 request
[n
]->fence
.context
,
3934 request
[n
]->fence
.seqno
);
3936 intel_gt_set_wedged(gt
);
3941 if (request
[n
]->engine
!= siblings
[nsibling
- n
- 1]) {
3942 pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3943 request
[n
]->engine
->name
,
3944 siblings
[nsibling
- n
- 1]->name
);
3950 err
= igt_live_test_end(&t
);
3952 if (igt_flush_test(gt
->i915
))
3955 for (n
= 0; n
< nsibling
; n
++)
3956 i915_request_put(request
[n
]);
3959 intel_context_unpin(ve
);
3961 intel_context_put(ve
);
3966 static int live_virtual_mask(void *arg
)
3968 struct intel_gt
*gt
= arg
;
3969 struct intel_engine_cs
*siblings
[MAX_ENGINE_INSTANCE
+ 1];
3973 if (intel_uc_uses_guc_submission(>
->uc
))
3976 for (class = 0; class <= MAX_ENGINE_CLASS
; class++) {
3977 unsigned int nsibling
;
3979 nsibling
= select_siblings(gt
, class, siblings
);
3983 err
= mask_virtual_engine(gt
, siblings
, nsibling
);
3991 static int slicein_virtual_engine(struct intel_gt
*gt
,
3992 struct intel_engine_cs
**siblings
,
3993 unsigned int nsibling
)
3995 const long timeout
= slice_timeout(siblings
[0]);
3996 struct intel_context
*ce
;
3997 struct i915_request
*rq
;
3998 struct igt_spinner spin
;
4003 * Virtual requests must take part in timeslicing on the target engines.
4006 if (igt_spinner_init(&spin
, gt
))
4009 for (n
= 0; n
< nsibling
; n
++) {
4010 ce
= intel_context_create(siblings
[n
]);
4016 rq
= igt_spinner_create_request(&spin
, ce
, MI_ARB_CHECK
);
4017 intel_context_put(ce
);
4023 i915_request_add(rq
);
4026 ce
= intel_execlists_create_virtual(siblings
, nsibling
);
4032 rq
= intel_context_create_request(ce
);
4033 intel_context_put(ce
);
4039 i915_request_get(rq
);
4040 i915_request_add(rq
);
4041 if (i915_request_wait(rq
, 0, timeout
) < 0) {
4042 GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4043 __func__
, rq
->engine
->name
);
4045 intel_gt_set_wedged(gt
);
4048 i915_request_put(rq
);
4051 igt_spinner_end(&spin
);
4052 if (igt_flush_test(gt
->i915
))
4054 igt_spinner_fini(&spin
);
4058 static int sliceout_virtual_engine(struct intel_gt
*gt
,
4059 struct intel_engine_cs
**siblings
,
4060 unsigned int nsibling
)
4062 const long timeout
= slice_timeout(siblings
[0]);
4063 struct intel_context
*ce
;
4064 struct i915_request
*rq
;
4065 struct igt_spinner spin
;
4070 * Virtual requests must allow others a fair timeslice.
4073 if (igt_spinner_init(&spin
, gt
))
4076 /* XXX We do not handle oversubscription and fairness with normal rq */
4077 for (n
= 0; n
< nsibling
; n
++) {
4078 ce
= intel_execlists_create_virtual(siblings
, nsibling
);
4084 rq
= igt_spinner_create_request(&spin
, ce
, MI_ARB_CHECK
);
4085 intel_context_put(ce
);
4091 i915_request_add(rq
);
4094 for (n
= 0; !err
&& n
< nsibling
; n
++) {
4095 ce
= intel_context_create(siblings
[n
]);
4101 rq
= intel_context_create_request(ce
);
4102 intel_context_put(ce
);
4108 i915_request_get(rq
);
4109 i915_request_add(rq
);
4110 if (i915_request_wait(rq
, 0, timeout
) < 0) {
4111 GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4112 __func__
, siblings
[n
]->name
);
4114 intel_gt_set_wedged(gt
);
4117 i915_request_put(rq
);
4121 igt_spinner_end(&spin
);
4122 if (igt_flush_test(gt
->i915
))
4124 igt_spinner_fini(&spin
);
4128 static int live_virtual_slice(void *arg
)
4130 struct intel_gt
*gt
= arg
;
4131 struct intel_engine_cs
*siblings
[MAX_ENGINE_INSTANCE
+ 1];
4135 if (intel_uc_uses_guc_submission(>
->uc
))
4138 for (class = 0; class <= MAX_ENGINE_CLASS
; class++) {
4139 unsigned int nsibling
;
4141 nsibling
= __select_siblings(gt
, class, siblings
,
4142 intel_engine_has_timeslices
);
4146 err
= slicein_virtual_engine(gt
, siblings
, nsibling
);
4150 err
= sliceout_virtual_engine(gt
, siblings
, nsibling
);
4158 static int preserved_virtual_engine(struct intel_gt
*gt
,
4159 struct intel_engine_cs
**siblings
,
4160 unsigned int nsibling
)
4162 struct i915_request
*last
= NULL
;
4163 struct intel_context
*ve
;
4164 struct i915_vma
*scratch
;
4165 struct igt_live_test t
;
4170 scratch
= create_scratch(siblings
[0]->gt
);
4171 if (IS_ERR(scratch
))
4172 return PTR_ERR(scratch
);
4174 err
= i915_vma_sync(scratch
);
4178 ve
= intel_execlists_create_virtual(siblings
, nsibling
);
4184 err
= intel_context_pin(ve
);
4188 err
= igt_live_test_begin(&t
, gt
->i915
, __func__
, ve
->engine
->name
);
4192 for (n
= 0; n
< NUM_GPR_DW
; n
++) {
4193 struct intel_engine_cs
*engine
= siblings
[n
% nsibling
];
4194 struct i915_request
*rq
;
4196 rq
= i915_request_create(ve
);
4202 i915_request_put(last
);
4203 last
= i915_request_get(rq
);
4205 cs
= intel_ring_begin(rq
, 8);
4207 i915_request_add(rq
);
4212 *cs
++ = MI_STORE_REGISTER_MEM_GEN8
| MI_USE_GGTT
;
4213 *cs
++ = CS_GPR(engine
, n
);
4214 *cs
++ = i915_ggtt_offset(scratch
) + n
* sizeof(u32
);
4217 *cs
++ = MI_LOAD_REGISTER_IMM(1);
4218 *cs
++ = CS_GPR(engine
, (n
+ 1) % NUM_GPR_DW
);
4222 intel_ring_advance(rq
, cs
);
4224 /* Restrict this request to run on a particular engine */
4225 rq
->execution_mask
= engine
->mask
;
4226 i915_request_add(rq
);
4229 if (i915_request_wait(last
, 0, HZ
/ 5) < 0) {
4234 cs
= i915_gem_object_pin_map(scratch
->obj
, I915_MAP_WB
);
4240 for (n
= 0; n
< NUM_GPR_DW
; n
++) {
4242 pr_err("Incorrect value[%d] found for GPR[%d]\n",
4249 i915_gem_object_unpin_map(scratch
->obj
);
4252 if (igt_live_test_end(&t
))
4254 i915_request_put(last
);
4256 intel_context_unpin(ve
);
4258 intel_context_put(ve
);
4260 i915_vma_unpin_and_release(&scratch
, 0);
4264 static int live_virtual_preserved(void *arg
)
4266 struct intel_gt
*gt
= arg
;
4267 struct intel_engine_cs
*siblings
[MAX_ENGINE_INSTANCE
+ 1];
4271 * Check that the context image retains non-privileged (user) registers
4272 * from one engine to the next. For this we check that the CS_GPR
4276 if (intel_uc_uses_guc_submission(>
->uc
))
4279 /* As we use CS_GPR we cannot run before they existed on all engines. */
4280 if (INTEL_GEN(gt
->i915
) < 9)
4283 for (class = 0; class <= MAX_ENGINE_CLASS
; class++) {
4286 nsibling
= select_siblings(gt
, class, siblings
);
4290 err
= preserved_virtual_engine(gt
, siblings
, nsibling
);
4298 static int bond_virtual_engine(struct intel_gt
*gt
,
4300 struct intel_engine_cs
**siblings
,
4301 unsigned int nsibling
,
4303 #define BOND_SCHEDULE BIT(0)
4305 struct intel_engine_cs
*master
;
4306 struct i915_request
*rq
[16];
4307 enum intel_engine_id id
;
4308 struct igt_spinner spin
;
4313 * A set of bonded requests is intended to be run concurrently
4314 * across a number of engines. We use one request per-engine
4315 * and a magic fence to schedule each of the bonded requests
4316 * at the same time. A consequence of our current scheduler is that
4317 * we only move requests to the HW ready queue when the request
4318 * becomes ready, that is when all of its prerequisite fences have
4319 * been signaled. As one of those fences is the master submit fence,
4320 * there is a delay on all secondary fences as the HW may be
4321 * currently busy. Equally, as all the requests are independent,
4322 * they may have other fences that delay individual request
4323 * submission to HW. Ergo, we do not guarantee that all requests are
4324 * immediately submitted to HW at the same time, just that if the
4325 * rules are abided by, they are ready at the same time as the
4326 * first is submitted. Userspace can embed semaphores in its batch
4327 * to ensure parallel execution of its phases as it requires.
4328 * Though naturally it gets requested that perhaps the scheduler should
4329 * take care of parallel execution, even across preemption events on
4330 * different HW. (The proper answer is of course "lalalala".)
4332 * With the submit-fence, we have identified three possible phases
4333 * of synchronisation depending on the master fence: queued (not
4334 * ready), executing, and signaled. The first two are quite simple
4335 * and checked below. However, the signaled master fence handling is
4336 * contentious. Currently we do not distinguish between a signaled
4337 * fence and an expired fence, as once signaled it does not convey
4338 * any information about the previous execution. It may even be freed
4339 * and hence checking later it may not exist at all. Ergo we currently
4340 * do not apply the bonding constraint for an already signaled fence,
4341 * as our expectation is that it should not constrain the secondaries
4342 * and is outside of the scope of the bonded request API (i.e. all
4343 * userspace requests are meant to be running in parallel). As
4344 * it imposes no constraint, and is effectively a no-op, we do not
4345 * check below as normal execution flows are checked extensively above.
	 *
	 * XXX Is the degenerate handling of signaled submit fences the
	 * expected behaviour for userspace?
	 */
4351 GEM_BUG_ON(nsibling
>= ARRAY_SIZE(rq
) - 1);
4353 if (igt_spinner_init(&spin
, gt
))
4357 rq
[0] = ERR_PTR(-ENOMEM
);
4358 for_each_engine(master
, gt
, id
) {
4359 struct i915_sw_fence fence
= {};
4360 struct intel_context
*ce
;
4362 if (master
->class == class)
4365 ce
= intel_context_create(master
);
4371 memset_p((void *)rq
, ERR_PTR(-EINVAL
), ARRAY_SIZE(rq
));
4373 rq
[0] = igt_spinner_create_request(&spin
, ce
, MI_NOOP
);
4374 intel_context_put(ce
);
4375 if (IS_ERR(rq
[0])) {
4376 err
= PTR_ERR(rq
[0]);
4379 i915_request_get(rq
[0]);
4381 if (flags
& BOND_SCHEDULE
) {
4382 onstack_fence_init(&fence
);
4383 err
= i915_sw_fence_await_sw_fence_gfp(&rq
[0]->submit
,
4388 i915_request_add(rq
[0]);
4392 if (!(flags
& BOND_SCHEDULE
) &&
4393 !igt_wait_for_spinner(&spin
, rq
[0])) {
4398 for (n
= 0; n
< nsibling
; n
++) {
4399 struct intel_context
*ve
;
4401 ve
= intel_execlists_create_virtual(siblings
, nsibling
);
4404 onstack_fence_fini(&fence
);
4408 err
= intel_virtual_engine_attach_bond(ve
->engine
,
4412 intel_context_put(ve
);
4413 onstack_fence_fini(&fence
);
4417 err
= intel_context_pin(ve
);
4418 intel_context_put(ve
);
4420 onstack_fence_fini(&fence
);
4424 rq
[n
+ 1] = i915_request_create(ve
);
4425 intel_context_unpin(ve
);
4426 if (IS_ERR(rq
[n
+ 1])) {
4427 err
= PTR_ERR(rq
[n
+ 1]);
4428 onstack_fence_fini(&fence
);
4431 i915_request_get(rq
[n
+ 1]);
4433 err
= i915_request_await_execution(rq
[n
+ 1],
4435 ve
->engine
->bond_execute
);
4436 i915_request_add(rq
[n
+ 1]);
4438 onstack_fence_fini(&fence
);
4442 onstack_fence_fini(&fence
);
4443 intel_engine_flush_submission(master
);
4444 igt_spinner_end(&spin
);
4446 if (i915_request_wait(rq
[0], 0, HZ
/ 10) < 0) {
4447 pr_err("Master request did not execute (on %s)!\n",
4448 rq
[0]->engine
->name
);
4453 for (n
= 0; n
< nsibling
; n
++) {
4454 if (i915_request_wait(rq
[n
+ 1], 0,
4455 MAX_SCHEDULE_TIMEOUT
) < 0) {
4460 if (rq
[n
+ 1]->engine
!= siblings
[n
]) {
4461 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
4463 rq
[n
+ 1]->engine
->name
,
4464 rq
[0]->engine
->name
);
4470 for (n
= 0; !IS_ERR(rq
[n
]); n
++)
4471 i915_request_put(rq
[n
]);
4472 rq
[0] = ERR_PTR(-ENOMEM
);
4476 for (n
= 0; !IS_ERR(rq
[n
]); n
++)
4477 i915_request_put(rq
[n
]);
4478 if (igt_flush_test(gt
->i915
))
4481 igt_spinner_fini(&spin
);
4485 static int live_virtual_bond(void *arg
)
4487 static const struct phase
{
4492 { "schedule", BOND_SCHEDULE
},
4495 struct intel_gt
*gt
= arg
;
4496 struct intel_engine_cs
*siblings
[MAX_ENGINE_INSTANCE
+ 1];
4500 if (intel_uc_uses_guc_submission(>
->uc
))
4503 for (class = 0; class <= MAX_ENGINE_CLASS
; class++) {
4504 const struct phase
*p
;
4507 nsibling
= select_siblings(gt
, class, siblings
);
4511 for (p
= phases
; p
->name
; p
++) {
4512 err
= bond_virtual_engine(gt
,
4513 class, siblings
, nsibling
,
4516 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4517 __func__
, p
->name
, class, nsibling
, err
);
4526 static int reset_virtual_engine(struct intel_gt
*gt
,
4527 struct intel_engine_cs
**siblings
,
4528 unsigned int nsibling
)
4530 struct intel_engine_cs
*engine
;
4531 struct intel_context
*ve
;
4532 struct igt_spinner spin
;
4533 struct i915_request
*rq
;
	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
	 */
4543 if (igt_spinner_init(&spin
, gt
))
4546 ve
= intel_execlists_create_virtual(siblings
, nsibling
);
4552 for (n
= 0; n
< nsibling
; n
++)
4553 st_engine_heartbeat_disable(siblings
[n
]);
4555 rq
= igt_spinner_create_request(&spin
, ve
, MI_ARB_CHECK
);
4560 i915_request_add(rq
);
4562 if (!igt_wait_for_spinner(&spin
, rq
)) {
4563 intel_gt_set_wedged(gt
);
4568 engine
= rq
->engine
;
4569 GEM_BUG_ON(engine
== ve
->engine
);
4571 /* Take ownership of the reset and tasklet */
4572 if (test_and_set_bit(I915_RESET_ENGINE
+ engine
->id
,
4573 >
->reset
.flags
)) {
4574 intel_gt_set_wedged(gt
);
4578 tasklet_disable(&engine
->execlists
.tasklet
);
4580 engine
->execlists
.tasklet
.func(engine
->execlists
.tasklet
.data
);
4581 GEM_BUG_ON(execlists_active(&engine
->execlists
) != rq
);
4583 /* Fake a preemption event; failed of course */
4584 spin_lock_irq(&engine
->active
.lock
);
4585 __unwind_incomplete_requests(engine
);
4586 spin_unlock_irq(&engine
->active
.lock
);
4587 GEM_BUG_ON(rq
->engine
!= ve
->engine
);
4589 /* Reset the engine while keeping our active request on hold */
4590 execlists_hold(engine
, rq
);
4591 GEM_BUG_ON(!i915_request_on_hold(rq
));
4593 intel_engine_reset(engine
, NULL
);
4594 GEM_BUG_ON(rq
->fence
.error
!= -EIO
);
4596 /* Release our grasp on the engine, letting CS flow again */
4597 tasklet_enable(&engine
->execlists
.tasklet
);
4598 clear_and_wake_up_bit(I915_RESET_ENGINE
+ engine
->id
, >
->reset
.flags
);
4600 /* Check that we do not resubmit the held request */
4601 i915_request_get(rq
);
4602 if (!i915_request_wait(rq
, 0, HZ
/ 5)) {
4603 pr_err("%s: on hold request completed!\n",
4605 intel_gt_set_wedged(gt
);
4609 GEM_BUG_ON(!i915_request_on_hold(rq
));
4611 /* But is resubmitted on release */
4612 execlists_unhold(engine
, rq
);
4613 if (i915_request_wait(rq
, 0, HZ
/ 5) < 0) {
4614 pr_err("%s: held request did not complete!\n",
4616 intel_gt_set_wedged(gt
);
4621 i915_request_put(rq
);
4623 for (n
= 0; n
< nsibling
; n
++)
4624 st_engine_heartbeat_enable(siblings
[n
]);
4626 intel_context_put(ve
);
4628 igt_spinner_fini(&spin
);
4632 static int live_virtual_reset(void *arg
)
4634 struct intel_gt
*gt
= arg
;
4635 struct intel_engine_cs
*siblings
[MAX_ENGINE_INSTANCE
+ 1];
4639 * Check that we handle a reset event within a virtual engine.
4640 * Only the physical engine is reset, but we have to check the flow
4641 * of the virtual requests around the reset, and make sure it is not
4645 if (intel_uc_uses_guc_submission(>
->uc
))
4648 if (!intel_has_reset_engine(gt
))
4651 for (class = 0; class <= MAX_ENGINE_CLASS
; class++) {
4654 nsibling
= select_siblings(gt
, class, siblings
);
4658 err
= reset_virtual_engine(gt
, siblings
, nsibling
);
int intel_execlists_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_sanitycheck),
		SUBTEST(live_unlite_switch),
		SUBTEST(live_unlite_preempt),
		SUBTEST(live_unlite_ring),
		SUBTEST(live_pin_rewind),
		SUBTEST(live_hold_reset),
		SUBTEST(live_error_interrupt),
		SUBTEST(live_timeslice_preempt),
		SUBTEST(live_timeslice_rewind),
		SUBTEST(live_timeslice_queue),
		SUBTEST(live_timeslice_nopreempt),
		SUBTEST(live_busywait_preempt),
		SUBTEST(live_preempt),
		SUBTEST(live_late_preempt),
		SUBTEST(live_nopreempt),
		SUBTEST(live_preempt_cancel),
		SUBTEST(live_suppress_self_preempt),
		SUBTEST(live_chain_preempt),
		SUBTEST(live_preempt_ring),
		SUBTEST(live_preempt_gang),
		SUBTEST(live_preempt_timeout),
		SUBTEST(live_preempt_user),
		SUBTEST(live_preempt_smoke),
		SUBTEST(live_virtual_engine),
		SUBTEST(live_virtual_mask),
		SUBTEST(live_virtual_preserved),
		SUBTEST(live_virtual_slice),
		SUBTEST(live_virtual_bond),
		SUBTEST(live_virtual_reset),
	};

	if (!HAS_EXECLISTS(i915))
		return 0;

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}
4709 static int emit_semaphore_signal(struct intel_context
*ce
, void *slot
)
4712 i915_ggtt_offset(ce
->engine
->status_page
.vma
) +
4713 offset_in_page(slot
);
4714 struct i915_request
*rq
;
4717 rq
= intel_context_create_request(ce
);
4721 cs
= intel_ring_begin(rq
, 4);
4723 i915_request_add(rq
);
4727 *cs
++ = MI_STORE_DWORD_IMM_GEN4
| MI_USE_GGTT
;
4732 intel_ring_advance(rq
, cs
);
4734 rq
->sched
.attr
.priority
= I915_PRIORITY_BARRIER
;
4735 i915_request_add(rq
);
4739 static int context_flush(struct intel_context
*ce
, long timeout
)
4741 struct i915_request
*rq
;
4742 struct dma_fence
*fence
;
4745 rq
= intel_engine_create_kernel_request(ce
->engine
);
4749 fence
= i915_active_fence_get(&ce
->timeline
->last_request
);
4751 i915_request_await_dma_fence(rq
, fence
);
4752 dma_fence_put(fence
);
4755 rq
= i915_request_get(rq
);
4756 i915_request_add(rq
);
4757 if (i915_request_wait(rq
, 0, timeout
) < 0)
4759 i915_request_put(rq
);
4761 rmb(); /* We know the request is written, make sure all state is too! */
4765 static int live_lrc_layout(void *arg
)
4767 struct intel_gt
*gt
= arg
;
4768 struct intel_engine_cs
*engine
;
4769 enum intel_engine_id id
;
	/*
	 * Check the register offsets we use to create the initial reg state
	 * match the layout saved by HW.
	 */
4778 lrc
= kmalloc(PAGE_SIZE
, GFP_KERNEL
);
4783 for_each_engine(engine
, gt
, id
) {
4787 if (!engine
->default_state
)
4790 hw
= shmem_pin_map(engine
->default_state
);
4795 hw
+= LRC_STATE_OFFSET
/ sizeof(*hw
);
4797 execlists_init_reg_state(memset(lrc
, POISON_INUSE
, PAGE_SIZE
),
4798 engine
->kernel_context
,
4800 engine
->kernel_context
->ring
,
4813 pr_debug("%s: skipped instruction %x at dword %d\n",
4814 engine
->name
, lri
, dw
);
4819 if ((lri
& GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
4820 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
4821 engine
->name
, dw
, lri
);
4826 if (lrc
[dw
] != lri
) {
4827 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
4828 engine
->name
, dw
, lri
, lrc
[dw
]);
4838 if (hw
[dw
] != lrc
[dw
]) {
4839 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
4840 engine
->name
, dw
, hw
[dw
], lrc
[dw
]);
4846 * Skip over the actual register value as we
4847 * expect that to differ.
4852 } while ((lrc
[dw
] & ~BIT(0)) != MI_BATCH_BUFFER_END
);
4855 pr_info("%s: HW register image:\n", engine
->name
);
4856 igt_hexdump(hw
, PAGE_SIZE
);
4858 pr_info("%s: SW register image:\n", engine
->name
);
4859 igt_hexdump(lrc
, PAGE_SIZE
);
4862 shmem_unpin_map(engine
->default_state
, hw
);
static int find_offset(const u32 *lri, u32 offset)
{
	int i;

	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
		if (lri[i] == offset)
			return i;

	return -1;
}
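
/*
 * Hypothetical usage sketch (this is what the table loop in live_lrc_fixed()
 * below effectively does for each entry): locate the dword index at which the
 * HW context image stores a given register offset, e.g. RING_START.
 */
static int __maybe_unused example_find_ring_start(struct intel_engine_cs *engine,
						  const u32 *hw_state)
{
	return find_offset(hw_state,
			   i915_mmio_reg_offset(RING_START(engine->mmio_base)));
}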
4882 static int live_lrc_fixed(void *arg
)
4884 struct intel_gt
*gt
= arg
;
4885 struct intel_engine_cs
*engine
;
4886 enum intel_engine_id id
;
4890 * Check the assumed register offsets match the actual locations in
4891 * the context image.
4894 for_each_engine(engine
, gt
, id
) {
4901 i915_mmio_reg_offset(RING_START(engine
->mmio_base
)),
4906 i915_mmio_reg_offset(RING_CTL(engine
->mmio_base
)),
4911 i915_mmio_reg_offset(RING_HEAD(engine
->mmio_base
)),
4916 i915_mmio_reg_offset(RING_TAIL(engine
->mmio_base
)),
4921 i915_mmio_reg_offset(RING_MI_MODE(engine
->mmio_base
)),
4922 lrc_ring_mi_mode(engine
),
4926 i915_mmio_reg_offset(RING_BBSTATE(engine
->mmio_base
)),
4931 i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine
->mmio_base
)),
4932 lrc_ring_wa_bb_per_ctx(engine
),
4933 "RING_BB_PER_CTX_PTR"
4936 i915_mmio_reg_offset(RING_INDIRECT_CTX(engine
->mmio_base
)),
4937 lrc_ring_indirect_ptr(engine
),
4938 "RING_INDIRECT_CTX_PTR"
4941 i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine
->mmio_base
)),
4942 lrc_ring_indirect_offset(engine
),
4943 "RING_INDIRECT_CTX_OFFSET"
4946 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine
->mmio_base
)),
4948 "RING_CTX_TIMESTAMP"
4951 i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine
->mmio_base
, 0)),
4952 lrc_ring_gpr0(engine
),
4956 i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine
->mmio_base
)),
4957 lrc_ring_cmd_buf_cctl(engine
),
4964 if (!engine
->default_state
)
4967 hw
= shmem_pin_map(engine
->default_state
);
4972 hw
+= LRC_STATE_OFFSET
/ sizeof(*hw
);
4974 for (t
= tbl
; t
->name
; t
++) {
4975 int dw
= find_offset(hw
, t
->reg
);
4977 if (dw
!= t
->offset
) {
4978 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
4988 shmem_unpin_map(engine
->default_state
, hw
);
4994 static int __live_lrc_state(struct intel_engine_cs
*engine
,
4995 struct i915_vma
*scratch
)
4997 struct intel_context
*ce
;
4998 struct i915_request
*rq
;
4999 struct i915_gem_ww_ctx ww
;
5005 u32 expected
[MAX_IDX
];
5010 ce
= intel_context_create(engine
);
5014 i915_gem_ww_ctx_init(&ww
, false);
5016 err
= i915_gem_object_lock(scratch
->obj
, &ww
);
5018 err
= intel_context_pin_ww(ce
, &ww
);
5022 rq
= i915_request_create(ce
);
5028 cs
= intel_ring_begin(rq
, 4 * MAX_IDX
);
5031 i915_request_add(rq
);
5035 *cs
++ = MI_STORE_REGISTER_MEM_GEN8
| MI_USE_GGTT
;
5036 *cs
++ = i915_mmio_reg_offset(RING_START(engine
->mmio_base
));
5037 *cs
++ = i915_ggtt_offset(scratch
) + RING_START_IDX
* sizeof(u32
);
5040 expected
[RING_START_IDX
] = i915_ggtt_offset(ce
->ring
->vma
);
5042 *cs
++ = MI_STORE_REGISTER_MEM_GEN8
| MI_USE_GGTT
;
5043 *cs
++ = i915_mmio_reg_offset(RING_TAIL(engine
->mmio_base
));
5044 *cs
++ = i915_ggtt_offset(scratch
) + RING_TAIL_IDX
* sizeof(u32
);
5047 err
= i915_request_await_object(rq
, scratch
->obj
, true);
5049 err
= i915_vma_move_to_active(scratch
, rq
, EXEC_OBJECT_WRITE
);
5051 i915_request_get(rq
);
5052 i915_request_add(rq
);
5056 intel_engine_flush_submission(engine
);
5057 expected
[RING_TAIL_IDX
] = ce
->ring
->tail
;
5059 if (i915_request_wait(rq
, 0, HZ
/ 5) < 0) {
5064 cs
= i915_gem_object_pin_map(scratch
->obj
, I915_MAP_WB
);
5070 for (n
= 0; n
< MAX_IDX
; n
++) {
5071 if (cs
[n
] != expected
[n
]) {
5072 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
5073 engine
->name
, n
, cs
[n
], expected
[n
]);
5079 i915_gem_object_unpin_map(scratch
->obj
);
5082 i915_request_put(rq
);
5084 intel_context_unpin(ce
);
5086 if (err
== -EDEADLK
) {
5087 err
= i915_gem_ww_ctx_backoff(&ww
);
5091 i915_gem_ww_ctx_fini(&ww
);
5092 intel_context_put(ce
);
5096 static int live_lrc_state(void *arg
)
5098 struct intel_gt
*gt
= arg
;
5099 struct intel_engine_cs
*engine
;
5100 struct i915_vma
*scratch
;
5101 enum intel_engine_id id
;
5105 * Check the live register state matches what we expect for this
5109 scratch
= create_scratch(gt
);
5110 if (IS_ERR(scratch
))
5111 return PTR_ERR(scratch
);
5113 for_each_engine(engine
, gt
, id
) {
5114 err
= __live_lrc_state(engine
, scratch
);
5119 if (igt_flush_test(gt
->i915
))
5122 i915_vma_unpin_and_release(&scratch
, 0);
5126 static int gpr_make_dirty(struct intel_context
*ce
)
5128 struct i915_request
*rq
;
5132 rq
= intel_context_create_request(ce
);
5136 cs
= intel_ring_begin(rq
, 2 * NUM_GPR_DW
+ 2);
5138 i915_request_add(rq
);
5142 *cs
++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW
);
5143 for (n
= 0; n
< NUM_GPR_DW
; n
++) {
5144 *cs
++ = CS_GPR(ce
->engine
, n
);
5145 *cs
++ = STACK_MAGIC
;
5149 intel_ring_advance(rq
, cs
);
5151 rq
->sched
.attr
.priority
= I915_PRIORITY_BARRIER
;
5152 i915_request_add(rq
);
5157 static struct i915_request
*
5158 __gpr_read(struct intel_context
*ce
, struct i915_vma
*scratch
, u32
*slot
)
5161 i915_ggtt_offset(ce
->engine
->status_page
.vma
) +
5162 offset_in_page(slot
);
5163 struct i915_request
*rq
;
5168 rq
= intel_context_create_request(ce
);
5172 cs
= intel_ring_begin(rq
, 6 + 4 * NUM_GPR_DW
);
5174 i915_request_add(rq
);
5175 return ERR_CAST(cs
);
5178 *cs
++ = MI_ARB_ON_OFF
| MI_ARB_ENABLE
;
5181 *cs
++ = MI_SEMAPHORE_WAIT
|
5182 MI_SEMAPHORE_GLOBAL_GTT
|
5184 MI_SEMAPHORE_SAD_NEQ_SDD
;
5189 for (n
= 0; n
< NUM_GPR_DW
; n
++) {
5190 *cs
++ = MI_STORE_REGISTER_MEM_GEN8
| MI_USE_GGTT
;
5191 *cs
++ = CS_GPR(ce
->engine
, n
);
5192 *cs
++ = i915_ggtt_offset(scratch
) + n
* sizeof(u32
);
5196 i915_vma_lock(scratch
);
5197 err
= i915_request_await_object(rq
, scratch
->obj
, true);
5199 err
= i915_vma_move_to_active(scratch
, rq
, EXEC_OBJECT_WRITE
);
5200 i915_vma_unlock(scratch
);
5202 i915_request_get(rq
);
5203 i915_request_add(rq
);
5205 i915_request_put(rq
);
5212 static int __live_lrc_gpr(struct intel_engine_cs
*engine
,
5213 struct i915_vma
*scratch
,
5216 u32
*slot
= memset32(engine
->status_page
.addr
+ 1000, 0, 4);
5217 struct intel_context
*ce
;
5218 struct i915_request
*rq
;
5223 if (INTEL_GEN(engine
->i915
) < 9 && engine
->class != RENDER_CLASS
)
5224 return 0; /* GPR only on rcs0 for gen8 */
5226 err
= gpr_make_dirty(engine
->kernel_context
);
5230 ce
= intel_context_create(engine
);
5234 rq
= __gpr_read(ce
, scratch
, slot
);
5240 err
= wait_for_submit(engine
, rq
, HZ
/ 2);
5245 err
= gpr_make_dirty(engine
->kernel_context
);
5249 err
= emit_semaphore_signal(engine
->kernel_context
, slot
);
5257 if (i915_request_wait(rq
, 0, HZ
/ 5) < 0) {
5262 cs
= i915_gem_object_pin_map(scratch
->obj
, I915_MAP_WB
);
5268 for (n
= 0; n
< NUM_GPR_DW
; n
++) {
5270 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
5272 n
/ 2, n
& 1 ? "udw" : "ldw",
5279 i915_gem_object_unpin_map(scratch
->obj
);
5282 memset32(&slot
[0], -1, 4);
5284 i915_request_put(rq
);
5286 intel_context_put(ce
);
5290 static int live_lrc_gpr(void *arg
)
5292 struct intel_gt
*gt
= arg
;
5293 struct intel_engine_cs
*engine
;
5294 struct i915_vma
*scratch
;
5295 enum intel_engine_id id
;
5299 * Check that GPR registers are cleared in new contexts as we need
5300 * to avoid leaking any information from previous contexts.
5303 scratch
= create_scratch(gt
);
5304 if (IS_ERR(scratch
))
5305 return PTR_ERR(scratch
);
5307 for_each_engine(engine
, gt
, id
) {
5308 st_engine_heartbeat_disable(engine
);
5310 err
= __live_lrc_gpr(engine
, scratch
, false);
5314 err
= __live_lrc_gpr(engine
, scratch
, true);
5319 st_engine_heartbeat_enable(engine
);
5320 if (igt_flush_test(gt
->i915
))
5326 i915_vma_unpin_and_release(&scratch
, 0);
5330 static struct i915_request
*
5331 create_timestamp(struct intel_context
*ce
, void *slot
, int idx
)
5334 i915_ggtt_offset(ce
->engine
->status_page
.vma
) +
5335 offset_in_page(slot
);
5336 struct i915_request
*rq
;
5340 rq
= intel_context_create_request(ce
);
5344 cs
= intel_ring_begin(rq
, 10);
5350 *cs
++ = MI_ARB_ON_OFF
| MI_ARB_ENABLE
;
5353 *cs
++ = MI_SEMAPHORE_WAIT
|
5354 MI_SEMAPHORE_GLOBAL_GTT
|
5356 MI_SEMAPHORE_SAD_NEQ_SDD
;
5361 *cs
++ = MI_STORE_REGISTER_MEM_GEN8
| MI_USE_GGTT
;
5362 *cs
++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq
->engine
->mmio_base
));
5363 *cs
++ = offset
+ idx
* sizeof(u32
);
5366 intel_ring_advance(rq
, cs
);
5368 rq
->sched
.attr
.priority
= I915_PRIORITY_MASK
;
5371 i915_request_get(rq
);
5372 i915_request_add(rq
);
5374 i915_request_put(rq
);
5375 return ERR_PTR(err
);
struct lrc_timestamp {
	struct intel_engine_cs *engine;
	struct intel_context *ce[2];
	u32 poison;
};

static bool timestamp_advanced(u32 start, u32 end)
{
	return (s32)(end - start) > 0;
}
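
/*
 * A minimal illustration (hypothetical values, not used by the selftests) of
 * why the signed subtraction above is wrap-safe: even if the 32-bit
 * CTX_TIMESTAMP counter overflows between the two samples, the difference
 * interpreted as s32 is still positive, i.e. still reads as "advanced".
 */
static bool __maybe_unused example_timestamp_wrap(void)
{
	u32 before_wrap = 0xfffffff0; /* just below the 32-bit overflow */
	u32 after_wrap = 0x00000010;  /* shortly after the overflow */

	/* (s32)(0x00000010 - 0xfffffff0) == 0x20 > 0 */
	return timestamp_advanced(before_wrap, after_wrap);
}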
5392 static int __lrc_timestamp(const struct lrc_timestamp
*arg
, bool preempt
)
5394 u32
*slot
= memset32(arg
->engine
->status_page
.addr
+ 1000, 0, 4);
5395 struct i915_request
*rq
;
5399 arg
->ce
[0]->lrc_reg_state
[CTX_TIMESTAMP
] = arg
->poison
;
5400 rq
= create_timestamp(arg
->ce
[0], slot
, 1);
5404 err
= wait_for_submit(rq
->engine
, rq
, HZ
/ 2);
5409 arg
->ce
[1]->lrc_reg_state
[CTX_TIMESTAMP
] = 0xdeadbeef;
5410 err
= emit_semaphore_signal(arg
->ce
[1], slot
);
5418 /* And wait for switch to kernel (to save our context to memory) */
5419 err
= context_flush(arg
->ce
[0], HZ
/ 2);
5423 if (!timestamp_advanced(arg
->poison
, slot
[1])) {
5424 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
5425 arg
->engine
->name
, preempt
? "preempt" : "simple",
5426 arg
->poison
, slot
[1]);
5430 timestamp
= READ_ONCE(arg
->ce
[0]->lrc_reg_state
[CTX_TIMESTAMP
]);
5431 if (!timestamp_advanced(slot
[1], timestamp
)) {
5432 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
5433 arg
->engine
->name
, preempt
? "preempt" : "simple",
5434 slot
[1], timestamp
);
5439 memset32(slot
, -1, 4);
5440 i915_request_put(rq
);
5444 static int live_lrc_timestamp(void *arg
)
5446 struct lrc_timestamp data
= {};
5447 struct intel_gt
*gt
= arg
;
5448 enum intel_engine_id id
;
5449 const u32 poison
[] = {
	/*
	 * We want to verify that the timestamp is saved and restored across
	 * context switches and is monotonic.
	 *
	 * So we do this with a little bit of LRC poisoning to check various
	 * boundary conditions, and see what happens if we preempt the context
	 * with a second request (carrying more poison into the timestamp).
	 */
5465 for_each_engine(data
.engine
, gt
, id
) {
5468 st_engine_heartbeat_disable(data
.engine
);
5470 for (i
= 0; i
< ARRAY_SIZE(data
.ce
); i
++) {
5471 struct intel_context
*tmp
;
5473 tmp
= intel_context_create(data
.engine
);
5479 err
= intel_context_pin(tmp
);
5481 intel_context_put(tmp
);
5488 for (i
= 0; i
< ARRAY_SIZE(poison
); i
++) {
5489 data
.poison
= poison
[i
];
5491 err
= __lrc_timestamp(&data
, false);
5495 err
= __lrc_timestamp(&data
, true);
5501 st_engine_heartbeat_enable(data
.engine
);
5502 for (i
= 0; i
< ARRAY_SIZE(data
.ce
); i
++) {
5506 intel_context_unpin(data
.ce
[i
]);
5507 intel_context_put(data
.ce
[i
]);
5510 if (igt_flush_test(gt
->i915
))
5519 static struct i915_vma
*
5520 create_user_vma(struct i915_address_space
*vm
, unsigned long size
)
5522 struct drm_i915_gem_object
*obj
;
5523 struct i915_vma
*vma
;
5526 obj
= i915_gem_object_create_internal(vm
->i915
, size
);
5528 return ERR_CAST(obj
);
5530 vma
= i915_vma_instance(obj
, vm
, NULL
);
5532 i915_gem_object_put(obj
);
5536 err
= i915_vma_pin(vma
, 0, 0, PIN_USER
);
5538 i915_gem_object_put(obj
);
5539 return ERR_PTR(err
);
5545 static struct i915_vma
*
5546 store_context(struct intel_context
*ce
, struct i915_vma
*scratch
)
5548 struct i915_vma
*batch
;
5549 u32 dw
, x
, *cs
, *hw
;
5552 batch
= create_user_vma(ce
->vm
, SZ_64K
);
5556 cs
= i915_gem_object_pin_map(batch
->obj
, I915_MAP_WC
);
5558 i915_vma_put(batch
);
5559 return ERR_CAST(cs
);
5562 defaults
= shmem_pin_map(ce
->engine
->default_state
);
5564 i915_gem_object_unpin_map(batch
->obj
);
5565 i915_vma_put(batch
);
5566 return ERR_PTR(-ENOMEM
);
5572 hw
+= LRC_STATE_OFFSET
/ sizeof(*hw
);
5574 u32 len
= hw
[dw
] & 0x7f;
5581 if ((hw
[dw
] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5587 len
= (len
+ 1) / 2;
5589 *cs
++ = MI_STORE_REGISTER_MEM_GEN8
;
5591 *cs
++ = lower_32_bits(scratch
->node
.start
+ x
);
5592 *cs
++ = upper_32_bits(scratch
->node
.start
+ x
);
5597 } while (dw
< PAGE_SIZE
/ sizeof(u32
) &&
5598 (hw
[dw
] & ~BIT(0)) != MI_BATCH_BUFFER_END
);
5600 *cs
++ = MI_BATCH_BUFFER_END
;
5602 shmem_unpin_map(ce
->engine
->default_state
, defaults
);
5604 i915_gem_object_flush_map(batch
->obj
);
5605 i915_gem_object_unpin_map(batch
->obj
);
5610 static int move_to_active(struct i915_request
*rq
,
5611 struct i915_vma
*vma
,
5617 err
= i915_request_await_object(rq
, vma
->obj
, flags
);
5619 err
= i915_vma_move_to_active(vma
, rq
, flags
);
5620 i915_vma_unlock(vma
);
5625 static struct i915_request
*
5626 record_registers(struct intel_context
*ce
,
5627 struct i915_vma
*before
,
5628 struct i915_vma
*after
,
5631 struct i915_vma
*b_before
, *b_after
;
5632 struct i915_request
*rq
;
5636 b_before
= store_context(ce
, before
);
5637 if (IS_ERR(b_before
))
5638 return ERR_CAST(b_before
);
5640 b_after
= store_context(ce
, after
);
5641 if (IS_ERR(b_after
)) {
5642 rq
= ERR_CAST(b_after
);
5646 rq
= intel_context_create_request(ce
);
5650 err
= move_to_active(rq
, before
, EXEC_OBJECT_WRITE
);
5654 err
= move_to_active(rq
, b_before
, 0);
5658 err
= move_to_active(rq
, after
, EXEC_OBJECT_WRITE
);
5662 err
= move_to_active(rq
, b_after
, 0);
5666 cs
= intel_ring_begin(rq
, 14);
5672 *cs
++ = MI_ARB_ON_OFF
| MI_ARB_DISABLE
;
5673 *cs
++ = MI_BATCH_BUFFER_START_GEN8
| BIT(8);
5674 *cs
++ = lower_32_bits(b_before
->node
.start
);
5675 *cs
++ = upper_32_bits(b_before
->node
.start
);
5677 *cs
++ = MI_ARB_ON_OFF
| MI_ARB_ENABLE
;
5678 *cs
++ = MI_SEMAPHORE_WAIT
|
5679 MI_SEMAPHORE_GLOBAL_GTT
|
5681 MI_SEMAPHORE_SAD_NEQ_SDD
;
5683 *cs
++ = i915_ggtt_offset(ce
->engine
->status_page
.vma
) +
5684 offset_in_page(sema
);
5688 *cs
++ = MI_ARB_ON_OFF
| MI_ARB_DISABLE
;
5689 *cs
++ = MI_BATCH_BUFFER_START_GEN8
| BIT(8);
5690 *cs
++ = lower_32_bits(b_after
->node
.start
);
5691 *cs
++ = upper_32_bits(b_after
->node
.start
);
5693 intel_ring_advance(rq
, cs
);
5695 WRITE_ONCE(*sema
, 0);
5696 i915_request_get(rq
);
5697 i915_request_add(rq
);
5699 i915_vma_put(b_after
);
5701 i915_vma_put(b_before
);
5705 i915_request_add(rq
);
5710 static struct i915_vma
*load_context(struct intel_context
*ce
, u32 poison
)
5712 struct i915_vma
*batch
;
5716 batch
= create_user_vma(ce
->vm
, SZ_64K
);
5720 cs
= i915_gem_object_pin_map(batch
->obj
, I915_MAP_WC
);
5722 i915_vma_put(batch
);
5723 return ERR_CAST(cs
);
5726 defaults
= shmem_pin_map(ce
->engine
->default_state
);
5728 i915_gem_object_unpin_map(batch
->obj
);
5729 i915_vma_put(batch
);
5730 return ERR_PTR(-ENOMEM
);
5735 hw
+= LRC_STATE_OFFSET
/ sizeof(*hw
);
5737 u32 len
= hw
[dw
] & 0x7f;
5744 if ((hw
[dw
] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5750 len
= (len
+ 1) / 2;
5751 *cs
++ = MI_LOAD_REGISTER_IMM(len
);
5757 } while (dw
< PAGE_SIZE
/ sizeof(u32
) &&
5758 (hw
[dw
] & ~BIT(0)) != MI_BATCH_BUFFER_END
);
5760 *cs
++ = MI_BATCH_BUFFER_END
;
5762 shmem_unpin_map(ce
->engine
->default_state
, defaults
);
5764 i915_gem_object_flush_map(batch
->obj
);
5765 i915_gem_object_unpin_map(batch
->obj
);
5770 static int poison_registers(struct intel_context
*ce
, u32 poison
, u32
*sema
)
5772 struct i915_request
*rq
;
5773 struct i915_vma
*batch
;
5777 batch
= load_context(ce
, poison
);
5779 return PTR_ERR(batch
);
5781 rq
= intel_context_create_request(ce
);
5787 err
= move_to_active(rq
, batch
, 0);
5791 cs
= intel_ring_begin(rq
, 8);
5797 *cs
++ = MI_ARB_ON_OFF
| MI_ARB_DISABLE
;
5798 *cs
++ = MI_BATCH_BUFFER_START_GEN8
| BIT(8);
5799 *cs
++ = lower_32_bits(batch
->node
.start
);
5800 *cs
++ = upper_32_bits(batch
->node
.start
);
5802 *cs
++ = MI_STORE_DWORD_IMM_GEN4
| MI_USE_GGTT
;
5803 *cs
++ = i915_ggtt_offset(ce
->engine
->status_page
.vma
) +
5804 offset_in_page(sema
);
5808 intel_ring_advance(rq
, cs
);
5810 rq
->sched
.attr
.priority
= I915_PRIORITY_BARRIER
;
5812 i915_request_add(rq
);
5814 i915_vma_put(batch
);
5818 static bool is_moving(u32 a
, u32 b
)
5823 static int compare_isolation(struct intel_engine_cs
*engine
,
5824 struct i915_vma
*ref
[2],
5825 struct i915_vma
*result
[2],
5826 struct intel_context
*ce
,
5829 u32 x
, dw
, *hw
, *lrc
;
5834 A
[0] = i915_gem_object_pin_map(ref
[0]->obj
, I915_MAP_WC
);
5836 return PTR_ERR(A
[0]);
5838 A
[1] = i915_gem_object_pin_map(ref
[1]->obj
, I915_MAP_WC
);
5840 err
= PTR_ERR(A
[1]);
5844 B
[0] = i915_gem_object_pin_map(result
[0]->obj
, I915_MAP_WC
);
5846 err
= PTR_ERR(B
[0]);
5850 B
[1] = i915_gem_object_pin_map(result
[1]->obj
, I915_MAP_WC
);
5852 err
= PTR_ERR(B
[1]);
5856 lrc
= i915_gem_object_pin_map(ce
->state
->obj
,
5857 i915_coherent_map_type(engine
->i915
));
5862 lrc
+= LRC_STATE_OFFSET
/ sizeof(*hw
);
5864 defaults
= shmem_pin_map(ce
->engine
->default_state
);
5873 hw
+= LRC_STATE_OFFSET
/ sizeof(*hw
);
5875 u32 len
= hw
[dw
] & 0x7f;
5882 if ((hw
[dw
] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5888 len
= (len
+ 1) / 2;
5890 if (!is_moving(A
[0][x
], A
[1][x
]) &&
5891 (A
[0][x
] != B
[0][x
] || A
[1][x
] != B
[1][x
])) {
5892 switch (hw
[dw
] & 4095) {
5893 case 0x30: /* RING_HEAD */
5894 case 0x34: /* RING_TAIL */
5898 pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
5901 A
[0][x
], B
[0][x
], B
[1][x
],
5902 poison
, lrc
[dw
+ 1]);
5909 } while (dw
< PAGE_SIZE
/ sizeof(u32
) &&
5910 (hw
[dw
] & ~BIT(0)) != MI_BATCH_BUFFER_END
);
5912 shmem_unpin_map(ce
->engine
->default_state
, defaults
);
5914 i915_gem_object_unpin_map(ce
->state
->obj
);
5916 i915_gem_object_unpin_map(result
[1]->obj
);
5918 i915_gem_object_unpin_map(result
[0]->obj
);
5920 i915_gem_object_unpin_map(ref
[1]->obj
);
5922 i915_gem_object_unpin_map(ref
[0]->obj
);
5926 static int __lrc_isolation(struct intel_engine_cs
*engine
, u32 poison
)
5928 u32
*sema
= memset32(engine
->status_page
.addr
+ 1000, 0, 1);
5929 struct i915_vma
*ref
[2], *result
[2];
5930 struct intel_context
*A
, *B
;
5931 struct i915_request
*rq
;
5934 A
= intel_context_create(engine
);
5938 B
= intel_context_create(engine
);
5944 ref
[0] = create_user_vma(A
->vm
, SZ_64K
);
5945 if (IS_ERR(ref
[0])) {
5946 err
= PTR_ERR(ref
[0]);
5950 ref
[1] = create_user_vma(A
->vm
, SZ_64K
);
5951 if (IS_ERR(ref
[1])) {
5952 err
= PTR_ERR(ref
[1]);
5956 rq
= record_registers(A
, ref
[0], ref
[1], sema
);
5962 WRITE_ONCE(*sema
, 1);
5965 if (i915_request_wait(rq
, 0, HZ
/ 2) < 0) {
5966 i915_request_put(rq
);
5970 i915_request_put(rq
);
5972 result
[0] = create_user_vma(A
->vm
, SZ_64K
);
5973 if (IS_ERR(result
[0])) {
5974 err
= PTR_ERR(result
[0]);
5978 result
[1] = create_user_vma(A
->vm
, SZ_64K
);
5979 if (IS_ERR(result
[1])) {
5980 err
= PTR_ERR(result
[1]);
5984 rq
= record_registers(A
, result
[0], result
[1], sema
);
5990 err
= poison_registers(B
, poison
, sema
);
5992 WRITE_ONCE(*sema
, -1);
5993 i915_request_put(rq
);
5997 if (i915_request_wait(rq
, 0, HZ
/ 2) < 0) {
5998 i915_request_put(rq
);
6002 i915_request_put(rq
);
6004 err
= compare_isolation(engine
, ref
, result
, A
, poison
);
6007 i915_vma_put(result
[1]);
6009 i915_vma_put(result
[0]);
6011 i915_vma_put(ref
[1]);
6013 i915_vma_put(ref
[0]);
6015 intel_context_put(B
);
6017 intel_context_put(A
);
static bool skip_isolation(const struct intel_engine_cs *engine)
{
	if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
		return true;

	if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
		return true;

	return false;
}
6032 static int live_lrc_isolation(void *arg
)
6034 struct intel_gt
*gt
= arg
;
6035 struct intel_engine_cs
*engine
;
6036 enum intel_engine_id id
;
6037 const u32 poison
[] = {
	/*
	 * Our goal is to try and verify that per-context state cannot be
	 * tampered with by another non-privileged client.
	 *
	 * We take the list of context registers from the LRI in the default
	 * context image and attempt to modify that list from a remote context.
	 */
6054 for_each_engine(engine
, gt
, id
) {
6057 /* Just don't even ask */
6058 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN
) &&
6059 skip_isolation(engine
))
6062 intel_engine_pm_get(engine
);
6063 for (i
= 0; i
< ARRAY_SIZE(poison
); i
++) {
6066 result
= __lrc_isolation(engine
, poison
[i
]);
6070 result
= __lrc_isolation(engine
, ~poison
[i
]);
6074 intel_engine_pm_put(engine
);
6075 if (igt_flush_test(gt
->i915
)) {
static int indirect_ctx_submit_req(struct intel_context *ce)
{
	struct i915_request *rq;
	int err = 0;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (i915_request_wait(rq, 0, HZ / 5) < 0)
		err = -ETIME;

	i915_request_put(rq);

	return err;
}
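
/*
 * The canary batch stores RING_START into a spare slot of the context's
 * wa_bb page. As RING_START is restored before the indirect ctx bb runs and
 * is unique to each context, check_ring_start() can later compare the stored
 * value with lrc_reg_state[CTX_RING_START] to prove the batch executed for
 * the right context.
 */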
#define CTX_BB_CANARY_OFFSET (3 * 1024)
#define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))

static u32 *
emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
{
	*cs++ = MI_STORE_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(RING_START(0));
	*cs++ = i915_ggtt_offset(ce->state) +
		context_wa_bb_offset(ce) +
		CTX_BB_CANARY_OFFSET;
	*cs++ = 0;

	return cs;
}

static void
indirect_ctx_bb_setup(struct intel_context *ce)
{
	u32 *cs = context_indirect_bb(ce);

	cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;

	setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
}

static bool check_ring_start(struct intel_context *ce)
{
	const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
		LRC_STATE_OFFSET + context_wa_bb_offset(ce);

	if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
		return true;

	pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
	       ctx_bb[CTX_BB_CANARY_INDEX],
	       ce->lrc_reg_state[CTX_RING_START]);

	return false;
}
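
/* Run an empty request on the context, then verify the canary matches RING_START. */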
static int indirect_ctx_bb_check(struct intel_context *ce)
{
	int err;

	err = indirect_ctx_submit_req(ce);
	if (err)
		return err;

	if (!check_ring_start(ce))
		return -EINVAL;

	return 0;
}

static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
{
	struct intel_context *a, *b;
	int err;

	a = intel_context_create(engine);
	if (IS_ERR(a))
		return PTR_ERR(a);
	err = intel_context_pin(a);
	if (err)
		goto put_a;

	b = intel_context_create(engine);
	if (IS_ERR(b)) {
		err = PTR_ERR(b);
		goto unpin_a;
	}
	err = intel_context_pin(b);
	if (err)
		goto put_b;

	/* We use the already reserved extra page in context state */
	if (!a->wa_bb_page) {
		GEM_BUG_ON(b->wa_bb_page);
		GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
		goto unpin_b;
	}

	/*
	 * In order to test that our per-context bb is truly per context,
	 * and executes at the intended spot in the context restore process,
	 * make the batch store the ring start value to memory.
	 * As ring start is restored before the indirect ctx bb runs, and is
	 * different for each context, it fits this purpose.
	 */
	indirect_ctx_bb_setup(a);
	indirect_ctx_bb_setup(b);

	err = indirect_ctx_bb_check(a);
	if (err)
		goto unpin_b;

	err = indirect_ctx_bb_check(b);

unpin_b:
	intel_context_unpin(b);
put_b:
	intel_context_put(b);
unpin_a:
	intel_context_unpin(a);
put_a:
	intel_context_put(a);

	return err;
}

static int live_lrc_indirect_ctx_bb(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	for_each_engine(engine, gt, id) {
		intel_engine_pm_get(engine);
		err = __live_lrc_indirect_ctx_bb(engine);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;

		if (err)
			break;
	}

	return err;
}
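
/*
 * Claim the engine-reset bit and disable the submission tasklet so that we
 * can reset the engine directly, unless the hanging request has already been
 * marked with a fence error by a reset from elsewhere.
 */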
static void garbage_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq)
{
	const unsigned int bit = I915_RESET_ENGINE + engine->id;
	unsigned long *lock = &engine->gt->reset.flags;

	if (test_and_set_bit(bit, lock))
		return;

	tasklet_disable(&engine->execlists.tasklet);

	if (!rq->fence.error)
		intel_engine_reset(engine, NULL);

	tasklet_enable(&engine->execlists.tasklet);
	clear_and_wake_up_bit(bit, lock);
}
static struct i915_request *garbage(struct intel_context *ce,
				    struct rnd_state *prng)
{
	struct i915_request *rq;
	int err;

	err = intel_context_pin(ce);
	if (err)
		return ERR_PTR(err);

	prandom_bytes_state(prng,
			    ce->lrc_reg_state,
			    ce->engine->context_size -
			    LRC_STATE_OFFSET);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_request_get(rq);
	i915_request_add(rq);
	return rq;

err_unpin:
	intel_context_unpin(ce);
	return ERR_PTR(err);
}
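
/*
 * Corrupt a fresh context, force an engine reset underneath it, and check
 * both that the corrupted request is flagged with a fence error by the reset
 * and that it subsequently completes, i.e. the engine recovers.
 */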
static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
{
	struct intel_context *ce;
	struct i915_request *hang;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	hang = garbage(ce, prng);
	if (IS_ERR(hang)) {
		err = PTR_ERR(hang);
		goto err_ce;
	}

	if (wait_for_submit(engine, hang, HZ / 2)) {
		i915_request_put(hang);
		err = -ETIME;
		goto err_ce;
	}

	intel_context_set_banned(ce);
	garbage_reset(engine, hang);

	intel_engine_flush_submission(engine);
	if (!hang->fence.error) {
		i915_request_put(hang);
		pr_err("%s: corrupted context was not reset\n",
		       engine->name);
		err = -EINVAL;
		goto err_ce;
	}

	if (i915_request_wait(hang, 0, HZ / 2) < 0) {
		pr_err("%s: corrupted context did not recover\n",
		       engine->name);
		i915_request_put(hang);
		err = -EIO;
		goto err_ce;
	}
	i915_request_put(hang);

err_ce:
	intel_context_put(ce);
	return err;
}

static int live_lrc_garbage(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * Verify that we can recover if one context state is completely
	 * corrupted.
	 */

	if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
		return 0;

	for_each_engine(engine, gt, id) {
		I915_RND_STATE(prng);
		int err = 0, i;

		if (!intel_has_reset_engine(engine->gt))
			continue;

		intel_engine_pm_get(engine);
		for (i = 0; i < 3; i++) {
			err = __lrc_garbage(engine, &prng);
			if (err)
				break;
		}
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}
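
/*
 * Flood one context with empty requests for a while, then read back the
 * cumulative runtime the CS wrote into the PPHWSP and complain if the
 * accumulated value ever underflowed.
 */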
static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct i915_request *rq;
	IGT_TIMEOUT(end_time);
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	ce->runtime.num_underflow = 0;
	ce->runtime.max_underflow = 0;

	do {
		unsigned int loop = 1024;

		while (loop) {
			rq = intel_context_create_request(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto err_rq;
			}

			if (--loop == 0)
				i915_request_get(rq);

			i915_request_add(rq);
		}

		if (__igt_timeout(end_time, NULL))
			break;

		i915_request_put(rq);
	} while (1);

	err = i915_request_wait(rq, 0, HZ / 5);
	if (err < 0) {
		pr_err("%s: request not completed!\n", engine->name);
		goto err_wait;
	}

	igt_flush_test(engine->i915);

	pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
		engine->name,
		intel_context_get_total_runtime_ns(ce),
		intel_context_get_avg_runtime_ns(ce));

	err = 0;
	if (ce->runtime.num_underflow) {
		pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
		       engine->name,
		       ce->runtime.num_underflow,
		       ce->runtime.max_underflow);
		err = -EOVERFLOW;
	}

err_wait:
	i915_request_put(rq);
err_rq:
	intel_context_put(ce);
	return err;
}

static int live_pphwsp_runtime(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that cumulative context runtime as stored in the pphwsp[16]
	 * is monotonically increasing.
	 */

	for_each_engine(engine, gt, id) {
		err = __live_pphwsp_runtime(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

int intel_lrc_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_lrc_layout),
		SUBTEST(live_lrc_fixed),
		SUBTEST(live_lrc_state),
		SUBTEST(live_lrc_gpr),
		SUBTEST(live_lrc_isolation),
		SUBTEST(live_lrc_timestamp),
		SUBTEST(live_lrc_garbage),
		SUBTEST(live_pphwsp_runtime),
		SUBTEST(live_lrc_indirect_ctx_bb),
	};

	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}