WIP FPC-III support
[linux/fpc-iii.git] / drivers / gpu / drm / i915 / gt / selftest_lrc.c
blob95d41c01d0e04221f6f22733d3f1d5a65e2107a9
1 /*
2 * SPDX-License-Identifier: MIT
4 * Copyright © 2018 Intel Corporation
5 */
7 #include <linux/prime_numbers.h>
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
12 #include "gt/selftest_engine_heartbeat.h"
14 #include "i915_selftest.h"
15 #include "selftests/i915_random.h"
16 #include "selftests/igt_flush_test.h"
17 #include "selftests/igt_live_test.h"
18 #include "selftests/igt_spinner.h"
19 #include "selftests/lib_sw_fence.h"
21 #include "gem/selftests/igt_gem_utils.h"
22 #include "gem/selftests/mock_context.h"
/* Offset of GPR dword @n, computed relative to the engine's mmio_base */
#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
#define NUM_GPR 16
#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
28 static struct i915_vma *create_scratch(struct intel_gt *gt)
30 struct drm_i915_gem_object *obj;
31 struct i915_vma *vma;
32 int err;
34 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
35 if (IS_ERR(obj))
36 return ERR_CAST(obj);
38 i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
40 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
41 if (IS_ERR(vma)) {
42 i915_gem_object_put(obj);
43 return vma;
46 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
47 if (err) {
48 i915_gem_object_put(obj);
49 return ERR_PTR(err);
52 return vma;
55 static bool is_active(struct i915_request *rq)
57 if (i915_request_is_active(rq))
58 return true;
60 if (i915_request_on_hold(rq))
61 return true;
63 if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
64 return true;
66 return false;
69 static int wait_for_submit(struct intel_engine_cs *engine,
70 struct i915_request *rq,
71 unsigned long timeout)
73 timeout += jiffies;
74 do {
75 bool done = time_after(jiffies, timeout);
77 if (i915_request_completed(rq)) /* that was quick! */
78 return 0;
80 /* Wait until the HW has acknowleged the submission (or err) */
81 intel_engine_flush_submission(engine);
82 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
83 return 0;
85 if (done)
86 return -ETIME;
88 cond_resched();
89 } while (1);
92 static int wait_for_reset(struct intel_engine_cs *engine,
93 struct i915_request *rq,
94 unsigned long timeout)
96 timeout += jiffies;
98 do {
99 cond_resched();
100 intel_engine_flush_submission(engine);
102 if (READ_ONCE(engine->execlists.pending[0]))
103 continue;
105 if (i915_request_completed(rq))
106 break;
108 if (READ_ONCE(rq->fence.error))
109 break;
110 } while (time_before(jiffies, timeout));
112 flush_scheduled_work();
114 if (rq->fence.error != -EIO) {
115 pr_err("%s: hanging request %llx:%lld not reset\n",
116 engine->name,
117 rq->fence.context,
118 rq->fence.seqno);
119 return -EINVAL;
122 /* Give the request a jiffie to complete after flushing the worker */
123 if (i915_request_wait(rq, 0,
124 max(0l, (long)(timeout - jiffies)) + 1) < 0) {
125 pr_err("%s: hanging request %llx:%lld did not complete\n",
126 engine->name,
127 rq->fence.context,
128 rq->fence.seqno);
129 return -ETIME;
132 return 0;
135 static int live_sanitycheck(void *arg)
137 struct intel_gt *gt = arg;
138 struct intel_engine_cs *engine;
139 enum intel_engine_id id;
140 struct igt_spinner spin;
141 int err = 0;
143 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
144 return 0;
146 if (igt_spinner_init(&spin, gt))
147 return -ENOMEM;
149 for_each_engine(engine, gt, id) {
150 struct intel_context *ce;
151 struct i915_request *rq;
153 ce = intel_context_create(engine);
154 if (IS_ERR(ce)) {
155 err = PTR_ERR(ce);
156 break;
159 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
160 if (IS_ERR(rq)) {
161 err = PTR_ERR(rq);
162 goto out_ctx;
165 i915_request_add(rq);
166 if (!igt_wait_for_spinner(&spin, rq)) {
167 GEM_TRACE("spinner failed to start\n");
168 GEM_TRACE_DUMP();
169 intel_gt_set_wedged(gt);
170 err = -EIO;
171 goto out_ctx;
174 igt_spinner_end(&spin);
175 if (igt_flush_test(gt->i915)) {
176 err = -EIO;
177 goto out_ctx;
180 out_ctx:
181 intel_context_put(ce);
182 if (err)
183 break;
186 igt_spinner_fini(&spin);
187 return err;
190 static int live_unlite_restore(struct intel_gt *gt, int prio)
192 struct intel_engine_cs *engine;
193 enum intel_engine_id id;
194 struct igt_spinner spin;
195 int err = -ENOMEM;
198 * Check that we can correctly context switch between 2 instances
199 * on the same engine from the same parent context.
202 if (igt_spinner_init(&spin, gt))
203 return err;
205 err = 0;
206 for_each_engine(engine, gt, id) {
207 struct intel_context *ce[2] = {};
208 struct i915_request *rq[2];
209 struct igt_live_test t;
210 int n;
212 if (prio && !intel_engine_has_preemption(engine))
213 continue;
215 if (!intel_engine_can_store_dword(engine))
216 continue;
218 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
219 err = -EIO;
220 break;
222 st_engine_heartbeat_disable(engine);
224 for (n = 0; n < ARRAY_SIZE(ce); n++) {
225 struct intel_context *tmp;
227 tmp = intel_context_create(engine);
228 if (IS_ERR(tmp)) {
229 err = PTR_ERR(tmp);
230 goto err_ce;
233 err = intel_context_pin(tmp);
234 if (err) {
235 intel_context_put(tmp);
236 goto err_ce;
240 * Setup the pair of contexts such that if we
241 * lite-restore using the RING_TAIL from ce[1] it
242 * will execute garbage from ce[0]->ring.
244 memset(tmp->ring->vaddr,
245 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
246 tmp->ring->vma->size);
248 ce[n] = tmp;
250 GEM_BUG_ON(!ce[1]->ring->size);
251 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
252 __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
254 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
255 if (IS_ERR(rq[0])) {
256 err = PTR_ERR(rq[0]);
257 goto err_ce;
260 i915_request_get(rq[0]);
261 i915_request_add(rq[0]);
262 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
264 if (!igt_wait_for_spinner(&spin, rq[0])) {
265 i915_request_put(rq[0]);
266 goto err_ce;
269 rq[1] = i915_request_create(ce[1]);
270 if (IS_ERR(rq[1])) {
271 err = PTR_ERR(rq[1]);
272 i915_request_put(rq[0]);
273 goto err_ce;
276 if (!prio) {
278 * Ensure we do the switch to ce[1] on completion.
280 * rq[0] is already submitted, so this should reduce
281 * to a no-op (a wait on a request on the same engine
282 * uses the submit fence, not the completion fence),
283 * but it will install a dependency on rq[1] for rq[0]
284 * that will prevent the pair being reordered by
285 * timeslicing.
287 i915_request_await_dma_fence(rq[1], &rq[0]->fence);
290 i915_request_get(rq[1]);
291 i915_request_add(rq[1]);
292 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
293 i915_request_put(rq[0]);
295 if (prio) {
296 struct i915_sched_attr attr = {
297 .priority = prio,
300 /* Alternatively preempt the spinner with ce[1] */
301 engine->schedule(rq[1], &attr);
304 /* And switch back to ce[0] for good measure */
305 rq[0] = i915_request_create(ce[0]);
306 if (IS_ERR(rq[0])) {
307 err = PTR_ERR(rq[0]);
308 i915_request_put(rq[1]);
309 goto err_ce;
312 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
313 i915_request_get(rq[0]);
314 i915_request_add(rq[0]);
315 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
316 i915_request_put(rq[1]);
317 i915_request_put(rq[0]);
319 err_ce:
320 intel_engine_flush_submission(engine);
321 igt_spinner_end(&spin);
322 for (n = 0; n < ARRAY_SIZE(ce); n++) {
323 if (IS_ERR_OR_NULL(ce[n]))
324 break;
326 intel_context_unpin(ce[n]);
327 intel_context_put(ce[n]);
330 st_engine_heartbeat_enable(engine);
331 if (igt_live_test_end(&t))
332 err = -EIO;
333 if (err)
334 break;
337 igt_spinner_fini(&spin);
338 return err;
/* Run live_unlite_restore() with prio 0, i.e. without a priority bump. */
static int live_unlite_switch(void *arg)
{
	return live_unlite_restore(arg, 0);
}
346 static int live_unlite_preempt(void *arg)
348 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
351 static int live_unlite_ring(void *arg)
353 struct intel_gt *gt = arg;
354 struct intel_engine_cs *engine;
355 struct igt_spinner spin;
356 enum intel_engine_id id;
357 int err = 0;
360 * Setup a preemption event that will cause almost the entire ring
361 * to be unwound, potentially fooling our intel_ring_direction()
362 * into emitting a forward lite-restore instead of the rollback.
365 if (igt_spinner_init(&spin, gt))
366 return -ENOMEM;
368 for_each_engine(engine, gt, id) {
369 struct intel_context *ce[2] = {};
370 struct i915_request *rq;
371 struct igt_live_test t;
372 int n;
374 if (!intel_engine_has_preemption(engine))
375 continue;
377 if (!intel_engine_can_store_dword(engine))
378 continue;
380 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
381 err = -EIO;
382 break;
384 st_engine_heartbeat_disable(engine);
386 for (n = 0; n < ARRAY_SIZE(ce); n++) {
387 struct intel_context *tmp;
389 tmp = intel_context_create(engine);
390 if (IS_ERR(tmp)) {
391 err = PTR_ERR(tmp);
392 goto err_ce;
395 err = intel_context_pin(tmp);
396 if (err) {
397 intel_context_put(tmp);
398 goto err_ce;
401 memset32(tmp->ring->vaddr,
402 0xdeadbeef, /* trigger a hang if executed */
403 tmp->ring->vma->size / sizeof(u32));
405 ce[n] = tmp;
408 /* Create max prio spinner, followed by N low prio nops */
409 rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
410 if (IS_ERR(rq)) {
411 err = PTR_ERR(rq);
412 goto err_ce;
415 i915_request_get(rq);
416 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
417 i915_request_add(rq);
419 if (!igt_wait_for_spinner(&spin, rq)) {
420 intel_gt_set_wedged(gt);
421 i915_request_put(rq);
422 err = -ETIME;
423 goto err_ce;
426 /* Fill the ring, until we will cause a wrap */
427 n = 0;
428 while (intel_ring_direction(ce[0]->ring,
429 rq->wa_tail,
430 ce[0]->ring->tail) <= 0) {
431 struct i915_request *tmp;
433 tmp = intel_context_create_request(ce[0]);
434 if (IS_ERR(tmp)) {
435 err = PTR_ERR(tmp);
436 i915_request_put(rq);
437 goto err_ce;
440 i915_request_add(tmp);
441 intel_engine_flush_submission(engine);
442 n++;
444 intel_engine_flush_submission(engine);
445 pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
446 engine->name, n,
447 ce[0]->ring->size,
448 ce[0]->ring->tail,
449 ce[0]->ring->emit,
450 rq->tail);
451 GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
452 rq->tail,
453 ce[0]->ring->tail) <= 0);
454 i915_request_put(rq);
456 /* Create a second ring to preempt the first ring after rq[0] */
457 rq = intel_context_create_request(ce[1]);
458 if (IS_ERR(rq)) {
459 err = PTR_ERR(rq);
460 goto err_ce;
463 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
464 i915_request_get(rq);
465 i915_request_add(rq);
467 err = wait_for_submit(engine, rq, HZ / 2);
468 i915_request_put(rq);
469 if (err) {
470 pr_err("%s: preemption request was not submitted\n",
471 engine->name);
472 err = -ETIME;
475 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
476 engine->name,
477 ce[0]->ring->tail, ce[0]->ring->emit,
478 ce[1]->ring->tail, ce[1]->ring->emit);
480 err_ce:
481 intel_engine_flush_submission(engine);
482 igt_spinner_end(&spin);
483 for (n = 0; n < ARRAY_SIZE(ce); n++) {
484 if (IS_ERR_OR_NULL(ce[n]))
485 break;
487 intel_context_unpin(ce[n]);
488 intel_context_put(ce[n]);
490 st_engine_heartbeat_enable(engine);
491 if (igt_live_test_end(&t))
492 err = -EIO;
493 if (err)
494 break;
497 igt_spinner_fini(&spin);
498 return err;
501 static int live_pin_rewind(void *arg)
503 struct intel_gt *gt = arg;
504 struct intel_engine_cs *engine;
505 enum intel_engine_id id;
506 int err = 0;
509 * We have to be careful not to trust intel_ring too much, for example
510 * ring->head is updated upon retire which is out of sync with pinning
511 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
512 * or else we risk writing an older, stale value.
514 * To simulate this, let's apply a bit of deliberate sabotague.
517 for_each_engine(engine, gt, id) {
518 struct intel_context *ce;
519 struct i915_request *rq;
520 struct intel_ring *ring;
521 struct igt_live_test t;
523 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
524 err = -EIO;
525 break;
528 ce = intel_context_create(engine);
529 if (IS_ERR(ce)) {
530 err = PTR_ERR(ce);
531 break;
534 err = intel_context_pin(ce);
535 if (err) {
536 intel_context_put(ce);
537 break;
540 /* Keep the context awake while we play games */
541 err = i915_active_acquire(&ce->active);
542 if (err) {
543 intel_context_unpin(ce);
544 intel_context_put(ce);
545 break;
547 ring = ce->ring;
549 /* Poison the ring, and offset the next request from HEAD */
550 memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
551 ring->emit = ring->size / 2;
552 ring->tail = ring->emit;
553 GEM_BUG_ON(ring->head);
555 intel_context_unpin(ce);
557 /* Submit a simple nop request */
558 GEM_BUG_ON(intel_context_is_pinned(ce));
559 rq = intel_context_create_request(ce);
560 i915_active_release(&ce->active); /* e.g. async retire */
561 intel_context_put(ce);
562 if (IS_ERR(rq)) {
563 err = PTR_ERR(rq);
564 break;
566 GEM_BUG_ON(!rq->head);
567 i915_request_add(rq);
569 /* Expect not to hang! */
570 if (igt_live_test_end(&t)) {
571 err = -EIO;
572 break;
576 return err;
579 static int live_hold_reset(void *arg)
581 struct intel_gt *gt = arg;
582 struct intel_engine_cs *engine;
583 enum intel_engine_id id;
584 struct igt_spinner spin;
585 int err = 0;
588 * In order to support offline error capture for fast preempt reset,
589 * we need to decouple the guilty request and ensure that it and its
590 * descendents are not executed while the capture is in progress.
593 if (!intel_has_reset_engine(gt))
594 return 0;
596 if (igt_spinner_init(&spin, gt))
597 return -ENOMEM;
599 for_each_engine(engine, gt, id) {
600 struct intel_context *ce;
601 struct i915_request *rq;
603 ce = intel_context_create(engine);
604 if (IS_ERR(ce)) {
605 err = PTR_ERR(ce);
606 break;
609 st_engine_heartbeat_disable(engine);
611 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
612 if (IS_ERR(rq)) {
613 err = PTR_ERR(rq);
614 goto out;
616 i915_request_add(rq);
618 if (!igt_wait_for_spinner(&spin, rq)) {
619 intel_gt_set_wedged(gt);
620 err = -ETIME;
621 goto out;
624 /* We have our request executing, now remove it and reset */
626 if (test_and_set_bit(I915_RESET_ENGINE + id,
627 &gt->reset.flags)) {
628 intel_gt_set_wedged(gt);
629 err = -EBUSY;
630 goto out;
632 tasklet_disable(&engine->execlists.tasklet);
634 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
635 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
637 i915_request_get(rq);
638 execlists_hold(engine, rq);
639 GEM_BUG_ON(!i915_request_on_hold(rq));
641 intel_engine_reset(engine, NULL);
642 GEM_BUG_ON(rq->fence.error != -EIO);
644 tasklet_enable(&engine->execlists.tasklet);
645 clear_and_wake_up_bit(I915_RESET_ENGINE + id,
646 &gt->reset.flags);
648 /* Check that we do not resubmit the held request */
649 if (!i915_request_wait(rq, 0, HZ / 5)) {
650 pr_err("%s: on hold request completed!\n",
651 engine->name);
652 i915_request_put(rq);
653 err = -EIO;
654 goto out;
656 GEM_BUG_ON(!i915_request_on_hold(rq));
658 /* But is resubmitted on release */
659 execlists_unhold(engine, rq);
660 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
661 pr_err("%s: held request did not complete!\n",
662 engine->name);
663 intel_gt_set_wedged(gt);
664 err = -ETIME;
666 i915_request_put(rq);
668 out:
669 st_engine_heartbeat_enable(engine);
670 intel_context_put(ce);
671 if (err)
672 break;
675 igt_spinner_fini(&spin);
676 return err;
/* Human readable tag for log messages: "bad" for any non-zero @err, else "good". */
static const char *error_repr(int err)
{
	return err ? "bad" : "good";
}
684 static int live_error_interrupt(void *arg)
686 static const struct error_phase {
687 enum { GOOD = 0, BAD = -EIO } error[2];
688 } phases[] = {
689 { { BAD, GOOD } },
690 { { BAD, BAD } },
691 { { BAD, GOOD } },
692 { { GOOD, GOOD } }, /* sentinel */
694 struct intel_gt *gt = arg;
695 struct intel_engine_cs *engine;
696 enum intel_engine_id id;
699 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
700 * of invalid commands in user batches that will cause a GPU hang.
701 * This is a faster mechanism than using hangcheck/heartbeats, but
702 * only detects problems the HW knows about -- it will not warn when
703 * we kill the HW!
705 * To verify our detection and reset, we throw some invalid commands
706 * at the HW and wait for the interrupt.
709 if (!intel_has_reset_engine(gt))
710 return 0;
712 for_each_engine(engine, gt, id) {
713 const struct error_phase *p;
714 int err = 0;
716 st_engine_heartbeat_disable(engine);
718 for (p = phases; p->error[0] != GOOD; p++) {
719 struct i915_request *client[ARRAY_SIZE(phases->error)];
720 u32 *cs;
721 int i;
723 memset(client, 0, sizeof(*client));
724 for (i = 0; i < ARRAY_SIZE(client); i++) {
725 struct intel_context *ce;
726 struct i915_request *rq;
728 ce = intel_context_create(engine);
729 if (IS_ERR(ce)) {
730 err = PTR_ERR(ce);
731 goto out;
734 rq = intel_context_create_request(ce);
735 intel_context_put(ce);
736 if (IS_ERR(rq)) {
737 err = PTR_ERR(rq);
738 goto out;
741 if (rq->engine->emit_init_breadcrumb) {
742 err = rq->engine->emit_init_breadcrumb(rq);
743 if (err) {
744 i915_request_add(rq);
745 goto out;
749 cs = intel_ring_begin(rq, 2);
750 if (IS_ERR(cs)) {
751 i915_request_add(rq);
752 err = PTR_ERR(cs);
753 goto out;
756 if (p->error[i]) {
757 *cs++ = 0xdeadbeef;
758 *cs++ = 0xdeadbeef;
759 } else {
760 *cs++ = MI_NOOP;
761 *cs++ = MI_NOOP;
764 client[i] = i915_request_get(rq);
765 i915_request_add(rq);
768 err = wait_for_submit(engine, client[0], HZ / 2);
769 if (err) {
770 pr_err("%s: first request did not start within time!\n",
771 engine->name);
772 err = -ETIME;
773 goto out;
776 for (i = 0; i < ARRAY_SIZE(client); i++) {
777 if (i915_request_wait(client[i], 0, HZ / 5) < 0)
778 pr_debug("%s: %s request incomplete!\n",
779 engine->name,
780 error_repr(p->error[i]));
782 if (!i915_request_started(client[i])) {
783 pr_err("%s: %s request not started!\n",
784 engine->name,
785 error_repr(p->error[i]));
786 err = -ETIME;
787 goto out;
790 /* Kick the tasklet to process the error */
791 intel_engine_flush_submission(engine);
792 if (client[i]->fence.error != p->error[i]) {
793 pr_err("%s: %s request (%s) with wrong error code: %d\n",
794 engine->name,
795 error_repr(p->error[i]),
796 i915_request_completed(client[i]) ? "completed" : "running",
797 client[i]->fence.error);
798 err = -EINVAL;
799 goto out;
803 out:
804 for (i = 0; i < ARRAY_SIZE(client); i++)
805 if (client[i])
806 i915_request_put(client[i]);
807 if (err) {
808 pr_err("%s: failed at phase[%zd] { %d, %d }\n",
809 engine->name, p - phases,
810 p->error[0], p->error[1]);
811 break;
815 st_engine_heartbeat_enable(engine);
816 if (err) {
817 intel_gt_set_wedged(gt);
818 return err;
822 return 0;
825 static int
826 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
828 u32 *cs;
830 cs = intel_ring_begin(rq, 10);
831 if (IS_ERR(cs))
832 return PTR_ERR(cs);
834 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
836 *cs++ = MI_SEMAPHORE_WAIT |
837 MI_SEMAPHORE_GLOBAL_GTT |
838 MI_SEMAPHORE_POLL |
839 MI_SEMAPHORE_SAD_NEQ_SDD;
840 *cs++ = 0;
841 *cs++ = i915_ggtt_offset(vma) + 4 * idx;
842 *cs++ = 0;
844 if (idx > 0) {
845 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
846 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
847 *cs++ = 0;
848 *cs++ = 1;
849 } else {
850 *cs++ = MI_NOOP;
851 *cs++ = MI_NOOP;
852 *cs++ = MI_NOOP;
853 *cs++ = MI_NOOP;
856 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
858 intel_ring_advance(rq, cs);
859 return 0;
862 static struct i915_request *
863 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
865 struct intel_context *ce;
866 struct i915_request *rq;
867 int err;
869 ce = intel_context_create(engine);
870 if (IS_ERR(ce))
871 return ERR_CAST(ce);
873 rq = intel_context_create_request(ce);
874 if (IS_ERR(rq))
875 goto out_ce;
877 err = 0;
878 if (rq->engine->emit_init_breadcrumb)
879 err = rq->engine->emit_init_breadcrumb(rq);
880 if (err == 0)
881 err = emit_semaphore_chain(rq, vma, idx);
882 if (err == 0)
883 i915_request_get(rq);
884 i915_request_add(rq);
885 if (err)
886 rq = ERR_PTR(err);
888 out_ce:
889 intel_context_put(ce);
890 return rq;
893 static int
894 release_queue(struct intel_engine_cs *engine,
895 struct i915_vma *vma,
896 int idx, int prio)
898 struct i915_sched_attr attr = {
899 .priority = prio,
901 struct i915_request *rq;
902 u32 *cs;
904 rq = intel_engine_create_kernel_request(engine);
905 if (IS_ERR(rq))
906 return PTR_ERR(rq);
908 cs = intel_ring_begin(rq, 4);
909 if (IS_ERR(cs)) {
910 i915_request_add(rq);
911 return PTR_ERR(cs);
914 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
915 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
916 *cs++ = 0;
917 *cs++ = 1;
919 intel_ring_advance(rq, cs);
921 i915_request_get(rq);
922 i915_request_add(rq);
924 local_bh_disable();
925 engine->schedule(rq, &attr);
926 local_bh_enable(); /* kick tasklet */
928 i915_request_put(rq);
930 return 0;
933 static int
934 slice_semaphore_queue(struct intel_engine_cs *outer,
935 struct i915_vma *vma,
936 int count)
938 struct intel_engine_cs *engine;
939 struct i915_request *head;
940 enum intel_engine_id id;
941 int err, i, n = 0;
943 head = semaphore_queue(outer, vma, n++);
944 if (IS_ERR(head))
945 return PTR_ERR(head);
947 for_each_engine(engine, outer->gt, id) {
948 for (i = 0; i < count; i++) {
949 struct i915_request *rq;
951 rq = semaphore_queue(engine, vma, n++);
952 if (IS_ERR(rq)) {
953 err = PTR_ERR(rq);
954 goto out;
957 i915_request_put(rq);
961 err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
962 if (err)
963 goto out;
965 if (i915_request_wait(head, 0,
966 2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
967 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
968 count, n);
969 GEM_TRACE_DUMP();
970 intel_gt_set_wedged(outer->gt);
971 err = -EIO;
974 out:
975 i915_request_put(head);
976 return err;
979 static int live_timeslice_preempt(void *arg)
981 struct intel_gt *gt = arg;
982 struct drm_i915_gem_object *obj;
983 struct intel_engine_cs *engine;
984 enum intel_engine_id id;
985 struct i915_vma *vma;
986 void *vaddr;
987 int err = 0;
990 * If a request takes too long, we would like to give other users
991 * a fair go on the GPU. In particular, users may create batches
992 * that wait upon external input, where that input may even be
993 * supplied by another GPU job. To avoid blocking forever, we
994 * need to preempt the current task and replace it with another
995 * ready task.
997 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
998 return 0;
1000 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1001 if (IS_ERR(obj))
1002 return PTR_ERR(obj);
1004 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1005 if (IS_ERR(vma)) {
1006 err = PTR_ERR(vma);
1007 goto err_obj;
1010 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1011 if (IS_ERR(vaddr)) {
1012 err = PTR_ERR(vaddr);
1013 goto err_obj;
1016 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1017 if (err)
1018 goto err_map;
1020 err = i915_vma_sync(vma);
1021 if (err)
1022 goto err_pin;
1024 for_each_engine(engine, gt, id) {
1025 if (!intel_engine_has_preemption(engine))
1026 continue;
1028 memset(vaddr, 0, PAGE_SIZE);
1030 st_engine_heartbeat_disable(engine);
1031 err = slice_semaphore_queue(engine, vma, 5);
1032 st_engine_heartbeat_enable(engine);
1033 if (err)
1034 goto err_pin;
1036 if (igt_flush_test(gt->i915)) {
1037 err = -EIO;
1038 goto err_pin;
1042 err_pin:
1043 i915_vma_unpin(vma);
1044 err_map:
1045 i915_gem_object_unpin_map(obj);
1046 err_obj:
1047 i915_gem_object_put(obj);
1048 return err;
1051 static struct i915_request *
1052 create_rewinder(struct intel_context *ce,
1053 struct i915_request *wait,
1054 void *slot, int idx)
1056 const u32 offset =
1057 i915_ggtt_offset(ce->engine->status_page.vma) +
1058 offset_in_page(slot);
1059 struct i915_request *rq;
1060 u32 *cs;
1061 int err;
1063 rq = intel_context_create_request(ce);
1064 if (IS_ERR(rq))
1065 return rq;
1067 if (wait) {
1068 err = i915_request_await_dma_fence(rq, &wait->fence);
1069 if (err)
1070 goto err;
1073 cs = intel_ring_begin(rq, 14);
1074 if (IS_ERR(cs)) {
1075 err = PTR_ERR(cs);
1076 goto err;
1079 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1080 *cs++ = MI_NOOP;
1082 *cs++ = MI_SEMAPHORE_WAIT |
1083 MI_SEMAPHORE_GLOBAL_GTT |
1084 MI_SEMAPHORE_POLL |
1085 MI_SEMAPHORE_SAD_GTE_SDD;
1086 *cs++ = idx;
1087 *cs++ = offset;
1088 *cs++ = 0;
1090 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1091 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1092 *cs++ = offset + idx * sizeof(u32);
1093 *cs++ = 0;
1095 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1096 *cs++ = offset;
1097 *cs++ = 0;
1098 *cs++ = idx + 1;
1100 intel_ring_advance(rq, cs);
1102 rq->sched.attr.priority = I915_PRIORITY_MASK;
1103 err = 0;
1104 err:
1105 i915_request_get(rq);
1106 i915_request_add(rq);
1107 if (err) {
1108 i915_request_put(rq);
1109 return ERR_PTR(err);
1112 return rq;
1115 static int live_timeslice_rewind(void *arg)
1117 struct intel_gt *gt = arg;
1118 struct intel_engine_cs *engine;
1119 enum intel_engine_id id;
1122 * The usual presumption on timeslice expiration is that we replace
1123 * the active context with another. However, given a chain of
1124 * dependencies we may end up with replacing the context with itself,
1125 * but only a few of those requests, forcing us to rewind the
1126 * RING_TAIL of the original request.
1128 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1129 return 0;
1131 for_each_engine(engine, gt, id) {
1132 enum { A1, A2, B1 };
1133 enum { X = 1, Z, Y };
1134 struct i915_request *rq[3] = {};
1135 struct intel_context *ce;
1136 unsigned long timeslice;
1137 int i, err = 0;
1138 u32 *slot;
1140 if (!intel_engine_has_timeslices(engine))
1141 continue;
1144 * A:rq1 -- semaphore wait, timestamp X
1145 * A:rq2 -- write timestamp Y
1147 * B:rq1 [await A:rq1] -- write timestamp Z
1149 * Force timeslice, release semaphore.
1151 * Expect execution/evaluation order XZY
1154 st_engine_heartbeat_disable(engine);
1155 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1157 slot = memset32(engine->status_page.addr + 1000, 0, 4);
1159 ce = intel_context_create(engine);
1160 if (IS_ERR(ce)) {
1161 err = PTR_ERR(ce);
1162 goto err;
1165 rq[A1] = create_rewinder(ce, NULL, slot, X);
1166 if (IS_ERR(rq[A1])) {
1167 intel_context_put(ce);
1168 goto err;
1171 rq[A2] = create_rewinder(ce, NULL, slot, Y);
1172 intel_context_put(ce);
1173 if (IS_ERR(rq[A2]))
1174 goto err;
1176 err = wait_for_submit(engine, rq[A2], HZ / 2);
1177 if (err) {
1178 pr_err("%s: failed to submit first context\n",
1179 engine->name);
1180 goto err;
1183 ce = intel_context_create(engine);
1184 if (IS_ERR(ce)) {
1185 err = PTR_ERR(ce);
1186 goto err;
1189 rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1190 intel_context_put(ce);
1191 if (IS_ERR(rq[2]))
1192 goto err;
1194 err = wait_for_submit(engine, rq[B1], HZ / 2);
1195 if (err) {
1196 pr_err("%s: failed to submit second context\n",
1197 engine->name);
1198 goto err;
1201 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1202 ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1203 if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
1204 /* Wait for the timeslice to kick in */
1205 del_timer(&engine->execlists.timer);
1206 tasklet_hi_schedule(&engine->execlists.tasklet);
1207 intel_engine_flush_submission(engine);
1209 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1210 GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1211 GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1212 GEM_BUG_ON(i915_request_is_active(rq[A2]));
1214 /* Release the hounds! */
1215 slot[0] = 1;
1216 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1218 for (i = 1; i <= 3; i++) {
1219 unsigned long timeout = jiffies + HZ / 2;
1221 while (!READ_ONCE(slot[i]) &&
1222 time_before(jiffies, timeout))
1225 if (!time_before(jiffies, timeout)) {
1226 pr_err("%s: rq[%d] timed out\n",
1227 engine->name, i - 1);
1228 err = -ETIME;
1229 goto err;
1232 pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1235 /* XZY: XZ < XY */
1236 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1237 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1238 engine->name,
1239 slot[Z] - slot[X],
1240 slot[Y] - slot[X]);
1241 err = -EINVAL;
1244 err:
1245 memset32(&slot[0], -1, 4);
1246 wmb();
1248 engine->props.timeslice_duration_ms = timeslice;
1249 st_engine_heartbeat_enable(engine);
1250 for (i = 0; i < 3; i++)
1251 i915_request_put(rq[i]);
1252 if (igt_flush_test(gt->i915))
1253 err = -EIO;
1254 if (err)
1255 return err;
1258 return 0;
/*
 * Create and add an empty kernel request on @engine.
 *
 * Returns the request with an extra reference held for the caller, or
 * the ERR_PTR from request creation.
 */
static struct i915_request *nop_request(struct intel_engine_cs *engine)
{
	struct i915_request *rq;

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		return rq;

	i915_request_get(rq);
	i915_request_add(rq);

	return rq;
}
1275 static long slice_timeout(struct intel_engine_cs *engine)
1277 long timeout;
1279 /* Enough time for a timeslice to kick in, and kick out */
1280 timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1282 /* Enough time for the nop request to complete */
1283 timeout += HZ / 5;
1285 return timeout + 1;
1288 static int live_timeslice_queue(void *arg)
1290 struct intel_gt *gt = arg;
1291 struct drm_i915_gem_object *obj;
1292 struct intel_engine_cs *engine;
1293 enum intel_engine_id id;
1294 struct i915_vma *vma;
1295 void *vaddr;
1296 int err = 0;
1299 * Make sure that even if ELSP[0] and ELSP[1] are filled with
1300 * timeslicing between them disabled, we *do* enable timeslicing
1301 * if the queue demands it. (Normally, we do not submit if
1302 * ELSP[1] is already occupied, so must rely on timeslicing to
1303 * eject ELSP[0] in favour of the queue.)
1305 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1306 return 0;
1308 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1309 if (IS_ERR(obj))
1310 return PTR_ERR(obj);
1312 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1313 if (IS_ERR(vma)) {
1314 err = PTR_ERR(vma);
1315 goto err_obj;
1318 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1319 if (IS_ERR(vaddr)) {
1320 err = PTR_ERR(vaddr);
1321 goto err_obj;
1324 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1325 if (err)
1326 goto err_map;
1328 err = i915_vma_sync(vma);
1329 if (err)
1330 goto err_pin;
1332 for_each_engine(engine, gt, id) {
1333 struct i915_sched_attr attr = {
1334 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1336 struct i915_request *rq, *nop;
1338 if (!intel_engine_has_preemption(engine))
1339 continue;
1341 st_engine_heartbeat_disable(engine);
1342 memset(vaddr, 0, PAGE_SIZE);
1344 /* ELSP[0]: semaphore wait */
1345 rq = semaphore_queue(engine, vma, 0);
1346 if (IS_ERR(rq)) {
1347 err = PTR_ERR(rq);
1348 goto err_heartbeat;
1350 engine->schedule(rq, &attr);
1351 err = wait_for_submit(engine, rq, HZ / 2);
1352 if (err) {
1353 pr_err("%s: Timed out trying to submit semaphores\n",
1354 engine->name);
1355 goto err_rq;
1358 /* ELSP[1]: nop request */
1359 nop = nop_request(engine);
1360 if (IS_ERR(nop)) {
1361 err = PTR_ERR(nop);
1362 goto err_rq;
1364 err = wait_for_submit(engine, nop, HZ / 2);
1365 i915_request_put(nop);
1366 if (err) {
1367 pr_err("%s: Timed out trying to submit nop\n",
1368 engine->name);
1369 goto err_rq;
1372 GEM_BUG_ON(i915_request_completed(rq));
1373 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1375 /* Queue: semaphore signal, matching priority as semaphore */
1376 err = release_queue(engine, vma, 1, effective_prio(rq));
1377 if (err)
1378 goto err_rq;
1380 /* Wait until we ack the release_queue and start timeslicing */
1381 do {
1382 cond_resched();
1383 intel_engine_flush_submission(engine);
1384 } while (READ_ONCE(engine->execlists.pending[0]));
1386 /* Timeslice every jiffy, so within 2 we should signal */
1387 if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
1388 struct drm_printer p =
1389 drm_info_printer(gt->i915->drm.dev);
1391 pr_err("%s: Failed to timeslice into queue\n",
1392 engine->name);
1393 intel_engine_dump(engine, &p,
1394 "%s\n", engine->name);
1396 memset(vaddr, 0xff, PAGE_SIZE);
1397 err = -EIO;
1399 err_rq:
1400 i915_request_put(rq);
1401 err_heartbeat:
1402 st_engine_heartbeat_enable(engine);
1403 if (err)
1404 break;
1407 err_pin:
1408 i915_vma_unpin(vma);
1409 err_map:
1410 i915_gem_object_unpin_map(obj);
1411 err_obj:
1412 i915_gem_object_put(obj);
1413 return err;
/*
 * Selftest: a request flagged I915_FENCE_FLAG_NOPREEMPT must not be
 * timesliced out in favour of a later I915_PRIORITY_BARRIER request.
 *
 * For every engine that reports preemption support, a spinner is started,
 * flagged NOPREEMPT once it is running, and a barrier-priority request is
 * queued behind it. The test fails with -EINVAL if that second request
 * completes within slice_timeout(engine). Returns 0 immediately when
 * CONFIG_DRM_I915_TIMESLICE_DURATION is not active.
 */
1416 static int live_timeslice_nopreempt(void *arg)
1418 struct intel_gt *gt = arg;
1419 struct intel_engine_cs *engine;
1420 enum intel_engine_id id;
1421 struct igt_spinner spin;
1422 int err = 0;
1425 * We should not timeslice into a request that is marked with
1426 * I915_REQUEST_NOPREEMPT.
1428 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1429 return 0;
1431 if (igt_spinner_init(&spin, gt))
1432 return -ENOMEM;
1434 for_each_engine(engine, gt, id) {
1435 struct intel_context *ce;
1436 struct i915_request *rq;
1437 unsigned long timeslice;
1439 if (!intel_engine_has_preemption(engine))
1440 continue;
1442 ce = intel_context_create(engine);
1443 if (IS_ERR(ce)) {
1444 err = PTR_ERR(ce);
1445 break;
1448 st_engine_heartbeat_disable(engine);
/* Force a 1ms timeslice; the previous value is restored at out_heartbeat. */
1449 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1451 /* Create an unpreemptible spinner */
1453 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1454 intel_context_put(ce);
1455 if (IS_ERR(rq)) {
1456 err = PTR_ERR(rq);
1457 goto out_heartbeat;
1460 i915_request_get(rq);
1461 i915_request_add(rq);
1463 if (!igt_wait_for_spinner(&spin, rq)) {
1464 i915_request_put(rq);
1465 err = -ETIME;
1466 goto out_spin;
/* The NOPREEMPT flag is only applied after the spinner was seen running. */
1469 set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1470 i915_request_put(rq);
1472 /* Followed by a maximum priority barrier (heartbeat) */
1474 ce = intel_context_create(engine);
1475 if (IS_ERR(ce)) {
1476 err = PTR_ERR(ce);
1477 goto out_spin;
1480 rq = intel_context_create_request(ce);
1481 intel_context_put(ce);
1482 if (IS_ERR(rq)) {
1483 err = PTR_ERR(rq);
1484 goto out_spin;
1487 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1488 i915_request_get(rq);
1489 i915_request_add(rq);
1492 * Wait until the barrier is in ELSP, and we know timeslicing
1493 * will have been activated.
1495 if (wait_for_submit(engine, rq, HZ / 2)) {
1496 i915_request_put(rq);
1497 err = -ETIME;
1498 goto out_spin;
1502 * Since the ELSP[0] request is unpreemptible, it should not
1503 * allow the maximum priority barrier through. Wait long
1504 * enough to see if it is timesliced in by mistake.
/* A non-negative wait result means the barrier completed: that is the failure. */
1506 if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
1507 pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1508 engine->name);
1509 err = -EINVAL;
1511 i915_request_put(rq);
1513 out_spin:
1514 igt_spinner_end(&spin);
1515 out_heartbeat:
1516 xchg(&engine->props.timeslice_duration_ms, timeslice);
1517 st_engine_heartbeat_enable(engine);
1518 if (err)
1519 break;
1521 if (igt_flush_test(gt->i915)) {
1522 err = -EIO;
1523 break;
1527 igt_spinner_fini(&spin);
1528 return err;
/*
 * Selftest: a low priority request that busywaits on a semaphore inside the
 * ring must be preemptible by a high priority request.
 *
 * A one-page internal object is mapped for the CPU (map) and pinned into the
 * GGTT (vma). Per engine, the low priority request stores 1 to the first
 * dword and then polls it with MI_SEMAPHORE_WAIT until it reads 0; the high
 * priority request stores 0 to the same dword. If the low priority request
 * does not complete within HZ / 5 the GT is wedged and -EIO is returned.
 */
1533 static int live_busywait_preempt(void *arg)
1535 struct intel_gt *gt = arg;
1536 struct i915_gem_context *ctx_hi, *ctx_lo;
1537 struct intel_engine_cs *engine;
1538 struct drm_i915_gem_object *obj;
1539 struct i915_vma *vma;
1540 enum intel_engine_id id;
1541 int err = -ENOMEM;
1542 u32 *map;
1543 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1544 * preempt the busywaits used to synchronise between rings.
1547 ctx_hi = kernel_context(gt->i915);
1548 if (!ctx_hi)
1549 return -ENOMEM;
1550 ctx_hi->sched.priority =
1551 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
/* err is still -ENOMEM here, so the bare goto reports allocation failure. */
1553 ctx_lo = kernel_context(gt->i915);
1554 if (!ctx_lo)
1555 goto err_ctx_hi;
1556 ctx_lo->sched.priority =
1557 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1559 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1560 if (IS_ERR(obj)) {
1561 err = PTR_ERR(obj);
1562 goto err_ctx_lo;
1565 map = i915_gem_object_pin_map(obj, I915_MAP_WC);
1566 if (IS_ERR(map)) {
1567 err = PTR_ERR(map);
1568 goto err_obj;
1571 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1572 if (IS_ERR(vma)) {
1573 err = PTR_ERR(vma);
1574 goto err_map;
1577 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1578 if (err)
1579 goto err_map;
1581 err = i915_vma_sync(vma);
1582 if (err)
1583 goto err_vma;
1585 for_each_engine(engine, gt, id) {
1586 struct i915_request *lo, *hi;
1587 struct igt_live_test t;
1588 u32 *cs;
1590 if (!intel_engine_has_preemption(engine))
1591 continue;
1593 if (!intel_engine_can_store_dword(engine))
1594 continue;
1596 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1597 err = -EIO;
1598 goto err_vma;
1602 * We create two requests. The low priority request
1603 * busywaits on a semaphore (inside the ringbuffer where
1604 * is should be preemptible) and the high priority requests
1605 * uses a MI_STORE_DWORD_IMM to update the semaphore value
1606 * allowing the first request to complete. If preemption
1607 * fails, we hang instead.
1610 lo = igt_request_alloc(ctx_lo, engine);
1611 if (IS_ERR(lo)) {
1612 err = PTR_ERR(lo);
1613 goto err_vma;
/* 8 dwords: a 4-dword store of 1 followed by a 4-dword semaphore wait. */
1616 cs = intel_ring_begin(lo, 8);
1617 if (IS_ERR(cs)) {
1618 err = PTR_ERR(cs);
1619 i915_request_add(lo);
1620 goto err_vma;
1623 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1624 *cs++ = i915_ggtt_offset(vma);
1625 *cs++ = 0;
1626 *cs++ = 1;
1628 /* XXX Do we need a flush + invalidate here? */
1630 *cs++ = MI_SEMAPHORE_WAIT |
1631 MI_SEMAPHORE_GLOBAL_GTT |
1632 MI_SEMAPHORE_POLL |
1633 MI_SEMAPHORE_SAD_EQ_SDD;
1634 *cs++ = 0;
1635 *cs++ = i915_ggtt_offset(vma);
1636 *cs++ = 0;
1638 intel_ring_advance(lo, cs);
1640 i915_request_get(lo);
1641 i915_request_add(lo);
/* Wait for the store of 1 from the low priority request to become visible. */
1643 if (wait_for(READ_ONCE(*map), 10)) {
1644 i915_request_put(lo);
1645 err = -ETIMEDOUT;
1646 goto err_vma;
1649 /* Low priority request should be busywaiting now */
1650 if (i915_request_wait(lo, 0, 1) != -ETIME) {
1651 i915_request_put(lo);
1652 pr_err("%s: Busywaiting request did not!\n",
1653 engine->name);
1654 err = -EIO;
1655 goto err_vma;
1658 hi = igt_request_alloc(ctx_hi, engine);
1659 if (IS_ERR(hi)) {
1660 err = PTR_ERR(hi);
1661 i915_request_put(lo);
1662 goto err_vma;
1665 cs = intel_ring_begin(hi, 4);
1666 if (IS_ERR(cs)) {
1667 err = PTR_ERR(cs);
1668 i915_request_add(hi);
1669 i915_request_put(lo);
1670 goto err_vma;
/* The high priority request writes 0, the value the semaphore waits for. */
1673 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1674 *cs++ = i915_ggtt_offset(vma);
1675 *cs++ = 0;
1676 *cs++ = 0;
1678 intel_ring_advance(hi, cs);
1679 i915_request_add(hi);
1681 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1682 struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1684 pr_err("%s: Failed to preempt semaphore busywait!\n",
1685 engine->name);
1687 intel_engine_dump(engine, &p, "%s\n", engine->name);
1688 GEM_TRACE_DUMP();
1690 i915_request_put(lo);
1691 intel_gt_set_wedged(gt);
1692 err = -EIO;
1693 goto err_vma;
1695 GEM_BUG_ON(READ_ONCE(*map));
1696 i915_request_put(lo);
1698 if (igt_live_test_end(&t)) {
1699 err = -EIO;
1700 goto err_vma;
1704 err = 0;
/* Success falls through the same unwind labels as the error paths. */
1705 err_vma:
1706 i915_vma_unpin(vma);
1707 err_map:
1708 i915_gem_object_unpin_map(obj);
1709 err_obj:
1710 i915_gem_object_put(obj);
1711 err_ctx_lo:
1712 kernel_context_close(ctx_lo);
1713 err_ctx_hi:
1714 kernel_context_close(ctx_hi);
1715 return err;
1718 static struct i915_request *
1719 spinner_create_request(struct igt_spinner *spin,
1720 struct i915_gem_context *ctx,
1721 struct intel_engine_cs *engine,
1722 u32 arb)
1724 struct intel_context *ce;
1725 struct i915_request *rq;
1727 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1728 if (IS_ERR(ce))
1729 return ERR_CAST(ce);
1731 rq = igt_spinner_create_request(spin, ce, arb);
1732 intel_context_put(ce);
1733 return rq;
/*
 * Selftest: a spinner from a maximum user priority context must start
 * running while a spinner from a minimum user priority context is already
 * spinning on the same engine.
 *
 * Skipped (returns 0) without HAS_LOGICAL_RING_PREEMPTION. If either spinner
 * is not seen running, the GT is wedged and -EIO is returned.
 */
1738 static int live_preempt(void *arg)
1740 struct intel_gt *gt = arg;
1741 struct i915_gem_context *ctx_hi, *ctx_lo;
1742 struct igt_spinner spin_hi, spin_lo;
1743 struct intel_engine_cs *engine;
1744 enum intel_engine_id id;
1745 int err = -ENOMEM;
1747 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1748 return 0;
/* Only reported; the test carries on even if the capability is not exposed. */
1750 if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
1751 pr_err("Logical preemption supported, but not exposed\n");
1753 if (igt_spinner_init(&spin_hi, gt))
1754 return -ENOMEM;
1756 if (igt_spinner_init(&spin_lo, gt))
1757 goto err_spin_hi;
1759 ctx_hi = kernel_context(gt->i915);
1760 if (!ctx_hi)
1761 goto err_spin_lo;
1762 ctx_hi->sched.priority =
1763 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1765 ctx_lo = kernel_context(gt->i915);
1766 if (!ctx_lo)
1767 goto err_ctx_hi;
1768 ctx_lo->sched.priority =
1769 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1771 for_each_engine(engine, gt, id) {
1772 struct igt_live_test t;
1773 struct i915_request *rq;
1775 if (!intel_engine_has_preemption(engine))
1776 continue;
1778 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1779 err = -EIO;
1780 goto err_ctx_lo;
1783 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1784 MI_ARB_CHECK);
1785 if (IS_ERR(rq)) {
1786 err = PTR_ERR(rq);
1787 goto err_ctx_lo;
1790 i915_request_add(rq);
1791 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1792 GEM_TRACE("lo spinner failed to start\n");
1793 GEM_TRACE_DUMP();
1794 intel_gt_set_wedged(gt);
1795 err = -EIO;
1796 goto err_ctx_lo;
/* The low priority spinner is still running when the high one is queued. */
1799 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1800 MI_ARB_CHECK);
1801 if (IS_ERR(rq)) {
1802 igt_spinner_end(&spin_lo);
1803 err = PTR_ERR(rq);
1804 goto err_ctx_lo;
1807 i915_request_add(rq);
1808 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1809 GEM_TRACE("hi spinner failed to start\n");
1810 GEM_TRACE_DUMP();
1811 intel_gt_set_wedged(gt);
1812 err = -EIO;
1813 goto err_ctx_lo;
1816 igt_spinner_end(&spin_hi);
1817 igt_spinner_end(&spin_lo);
1819 if (igt_live_test_end(&t)) {
1820 err = -EIO;
1821 goto err_ctx_lo;
1825 err = 0;
1826 err_ctx_lo:
1827 kernel_context_close(ctx_lo);
1828 err_ctx_hi:
1829 kernel_context_close(ctx_hi);
1830 err_spin_lo:
1831 igt_spinner_fini(&spin_lo);
1832 err_spin_hi:
1833 igt_spinner_fini(&spin_hi);
1834 return err;
/*
 * Selftest: raising the priority of an already queued request must make it
 * preempt the request currently running.
 *
 * ctx_lo is given I915_USER_PRIORITY(1) and its spinner is started first.
 * A second spinner from ctx_hi (priority left as created) is queued and must
 * NOT start; it is then bumped to I915_USER_PRIORITY(I915_PRIORITY_MAX) via
 * engine->schedule() and must start. Any violation wedges the GT and
 * returns -EIO.
 */
1837 static int live_late_preempt(void *arg)
1839 struct intel_gt *gt = arg;
1840 struct i915_gem_context *ctx_hi, *ctx_lo;
1841 struct igt_spinner spin_hi, spin_lo;
1842 struct intel_engine_cs *engine;
1843 struct i915_sched_attr attr = {};
1844 enum intel_engine_id id;
1845 int err = -ENOMEM;
1847 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1848 return 0;
1850 if (igt_spinner_init(&spin_hi, gt))
1851 return -ENOMEM;
1853 if (igt_spinner_init(&spin_lo, gt))
1854 goto err_spin_hi;
1856 ctx_hi = kernel_context(gt->i915);
1857 if (!ctx_hi)
1858 goto err_spin_lo;
1860 ctx_lo = kernel_context(gt->i915);
1861 if (!ctx_lo)
1862 goto err_ctx_hi;
1864 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1865 ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1867 for_each_engine(engine, gt, id) {
1868 struct igt_live_test t;
1869 struct i915_request *rq;
1871 if (!intel_engine_has_preemption(engine))
1872 continue;
1874 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1875 err = -EIO;
1876 goto err_ctx_lo;
1879 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1880 MI_ARB_CHECK);
1881 if (IS_ERR(rq)) {
1882 err = PTR_ERR(rq);
1883 goto err_ctx_lo;
1886 i915_request_add(rq);
1887 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1888 pr_err("First context failed to start\n");
1889 goto err_wedged;
1892 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1893 MI_NOOP);
1894 if (IS_ERR(rq)) {
1895 igt_spinner_end(&spin_lo);
1896 err = PTR_ERR(rq);
1897 goto err_ctx_lo;
/* Inverted check: the second spinner starting at this point is the error. */
1900 i915_request_add(rq);
1901 if (igt_wait_for_spinner(&spin_hi, rq)) {
1902 pr_err("Second context overtook first?\n");
1903 goto err_wedged;
/* Now raise the queued request's priority; it is expected to preempt. */
1906 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1907 engine->schedule(rq, &attr);
1909 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1910 pr_err("High priority context failed to preempt the low priority context\n");
1911 GEM_TRACE_DUMP();
1912 goto err_wedged;
1915 igt_spinner_end(&spin_hi);
1916 igt_spinner_end(&spin_lo);
1918 if (igt_live_test_end(&t)) {
1919 err = -EIO;
1920 goto err_ctx_lo;
1924 err = 0;
1925 err_ctx_lo:
1926 kernel_context_close(ctx_lo);
1927 err_ctx_hi:
1928 kernel_context_close(ctx_hi);
1929 err_spin_lo:
1930 igt_spinner_fini(&spin_lo);
1931 err_spin_hi:
1932 igt_spinner_fini(&spin_hi);
1933 return err;
/* Out-of-line failure path: stop both spinners, wedge, then rejoin the unwind. */
1935 err_wedged:
1936 igt_spinner_end(&spin_hi);
1937 igt_spinner_end(&spin_lo);
1938 intel_gt_set_wedged(gt);
1939 err = -EIO;
1940 goto err_ctx_lo;
/*
 * A test "client": one GEM context paired with the spinner used to submit
 * busy requests on its behalf. Set up by preempt_client_init() and torn
 * down by preempt_client_fini().
 */
1941 struct preempt_client {
1942 struct igt_spinner spin;
1943 struct i915_gem_context *ctx;
1946 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1948 c->ctx = kernel_context(gt->i915);
1949 if (!c->ctx)
1950 return -ENOMEM;
1952 if (igt_spinner_init(&c->spin, gt))
1953 goto err_ctx;
1955 return 0;
1957 err_ctx:
1958 kernel_context_close(c->ctx);
1959 return -ENOMEM;
1962 static void preempt_client_fini(struct preempt_client *c)
1964 igt_spinner_fini(&c->spin);
1965 kernel_context_close(c->ctx);
/*
 * Selftest: a request flagged I915_FENCE_FLAG_NOPREEMPT must not be
 * preempted by a higher priority client.
 *
 * Client A spins with the NOPREEMPT flag set; client B (context priority
 * I915_USER_PRIORITY(I915_PRIORITY_MAX)) is queued behind it and must not
 * start until A's spinner is ended. The engine's preempt_hang.count is
 * zeroed up front and must still be zero afterwards.
 */
1970 static int live_nopreempt(void *arg)
1972 struct intel_gt *gt = arg;
1973 struct intel_engine_cs *engine;
1974 struct preempt_client a, b;
1975 enum intel_engine_id id;
1976 int err = -ENOMEM;
1977 * Verify that we can disable preemption for an individual request
1978 * that may be being observed and not want to be interrupted.
1981 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1982 return 0;
1984 if (preempt_client_init(gt, &a))
1985 return -ENOMEM;
1986 if (preempt_client_init(gt, &b))
1987 goto err_client_a;
1988 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1990 for_each_engine(engine, gt, id) {
1991 struct i915_request *rq_a, *rq_b;
1993 if (!intel_engine_has_preemption(engine))
1994 continue;
1996 engine->execlists.preempt_hang.count = 0;
1998 rq_a = spinner_create_request(&a.spin,
1999 a.ctx, engine,
2000 MI_ARB_CHECK);
2001 if (IS_ERR(rq_a)) {
2002 err = PTR_ERR(rq_a);
2003 goto err_client_b;
2006 /* Low priority client, but unpreemptable! */
2007 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
2009 i915_request_add(rq_a);
2010 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2011 pr_err("First client failed to start\n");
2012 goto err_wedged;
2015 rq_b = spinner_create_request(&b.spin,
2016 b.ctx, engine,
2017 MI_ARB_CHECK);
2018 if (IS_ERR(rq_b)) {
2019 err = PTR_ERR(rq_b);
2020 goto err_client_b;
2023 i915_request_add(rq_b);
2025 /* B is much more important than A! (But A is unpreemptable.) */
2026 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
2028 /* Wait long enough for preemption and timeslicing */
2029 if (igt_wait_for_spinner(&b.spin, rq_b)) {
2030 pr_err("Second client started too early!\n");
2031 goto err_wedged;
/* Only once A is released is B expected to run. */
2034 igt_spinner_end(&a.spin);
2036 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2037 pr_err("Second client failed to start\n");
2038 goto err_wedged;
2041 igt_spinner_end(&b.spin);
/* NOTE(review): the -EINVAL set below is overwritten with -EIO at err_wedged. */
2043 if (engine->execlists.preempt_hang.count) {
2044 pr_err("Preemption recorded x%d; should have been suppressed!\n",
2045 engine->execlists.preempt_hang.count);
2046 err = -EINVAL;
2047 goto err_wedged;
2050 if (igt_flush_test(gt->i915))
2051 goto err_wedged;
2054 err = 0;
2055 err_client_b:
2056 preempt_client_fini(&b);
2057 err_client_a:
2058 preempt_client_fini(&a);
2059 return err;
2061 err_wedged:
2062 igt_spinner_end(&b.spin);
2063 igt_spinner_end(&a.spin);
2064 intel_gt_set_wedged(gt);
2065 err = -EIO;
2066 goto err_client_b;
/*
 * Shared state for the preempt-cancel subtests: the engine currently under
 * test and the two clients (context + spinner) they submit from.
 */
2069 struct live_preempt_cancel {
2070 struct intel_engine_cs *engine;
2071 struct preempt_client a, b;
/*
 * Subtest: cancel the single request that is actively running.
 *
 * A preemptible (MI_ARB_CHECK) spinner from client A is started, its context
 * is marked banned, and an engine pulse is sent. The request must then be
 * observed by wait_for_reset() within HZ / 2.
 *
 * Returns 0 on success or a negative error code; -EIO is also returned if
 * igt_live_test_end() reports a problem.
 */
2074 static int __cancel_active0(struct live_preempt_cancel *arg)
2076 struct i915_request *rq;
2077 struct igt_live_test t;
2078 int err;
2080 /* Preempt cancel of ELSP0 */
2081 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2082 if (igt_live_test_begin(&t, arg->engine->i915,
2083 __func__, arg->engine->name))
2084 return -EIO;
2086 rq = spinner_create_request(&arg->a.spin,
2087 arg->a.ctx, arg->engine,
2088 MI_ARB_CHECK);
2089 if (IS_ERR(rq))
2090 return PTR_ERR(rq);
/* Clear any ban left on the context by an earlier subtest before reuse. */
2092 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2093 i915_request_get(rq);
2094 i915_request_add(rq);
2095 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2096 err = -EIO;
2097 goto out;
2100 intel_context_set_banned(rq->context);
2101 err = intel_engine_pulse(arg->engine);
2102 if (err)
2103 goto out;
2105 err = wait_for_reset(arg->engine, rq, HZ / 2);
2106 if (err) {
2107 pr_err("Cancelled inflight0 request did not reset\n");
2108 goto out;
2111 out:
2112 i915_request_put(rq);
2113 if (igt_live_test_end(&t))
2114 err = -EIO;
2115 return err;
2118 static int __cancel_active1(struct live_preempt_cancel *arg)
2120 struct i915_request *rq[2] = {};
2121 struct igt_live_test t;
2122 int err;
2124 /* Preempt cancel of ELSP1 */
2125 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2126 if (igt_live_test_begin(&t, arg->engine->i915,
2127 __func__, arg->engine->name))
2128 return -EIO;
2130 rq[0] = spinner_create_request(&arg->a.spin,
2131 arg->a.ctx, arg->engine,
2132 MI_NOOP); /* no preemption */
2133 if (IS_ERR(rq[0]))
2134 return PTR_ERR(rq[0]);
2136 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2137 i915_request_get(rq[0]);
2138 i915_request_add(rq[0]);
2139 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2140 err = -EIO;
2141 goto out;
2144 rq[1] = spinner_create_request(&arg->b.spin,
2145 arg->b.ctx, arg->engine,
2146 MI_ARB_CHECK);
2147 if (IS_ERR(rq[1])) {
2148 err = PTR_ERR(rq[1]);
2149 goto out;
2152 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2153 i915_request_get(rq[1]);
2154 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2155 i915_request_add(rq[1]);
2156 if (err)
2157 goto out;
2159 intel_context_set_banned(rq[1]->context);
2160 err = intel_engine_pulse(arg->engine);
2161 if (err)
2162 goto out;
2164 igt_spinner_end(&arg->a.spin);
2165 err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2166 if (err)
2167 goto out;
2169 if (rq[0]->fence.error != 0) {
2170 pr_err("Normal inflight0 request did not complete\n");
2171 err = -EINVAL;
2172 goto out;
2175 if (rq[1]->fence.error != -EIO) {
2176 pr_err("Cancelled inflight1 request did not report -EIO\n");
2177 err = -EINVAL;
2178 goto out;
2181 out:
2182 i915_request_put(rq[1]);
2183 i915_request_put(rq[0]);
2184 if (igt_live_test_end(&t))
2185 err = -EIO;
2186 return err;
2189 static int __cancel_queued(struct live_preempt_cancel *arg)
2191 struct i915_request *rq[3] = {};
2192 struct igt_live_test t;
2193 int err;
2195 /* Full ELSP and one in the wings */
2196 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2197 if (igt_live_test_begin(&t, arg->engine->i915,
2198 __func__, arg->engine->name))
2199 return -EIO;
2201 rq[0] = spinner_create_request(&arg->a.spin,
2202 arg->a.ctx, arg->engine,
2203 MI_ARB_CHECK);
2204 if (IS_ERR(rq[0]))
2205 return PTR_ERR(rq[0]);
2207 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2208 i915_request_get(rq[0]);
2209 i915_request_add(rq[0]);
2210 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2211 err = -EIO;
2212 goto out;
2215 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2216 if (IS_ERR(rq[1])) {
2217 err = PTR_ERR(rq[1]);
2218 goto out;
2221 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2222 i915_request_get(rq[1]);
2223 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2224 i915_request_add(rq[1]);
2225 if (err)
2226 goto out;
2228 rq[2] = spinner_create_request(&arg->b.spin,
2229 arg->a.ctx, arg->engine,
2230 MI_ARB_CHECK);
2231 if (IS_ERR(rq[2])) {
2232 err = PTR_ERR(rq[2]);
2233 goto out;
2236 i915_request_get(rq[2]);
2237 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2238 i915_request_add(rq[2]);
2239 if (err)
2240 goto out;
2242 intel_context_set_banned(rq[2]->context);
2243 err = intel_engine_pulse(arg->engine);
2244 if (err)
2245 goto out;
2247 err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2248 if (err)
2249 goto out;
2251 if (rq[0]->fence.error != -EIO) {
2252 pr_err("Cancelled inflight0 request did not report -EIO\n");
2253 err = -EINVAL;
2254 goto out;
2257 if (rq[1]->fence.error != 0) {
2258 pr_err("Normal inflight1 request did not complete\n");
2259 err = -EINVAL;
2260 goto out;
2263 if (rq[2]->fence.error != -EIO) {
2264 pr_err("Cancelled queued request did not report -EIO\n");
2265 err = -EINVAL;
2266 goto out;
2269 out:
2270 i915_request_put(rq[2]);
2271 i915_request_put(rq[1]);
2272 i915_request_put(rq[0]);
2273 if (igt_live_test_end(&t))
2274 err = -EIO;
2275 return err;
/*
 * Subtest: cancel a running spinner that offers no arbitration point
 * (submitted with MI_NOOP).
 *
 * Skipped (returns 0) unless CONFIG_DRM_I915_PREEMPT_TIMEOUT is active and
 * intel_has_reset_engine() is true for the engine's GT. The context is
 * banned, the engine pulsed, and the request must be observed by
 * wait_for_reset() within HZ / 2. Unlike the other subtests this one
 * finishes with igt_flush_test() rather than an igt_live_test pair.
 */
2280 static int __cancel_hostile(struct live_preempt_cancel *arg)
2282 struct i915_request *rq;
2283 int err;
2285 /* Preempt cancel non-preemptible spinner in ELSP0 */
2286 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2287 return 0;
2289 if (!intel_has_reset_engine(arg->engine->gt))
2290 return 0;
2292 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2293 rq = spinner_create_request(&arg->a.spin,
2294 arg->a.ctx, arg->engine,
2295 MI_NOOP); /* preemption disabled */
2296 if (IS_ERR(rq))
2297 return PTR_ERR(rq);
/* Clear any ban left on the context by an earlier subtest before reuse. */
2299 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2300 i915_request_get(rq);
2301 i915_request_add(rq);
2302 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2303 err = -EIO;
2304 goto out;
2307 intel_context_set_banned(rq->context);
2308 err = intel_engine_pulse(arg->engine); /* force reset */
2309 if (err)
2310 goto out;
2312 err = wait_for_reset(arg->engine, rq, HZ / 2);
2313 if (err) {
2314 pr_err("Cancelled inflight0 request did not reset\n");
2315 goto out;
2318 out:
2319 i915_request_put(rq);
2320 if (igt_flush_test(arg->engine->i915))
2321 err = -EIO;
2322 return err;
/*
 * Selftest driver for the preempt-cancel subtests.
 *
 * Creates two clients and, for every engine that reports preemption
 * support, runs __cancel_active0(), __cancel_active1(), __cancel_queued()
 * and __cancel_hostile() in that order. The first subtest failure stops the
 * run, wedges the GT and is returned to the caller unchanged.
 */
2325 static int live_preempt_cancel(void *arg)
2327 struct intel_gt *gt = arg;
2328 struct live_preempt_cancel data;
2329 enum intel_engine_id id;
2330 int err = -ENOMEM;
2331 * To cancel an inflight context, we need to first remove it from the
2332 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2335 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2336 return 0;
2338 if (preempt_client_init(gt, &data.a))
2339 return -ENOMEM;
2340 if (preempt_client_init(gt, &data.b))
2341 goto err_client_a;
/* The loop cursor is data.engine itself, so each subtest sees the engine. */
2343 for_each_engine(data.engine, gt, id) {
2344 if (!intel_engine_has_preemption(data.engine))
2345 continue;
2347 err = __cancel_active0(&data);
2348 if (err)
2349 goto err_wedged;
2351 err = __cancel_active1(&data);
2352 if (err)
2353 goto err_wedged;
2355 err = __cancel_queued(&data);
2356 if (err)
2357 goto err_wedged;
2359 err = __cancel_hostile(&data);
2360 if (err)
2361 goto err_wedged;
2364 err = 0;
2365 err_client_b:
2366 preempt_client_fini(&data.b);
2367 err_client_a:
2368 preempt_client_fini(&data.a);
2369 return err;
/* Unlike sibling tests, err is not overwritten here: the subtest code is kept. */
2371 err_wedged:
2372 GEM_TRACE_DUMP();
2373 igt_spinner_end(&data.b.spin);
2374 igt_spinner_end(&data.a.spin);
2375 intel_gt_set_wedged(gt);
2376 goto err_client_b;
/*
 * Selftest: raising the priority of the request that is already running
 * must not be recorded as a preemption.
 *
 * Two clients alternate: while A spins, B is queued, A is bumped to
 * I915_USER_PRIORITY(I915_PRIORITY_MAX) via engine->schedule() and then
 * ended so that B starts; the roles are swapped and this repeats 8 times
 * per engine. The engine's preempt_hang.count is zeroed beforehand and must
 * still be zero afterwards, otherwise -EINVAL is returned.
 *
 * Skipped (returns 0) without HAS_LOGICAL_RING_PREEMPTION, with GuC
 * submission, or under an active vGPU. The heartbeat is disabled for the
 * duration of each engine's run, so every exit from the loop body
 * re-enables it first.
 */
2381 static int live_suppress_self_preempt(void *arg)
2383 struct intel_gt *gt = arg;
2384 struct intel_engine_cs *engine;
2385 struct i915_sched_attr attr = {
2386 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
2388 struct preempt_client a, b;
2389 enum intel_engine_id id;
2390 int err = -ENOMEM;
2391 * Verify that if a preemption request does not cause a change in
2392 * the current execution order, the preempt-to-idle injection is
2393 * skipped and that we do not accidentally apply it after the CS
2394 * completion event.
2397 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2398 return 0;
2400 if (intel_uc_uses_guc_submission(&gt->uc))
2401 return 0; /* presume black box */
2403 if (intel_vgpu_active(gt->i915))
2404 return 0; /* GVT forces single port & request submission */
2406 if (preempt_client_init(gt, &a))
2407 return -ENOMEM;
2408 if (preempt_client_init(gt, &b))
2409 goto err_client_a;
2411 for_each_engine(engine, gt, id) {
2412 struct i915_request *rq_a, *rq_b;
2413 int depth;
2415 if (!intel_engine_has_preemption(engine))
2416 continue;
2418 if (igt_flush_test(gt->i915))
2419 goto err_wedged;
2421 st_engine_heartbeat_disable(engine);
2422 engine->execlists.preempt_hang.count = 0;
2424 rq_a = spinner_create_request(&a.spin,
2425 a.ctx, engine,
2426 MI_NOOP);
2427 if (IS_ERR(rq_a)) {
2428 err = PTR_ERR(rq_a);
2429 st_engine_heartbeat_enable(engine);
2430 goto err_client_b;
2433 i915_request_add(rq_a);
2434 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2435 pr_err("First client failed to start\n");
2436 st_engine_heartbeat_enable(engine);
2437 goto err_wedged;
2440 /* Keep postponing the timer to avoid premature slicing */
2441 mod_timer(&engine->execlists.timer, jiffies + HZ);
2442 for (depth = 0; depth < 8; depth++) {
2443 rq_b = spinner_create_request(&b.spin,
2444 b.ctx, engine,
2445 MI_NOOP);
2446 if (IS_ERR(rq_b)) {
2447 err = PTR_ERR(rq_b);
2448 st_engine_heartbeat_enable(engine);
2449 goto err_client_b;
2451 i915_request_add(rq_b);
/* Bump the request that is already running, then let it finish. */
2453 GEM_BUG_ON(i915_request_completed(rq_a));
2454 engine->schedule(rq_a, &attr);
2455 igt_spinner_end(&a.spin);
2457 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2458 pr_err("Second client failed to start\n");
2459 st_engine_heartbeat_enable(engine);
2460 goto err_wedged;
/* Swap roles: the client now spinning becomes "a" for the next round. */
2463 swap(a, b);
2464 rq_a = rq_b;
2466 igt_spinner_end(&a.spin);
2468 if (engine->execlists.preempt_hang.count) {
2469 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2470 engine->name,
2471 engine->execlists.preempt_hang.count,
2472 depth);
2473 st_engine_heartbeat_enable(engine);
2474 err = -EINVAL;
2475 goto err_client_b;
2478 st_engine_heartbeat_enable(engine);
2479 if (igt_flush_test(gt->i915))
2480 goto err_wedged;
2483 err = 0;
2484 err_client_b:
2485 preempt_client_fini(&b);
2486 err_client_a:
2487 preempt_client_fini(&a);
2488 return err;
2490 err_wedged:
2491 igt_spinner_end(&b.spin);
2492 igt_spinner_end(&a.spin);
2493 intel_gt_set_wedged(gt);
2494 err = -EIO;
2495 goto err_client_b;
/*
 * Selftest: a high priority request queued behind a long chain of low
 * priority requests must still preempt to the front.
 *
 * Per engine, one spinner request is first submitted only to measure how
 * much ring space a request takes, giving an upper bound (ring_size) on the
 * chain length. Then, for each prime count up to that bound: a "hi" spinner
 * is started, a "lo" spinner plus count plain "lo" requests are queued, and
 * a final "hi" request is queued and bumped to maximum priority. After the
 * hi spinner is ended that final request must complete within HZ / 5, and
 * once the lo spinner is ended a trailing lo request must also complete
 * within HZ / 5.
 *
 * Every failure inside the engine loop goes through err_wedged, which wedges
 * the GT and returns -EIO regardless of the underlying error.
 */
2500 static int live_chain_preempt(void *arg)
2502 struct intel_gt *gt = arg;
2503 struct intel_engine_cs *engine;
2504 struct preempt_client hi, lo;
2505 enum intel_engine_id id;
2506 int err = -ENOMEM;
2507 * Build a chain AB...BA between two contexts (A, B) and request
2508 * preemption of the last request. It should then complete before
2509 * the previously submitted spinner in B.
2512 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2513 return 0;
2515 if (preempt_client_init(gt, &hi))
2516 return -ENOMEM;
2518 if (preempt_client_init(gt, &lo))
2519 goto err_client_hi;
2521 for_each_engine(engine, gt, id) {
2522 struct i915_sched_attr attr = {
2523 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
2525 struct igt_live_test t;
2526 struct i915_request *rq;
2527 int ring_size, count, i;
2529 if (!intel_engine_has_preemption(engine))
2530 continue;
2532 rq = spinner_create_request(&lo.spin,
2533 lo.ctx, engine,
2534 MI_ARB_CHECK);
2535 if (IS_ERR(rq))
2536 goto err_wedged;
2538 i915_request_get(rq);
2539 i915_request_add(rq);
/*
 * Bytes this request occupied in the ring (corrected for wrap-around),
 * then converted into how many such requests fit in the whole ring.
 */
2541 ring_size = rq->wa_tail - rq->head;
2542 if (ring_size < 0)
2543 ring_size += rq->ring->size;
2544 ring_size = rq->ring->size / ring_size;
2545 pr_debug("%s(%s): Using maximum of %d requests\n",
2546 __func__, engine->name, ring_size);
2548 igt_spinner_end(&lo.spin);
2549 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2550 pr_err("Timed out waiting to flush %s\n", engine->name);
2551 i915_request_put(rq);
2552 goto err_wedged;
2554 i915_request_put(rq);
2556 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2557 err = -EIO;
2558 goto err_wedged;
2561 for_each_prime_number_from(count, 1, ring_size) {
2562 rq = spinner_create_request(&hi.spin,
2563 hi.ctx, engine,
2564 MI_ARB_CHECK);
2565 if (IS_ERR(rq))
2566 goto err_wedged;
2567 i915_request_add(rq);
2568 if (!igt_wait_for_spinner(&hi.spin, rq))
2569 goto err_wedged;
2571 rq = spinner_create_request(&lo.spin,
2572 lo.ctx, engine,
2573 MI_ARB_CHECK);
2574 if (IS_ERR(rq))
2575 goto err_wedged;
2576 i915_request_add(rq);
/* Pad the low priority context with count further plain requests. */
2578 for (i = 0; i < count; i++) {
2579 rq = igt_request_alloc(lo.ctx, engine);
2580 if (IS_ERR(rq))
2581 goto err_wedged;
2582 i915_request_add(rq);
2585 rq = igt_request_alloc(hi.ctx, engine);
2586 if (IS_ERR(rq))
2587 goto err_wedged;
2589 i915_request_get(rq);
2590 i915_request_add(rq);
2591 engine->schedule(rq, &attr);
/* Only the hi spinner is released; the lo spinner is still spinning. */
2593 igt_spinner_end(&hi.spin);
2594 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2595 struct drm_printer p =
2596 drm_info_printer(gt->i915->drm.dev);
2598 pr_err("Failed to preempt over chain of %d\n",
2599 count);
2600 intel_engine_dump(engine, &p,
2601 "%s\n", engine->name);
2602 i915_request_put(rq);
2603 goto err_wedged;
2605 igt_spinner_end(&lo.spin);
2606 i915_request_put(rq);
/* A trailing lo request is used to check that the lo chain drains. */
2608 rq = igt_request_alloc(lo.ctx, engine);
2609 if (IS_ERR(rq))
2610 goto err_wedged;
2612 i915_request_get(rq);
2613 i915_request_add(rq);
2615 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2616 struct drm_printer p =
2617 drm_info_printer(gt->i915->drm.dev);
2619 pr_err("Failed to flush low priority chain of %d requests\n",
2620 count);
2621 intel_engine_dump(engine, &p,
2622 "%s\n", engine->name);
2624 i915_request_put(rq);
2625 goto err_wedged;
2627 i915_request_put(rq);
2630 if (igt_live_test_end(&t)) {
2631 err = -EIO;
2632 goto err_wedged;
2636 err = 0;
2637 err_client_lo:
2638 preempt_client_fini(&lo);
2639 err_client_hi:
2640 preempt_client_fini(&hi);
2641 return err;
2643 err_wedged:
2644 igt_spinner_end(&hi.spin);
2645 igt_spinner_end(&lo.spin);
2646 intel_gt_set_wedged(gt);
2647 err = -EIO;
2648 goto err_client_lo;
2651 static int create_gang(struct intel_engine_cs *engine,
2652 struct i915_request **prev)
2654 struct drm_i915_gem_object *obj;
2655 struct intel_context *ce;
2656 struct i915_request *rq;
2657 struct i915_vma *vma;
2658 u32 *cs;
2659 int err;
2661 ce = intel_context_create(engine);
2662 if (IS_ERR(ce))
2663 return PTR_ERR(ce);
2665 obj = i915_gem_object_create_internal(engine->i915, 4096);
2666 if (IS_ERR(obj)) {
2667 err = PTR_ERR(obj);
2668 goto err_ce;
2671 vma = i915_vma_instance(obj, ce->vm, NULL);
2672 if (IS_ERR(vma)) {
2673 err = PTR_ERR(vma);
2674 goto err_obj;
2677 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2678 if (err)
2679 goto err_obj;
2681 cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2682 if (IS_ERR(cs))
2683 goto err_obj;
2685 /* Semaphore target: spin until zero */
2686 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2688 *cs++ = MI_SEMAPHORE_WAIT |
2689 MI_SEMAPHORE_POLL |
2690 MI_SEMAPHORE_SAD_EQ_SDD;
2691 *cs++ = 0;
2692 *cs++ = lower_32_bits(vma->node.start);
2693 *cs++ = upper_32_bits(vma->node.start);
2695 if (*prev) {
2696 u64 offset = (*prev)->batch->node.start;
2698 /* Terminate the spinner in the next lower priority batch. */
2699 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2700 *cs++ = lower_32_bits(offset);
2701 *cs++ = upper_32_bits(offset);
2702 *cs++ = 0;
2705 *cs++ = MI_BATCH_BUFFER_END;
2706 i915_gem_object_flush_map(obj);
2707 i915_gem_object_unpin_map(obj);
2709 rq = intel_context_create_request(ce);
2710 if (IS_ERR(rq))
2711 goto err_obj;
2713 rq->batch = i915_vma_get(vma);
2714 i915_request_get(rq);
2716 i915_vma_lock(vma);
2717 err = i915_request_await_object(rq, vma->obj, false);
2718 if (!err)
2719 err = i915_vma_move_to_active(vma, rq, 0);
2720 if (!err)
2721 err = rq->engine->emit_bb_start(rq,
2722 vma->node.start,
2723 PAGE_SIZE, 0);
2724 i915_vma_unlock(vma);
2725 i915_request_add(rq);
2726 if (err)
2727 goto err_rq;
2729 i915_gem_object_put(obj);
2730 intel_context_put(ce);
2732 rq->mock.link.next = &(*prev)->mock.link;
2733 *prev = rq;
2734 return 0;
2736 err_rq:
2737 i915_vma_put(rq->batch);
2738 i915_request_put(rq);
2739 err_obj:
2740 i915_gem_object_put(obj);
2741 err_ce:
2742 intel_context_put(ce);
2743 return err;
2746 static int __live_preempt_ring(struct intel_engine_cs *engine,
2747 struct igt_spinner *spin,
2748 int queue_sz, int ring_sz)
2750 struct intel_context *ce[2] = {};
2751 struct i915_request *rq;
2752 struct igt_live_test t;
2753 int err = 0;
2754 int n;
2756 if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
2757 return -EIO;
2759 for (n = 0; n < ARRAY_SIZE(ce); n++) {
2760 struct intel_context *tmp;
2762 tmp = intel_context_create(engine);
2763 if (IS_ERR(tmp)) {
2764 err = PTR_ERR(tmp);
2765 goto err_ce;
2768 tmp->ring = __intel_context_ring_size(ring_sz);
2770 err = intel_context_pin(tmp);
2771 if (err) {
2772 intel_context_put(tmp);
2773 goto err_ce;
2776 memset32(tmp->ring->vaddr,
2777 0xdeadbeef, /* trigger a hang if executed */
2778 tmp->ring->vma->size / sizeof(u32));
2780 ce[n] = tmp;
2783 rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
2784 if (IS_ERR(rq)) {
2785 err = PTR_ERR(rq);
2786 goto err_ce;
2789 i915_request_get(rq);
2790 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2791 i915_request_add(rq);
2793 if (!igt_wait_for_spinner(spin, rq)) {
2794 intel_gt_set_wedged(engine->gt);
2795 i915_request_put(rq);
2796 err = -ETIME;
2797 goto err_ce;
2800 /* Fill the ring, until we will cause a wrap */
2801 n = 0;
2802 while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
2803 struct i915_request *tmp;
2805 tmp = intel_context_create_request(ce[0]);
2806 if (IS_ERR(tmp)) {
2807 err = PTR_ERR(tmp);
2808 i915_request_put(rq);
2809 goto err_ce;
2812 i915_request_add(tmp);
2813 intel_engine_flush_submission(engine);
2814 n++;
2816 intel_engine_flush_submission(engine);
2817 pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
2818 engine->name, queue_sz, n,
2819 ce[0]->ring->size,
2820 ce[0]->ring->tail,
2821 ce[0]->ring->emit,
2822 rq->tail);
2823 i915_request_put(rq);
2825 /* Create a second request to preempt the first ring */
2826 rq = intel_context_create_request(ce[1]);
2827 if (IS_ERR(rq)) {
2828 err = PTR_ERR(rq);
2829 goto err_ce;
2832 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2833 i915_request_get(rq);
2834 i915_request_add(rq);
2836 err = wait_for_submit(engine, rq, HZ / 2);
2837 i915_request_put(rq);
2838 if (err) {
2839 pr_err("%s: preemption request was not submited\n",
2840 engine->name);
2841 err = -ETIME;
2844 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
2845 engine->name,
2846 ce[0]->ring->tail, ce[0]->ring->emit,
2847 ce[1]->ring->tail, ce[1]->ring->emit);
2849 err_ce:
2850 intel_engine_flush_submission(engine);
2851 igt_spinner_end(spin);
2852 for (n = 0; n < ARRAY_SIZE(ce); n++) {
2853 if (IS_ERR_OR_NULL(ce[n]))
2854 break;
2856 intel_context_unpin(ce[n]);
2857 intel_context_put(ce[n]);
2859 if (igt_live_test_end(&t))
2860 err = -EIO;
2861 return err;
2864 static int live_preempt_ring(void *arg)
2866 struct intel_gt *gt = arg;
2867 struct intel_engine_cs *engine;
2868 struct igt_spinner spin;
2869 enum intel_engine_id id;
2870 int err = 0;
2873 * Check that we rollback large chunks of a ring in order to do a
2874 * preemption event. Similar to live_unlite_ring, but looking at
2875 * ring size rather than the impact of intel_ring_direction().
2878 if (igt_spinner_init(&spin, gt))
2879 return -ENOMEM;
2881 for_each_engine(engine, gt, id) {
2882 int n;
2884 if (!intel_engine_has_preemption(engine))
2885 continue;
2887 if (!intel_engine_can_store_dword(engine))
2888 continue;
2890 st_engine_heartbeat_disable(engine);
2892 for (n = 0; n <= 3; n++) {
2893 err = __live_preempt_ring(engine, &spin,
2894 n * SZ_4K / 4, SZ_4K);
2895 if (err)
2896 break;
2899 st_engine_heartbeat_enable(engine);
2900 if (err)
2901 break;
2904 igt_spinner_fini(&spin);
2905 return err;
2908 static int live_preempt_gang(void *arg)
2910 struct intel_gt *gt = arg;
2911 struct intel_engine_cs *engine;
2912 enum intel_engine_id id;
2914 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2915 return 0;
2918 * Build as long a chain of preempters as we can, with each
2919 * request higher priority than the last. Once we are ready, we release
2920 * the last batch which then precolates down the chain, each releasing
2921 * the next oldest in turn. The intent is to simply push as hard as we
2922 * can with the number of preemptions, trying to exceed narrow HW
2923 * limits. At a minimum, we insist that we can sort all the user
2924 * high priority levels into execution order.
2927 for_each_engine(engine, gt, id) {
2928 struct i915_request *rq = NULL;
2929 struct igt_live_test t;
2930 IGT_TIMEOUT(end_time);
2931 int prio = 0;
2932 int err = 0;
2933 u32 *cs;
2935 if (!intel_engine_has_preemption(engine))
2936 continue;
2938 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2939 return -EIO;
2941 do {
2942 struct i915_sched_attr attr = {
2943 .priority = I915_USER_PRIORITY(prio++),
2946 err = create_gang(engine, &rq);
2947 if (err)
2948 break;
2950 /* Submit each spinner at increasing priority */
2951 engine->schedule(rq, &attr);
2952 } while (prio <= I915_PRIORITY_MAX &&
2953 !__igt_timeout(end_time, NULL));
2954 pr_debug("%s: Preempt chain of %d requests\n",
2955 engine->name, prio);
2958 * Such that the last spinner is the highest priority and
2959 * should execute first. When that spinner completes,
2960 * it will terminate the next lowest spinner until there
2961 * are no more spinners and the gang is complete.
2963 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2964 if (!IS_ERR(cs)) {
2965 *cs = 0;
2966 i915_gem_object_unpin_map(rq->batch->obj);
2967 } else {
2968 err = PTR_ERR(cs);
2969 intel_gt_set_wedged(gt);
2972 while (rq) { /* wait for each rq from highest to lowest prio */
2973 struct i915_request *n = list_next_entry(rq, mock.link);
2975 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2976 struct drm_printer p =
2977 drm_info_printer(engine->i915->drm.dev);
2979 pr_err("Failed to flush chain of %d requests, at %d\n",
2980 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2981 intel_engine_dump(engine, &p,
2982 "%s\n", engine->name);
2984 err = -ETIME;
2987 i915_vma_put(rq->batch);
2988 i915_request_put(rq);
2989 rq = n;
2992 if (igt_live_test_end(&t))
2993 err = -EIO;
2994 if (err)
2995 return err;
2998 return 0;
3001 static struct i915_vma *
3002 create_gpr_user(struct intel_engine_cs *engine,
3003 struct i915_vma *result,
3004 unsigned int offset)
3006 struct drm_i915_gem_object *obj;
3007 struct i915_vma *vma;
3008 u32 *cs;
3009 int err;
3010 int i;
3012 obj = i915_gem_object_create_internal(engine->i915, 4096);
3013 if (IS_ERR(obj))
3014 return ERR_CAST(obj);
3016 vma = i915_vma_instance(obj, result->vm, NULL);
3017 if (IS_ERR(vma)) {
3018 i915_gem_object_put(obj);
3019 return vma;
3022 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3023 if (err) {
3024 i915_vma_put(vma);
3025 return ERR_PTR(err);
3028 cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
3029 if (IS_ERR(cs)) {
3030 i915_vma_put(vma);
3031 return ERR_CAST(cs);
3034 /* All GPR are clear for new contexts. We use GPR(0) as a constant */
3035 *cs++ = MI_LOAD_REGISTER_IMM(1);
3036 *cs++ = CS_GPR(engine, 0);
3037 *cs++ = 1;
3039 for (i = 1; i < NUM_GPR; i++) {
3040 u64 addr;
3043 * Perform: GPR[i]++
3045 * As we read and write into the context saved GPR[i], if
3046 * we restart this batch buffer from an earlier point, we
3047 * will repeat the increment and store a value > 1.
3049 *cs++ = MI_MATH(4);
3050 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3051 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3052 *cs++ = MI_MATH_ADD;
3053 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3055 addr = result->node.start + offset + i * sizeof(*cs);
3056 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
3057 *cs++ = CS_GPR(engine, 2 * i);
3058 *cs++ = lower_32_bits(addr);
3059 *cs++ = upper_32_bits(addr);
3061 *cs++ = MI_SEMAPHORE_WAIT |
3062 MI_SEMAPHORE_POLL |
3063 MI_SEMAPHORE_SAD_GTE_SDD;
3064 *cs++ = i;
3065 *cs++ = lower_32_bits(result->node.start);
3066 *cs++ = upper_32_bits(result->node.start);
3069 *cs++ = MI_BATCH_BUFFER_END;
3070 i915_gem_object_flush_map(obj);
3071 i915_gem_object_unpin_map(obj);
3073 return vma;
3076 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3078 struct drm_i915_gem_object *obj;
3079 struct i915_vma *vma;
3080 int err;
3082 obj = i915_gem_object_create_internal(gt->i915, sz);
3083 if (IS_ERR(obj))
3084 return ERR_CAST(obj);
3086 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
3087 if (IS_ERR(vma)) {
3088 i915_gem_object_put(obj);
3089 return vma;
3092 err = i915_ggtt_pin(vma, NULL, 0, 0);
3093 if (err) {
3094 i915_vma_put(vma);
3095 return ERR_PTR(err);
3098 return vma;
3101 static struct i915_request *
3102 create_gpr_client(struct intel_engine_cs *engine,
3103 struct i915_vma *global,
3104 unsigned int offset)
3106 struct i915_vma *batch, *vma;
3107 struct intel_context *ce;
3108 struct i915_request *rq;
3109 int err;
3111 ce = intel_context_create(engine);
3112 if (IS_ERR(ce))
3113 return ERR_CAST(ce);
3115 vma = i915_vma_instance(global->obj, ce->vm, NULL);
3116 if (IS_ERR(vma)) {
3117 err = PTR_ERR(vma);
3118 goto out_ce;
3121 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3122 if (err)
3123 goto out_ce;
3125 batch = create_gpr_user(engine, vma, offset);
3126 if (IS_ERR(batch)) {
3127 err = PTR_ERR(batch);
3128 goto out_vma;
3131 rq = intel_context_create_request(ce);
3132 if (IS_ERR(rq)) {
3133 err = PTR_ERR(rq);
3134 goto out_batch;
3137 i915_vma_lock(vma);
3138 err = i915_request_await_object(rq, vma->obj, false);
3139 if (!err)
3140 err = i915_vma_move_to_active(vma, rq, 0);
3141 i915_vma_unlock(vma);
3143 i915_vma_lock(batch);
3144 if (!err)
3145 err = i915_request_await_object(rq, batch->obj, false);
3146 if (!err)
3147 err = i915_vma_move_to_active(batch, rq, 0);
3148 if (!err)
3149 err = rq->engine->emit_bb_start(rq,
3150 batch->node.start,
3151 PAGE_SIZE, 0);
3152 i915_vma_unlock(batch);
3153 i915_vma_unpin(batch);
3155 if (!err)
3156 i915_request_get(rq);
3157 i915_request_add(rq);
3159 out_batch:
3160 i915_vma_put(batch);
3161 out_vma:
3162 i915_vma_unpin(vma);
3163 out_ce:
3164 intel_context_put(ce);
3165 return err ? ERR_PTR(err) : rq;
3168 static int preempt_user(struct intel_engine_cs *engine,
3169 struct i915_vma *global,
3170 int id)
3172 struct i915_sched_attr attr = {
3173 .priority = I915_PRIORITY_MAX
3175 struct i915_request *rq;
3176 int err = 0;
3177 u32 *cs;
3179 rq = intel_engine_create_kernel_request(engine);
3180 if (IS_ERR(rq))
3181 return PTR_ERR(rq);
3183 cs = intel_ring_begin(rq, 4);
3184 if (IS_ERR(cs)) {
3185 i915_request_add(rq);
3186 return PTR_ERR(cs);
3189 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3190 *cs++ = i915_ggtt_offset(global);
3191 *cs++ = 0;
3192 *cs++ = id;
3194 intel_ring_advance(rq, cs);
3196 i915_request_get(rq);
3197 i915_request_add(rq);
3199 engine->schedule(rq, &attr);
3201 if (i915_request_wait(rq, 0, HZ / 2) < 0)
3202 err = -ETIME;
3203 i915_request_put(rq);
3205 return err;
3208 static int live_preempt_user(void *arg)
3210 struct intel_gt *gt = arg;
3211 struct intel_engine_cs *engine;
3212 struct i915_vma *global;
3213 enum intel_engine_id id;
3214 u32 *result;
3215 int err = 0;
3217 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
3218 return 0;
3221 * In our other tests, we look at preemption in carefully
3222 * controlled conditions in the ringbuffer. Since most of the
3223 * time is spent in user batches, most of our preemptions naturally
3224 * occur there. We want to verify that when we preempt inside a batch
3225 * we continue on from the current instruction and do not roll back
3226 * to the start, or another earlier arbitration point.
3228 * To verify this, we create a batch which is a mixture of
3229 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3230 * a few preempting contexts thrown into the mix, we look for any
3231 * repeated instructions (which show up as incorrect values).
3234 global = create_global(gt, 4096);
3235 if (IS_ERR(global))
3236 return PTR_ERR(global);
3238 result = i915_gem_object_pin_map(global->obj, I915_MAP_WC);
3239 if (IS_ERR(result)) {
3240 i915_vma_unpin_and_release(&global, 0);
3241 return PTR_ERR(result);
3244 for_each_engine(engine, gt, id) {
3245 struct i915_request *client[3] = {};
3246 struct igt_live_test t;
3247 int i;
3249 if (!intel_engine_has_preemption(engine))
3250 continue;
3252 if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS)
3253 continue; /* we need per-context GPR */
3255 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3256 err = -EIO;
3257 break;
3260 memset(result, 0, 4096);
3262 for (i = 0; i < ARRAY_SIZE(client); i++) {
3263 struct i915_request *rq;
3265 rq = create_gpr_client(engine, global,
3266 NUM_GPR * i * sizeof(u32));
3267 if (IS_ERR(rq))
3268 goto end_test;
3270 client[i] = rq;
3273 /* Continuously preempt the set of 3 running contexts */
3274 for (i = 1; i <= NUM_GPR; i++) {
3275 err = preempt_user(engine, global, i);
3276 if (err)
3277 goto end_test;
3280 if (READ_ONCE(result[0]) != NUM_GPR) {
3281 pr_err("%s: Failed to release semaphore\n",
3282 engine->name);
3283 err = -EIO;
3284 goto end_test;
3287 for (i = 0; i < ARRAY_SIZE(client); i++) {
3288 int gpr;
3290 if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3291 err = -ETIME;
3292 goto end_test;
3295 for (gpr = 1; gpr < NUM_GPR; gpr++) {
3296 if (result[NUM_GPR * i + gpr] != 1) {
3297 pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3298 engine->name,
3299 i, gpr, result[NUM_GPR * i + gpr]);
3300 err = -EINVAL;
3301 goto end_test;
3306 end_test:
3307 for (i = 0; i < ARRAY_SIZE(client); i++) {
3308 if (!client[i])
3309 break;
3311 i915_request_put(client[i]);
3314 /* Flush the semaphores on error */
3315 smp_store_mb(result[0], -1);
3316 if (igt_live_test_end(&t))
3317 err = -EIO;
3318 if (err)
3319 break;
3322 i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3323 return err;
3326 static int live_preempt_timeout(void *arg)
3328 struct intel_gt *gt = arg;
3329 struct i915_gem_context *ctx_hi, *ctx_lo;
3330 struct igt_spinner spin_lo;
3331 struct intel_engine_cs *engine;
3332 enum intel_engine_id id;
3333 int err = -ENOMEM;
3336 * Check that we force preemption to occur by cancelling the previous
3337 * context if it refuses to yield the GPU.
3339 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3340 return 0;
3342 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
3343 return 0;
3345 if (!intel_has_reset_engine(gt))
3346 return 0;
3348 if (igt_spinner_init(&spin_lo, gt))
3349 return -ENOMEM;
3351 ctx_hi = kernel_context(gt->i915);
3352 if (!ctx_hi)
3353 goto err_spin_lo;
3354 ctx_hi->sched.priority =
3355 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
3357 ctx_lo = kernel_context(gt->i915);
3358 if (!ctx_lo)
3359 goto err_ctx_hi;
3360 ctx_lo->sched.priority =
3361 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
3363 for_each_engine(engine, gt, id) {
3364 unsigned long saved_timeout;
3365 struct i915_request *rq;
3367 if (!intel_engine_has_preemption(engine))
3368 continue;
3370 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3371 MI_NOOP); /* preemption disabled */
3372 if (IS_ERR(rq)) {
3373 err = PTR_ERR(rq);
3374 goto err_ctx_lo;
3377 i915_request_add(rq);
3378 if (!igt_wait_for_spinner(&spin_lo, rq)) {
3379 intel_gt_set_wedged(gt);
3380 err = -EIO;
3381 goto err_ctx_lo;
3384 rq = igt_request_alloc(ctx_hi, engine);
3385 if (IS_ERR(rq)) {
3386 igt_spinner_end(&spin_lo);
3387 err = PTR_ERR(rq);
3388 goto err_ctx_lo;
3391 /* Flush the previous CS ack before changing timeouts */
3392 while (READ_ONCE(engine->execlists.pending[0]))
3393 cpu_relax();
3395 saved_timeout = engine->props.preempt_timeout_ms;
3396 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
3398 i915_request_get(rq);
3399 i915_request_add(rq);
3401 intel_engine_flush_submission(engine);
3402 engine->props.preempt_timeout_ms = saved_timeout;
3404 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3405 intel_gt_set_wedged(gt);
3406 i915_request_put(rq);
3407 err = -ETIME;
3408 goto err_ctx_lo;
3411 igt_spinner_end(&spin_lo);
3412 i915_request_put(rq);
3415 err = 0;
3416 err_ctx_lo:
3417 kernel_context_close(ctx_lo);
3418 err_ctx_hi:
3419 kernel_context_close(ctx_hi);
3420 err_spin_lo:
3421 igt_spinner_fini(&spin_lo);
3422 return err;
/*
 * Draw a value from @rnd, bounded by (@max - @min) and shifted up by @min.
 */
static int random_range(struct rnd_state *rnd, int min, int max)
{
	const int span = max - min;

	return min + i915_prandom_u32_max_state(span, rnd);
}
3430 static int random_priority(struct rnd_state *rnd)
3432 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3435 struct preempt_smoke {
3436 struct intel_gt *gt;
3437 struct i915_gem_context **contexts;
3438 struct intel_engine_cs *engine;
3439 struct drm_i915_gem_object *batch;
3440 unsigned int ncontext;
3441 struct rnd_state prng;
3442 unsigned long count;
3445 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3447 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3448 &smoke->prng)];
3451 static int smoke_submit(struct preempt_smoke *smoke,
3452 struct i915_gem_context *ctx, int prio,
3453 struct drm_i915_gem_object *batch)
3455 struct i915_request *rq;
3456 struct i915_vma *vma = NULL;
3457 int err = 0;
3459 if (batch) {
3460 struct i915_address_space *vm;
3462 vm = i915_gem_context_get_vm_rcu(ctx);
3463 vma = i915_vma_instance(batch, vm, NULL);
3464 i915_vm_put(vm);
3465 if (IS_ERR(vma))
3466 return PTR_ERR(vma);
3468 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3469 if (err)
3470 return err;
3473 ctx->sched.priority = prio;
3475 rq = igt_request_alloc(ctx, smoke->engine);
3476 if (IS_ERR(rq)) {
3477 err = PTR_ERR(rq);
3478 goto unpin;
3481 if (vma) {
3482 i915_vma_lock(vma);
3483 err = i915_request_await_object(rq, vma->obj, false);
3484 if (!err)
3485 err = i915_vma_move_to_active(vma, rq, 0);
3486 if (!err)
3487 err = rq->engine->emit_bb_start(rq,
3488 vma->node.start,
3489 PAGE_SIZE, 0);
3490 i915_vma_unlock(vma);
3493 i915_request_add(rq);
3495 unpin:
3496 if (vma)
3497 i915_vma_unpin(vma);
3499 return err;
3502 static int smoke_crescendo_thread(void *arg)
3504 struct preempt_smoke *smoke = arg;
3505 IGT_TIMEOUT(end_time);
3506 unsigned long count;
3508 count = 0;
3509 do {
3510 struct i915_gem_context *ctx = smoke_context(smoke);
3511 int err;
3513 err = smoke_submit(smoke,
3514 ctx, count % I915_PRIORITY_MAX,
3515 smoke->batch);
3516 if (err)
3517 return err;
3519 count++;
3520 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3522 smoke->count = count;
3523 return 0;
3526 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3527 #define BATCH BIT(0)
3529 struct task_struct *tsk[I915_NUM_ENGINES] = {};
3530 struct preempt_smoke arg[I915_NUM_ENGINES];
3531 struct intel_engine_cs *engine;
3532 enum intel_engine_id id;
3533 unsigned long count;
3534 int err = 0;
3536 for_each_engine(engine, smoke->gt, id) {
3537 arg[id] = *smoke;
3538 arg[id].engine = engine;
3539 if (!(flags & BATCH))
3540 arg[id].batch = NULL;
3541 arg[id].count = 0;
3543 tsk[id] = kthread_run(smoke_crescendo_thread, &arg,
3544 "igt/smoke:%d", id);
3545 if (IS_ERR(tsk[id])) {
3546 err = PTR_ERR(tsk[id]);
3547 break;
3549 get_task_struct(tsk[id]);
3552 yield(); /* start all threads before we kthread_stop() */
3554 count = 0;
3555 for_each_engine(engine, smoke->gt, id) {
3556 int status;
3558 if (IS_ERR_OR_NULL(tsk[id]))
3559 continue;
3561 status = kthread_stop(tsk[id]);
3562 if (status && !err)
3563 err = status;
3565 count += arg[id].count;
3567 put_task_struct(tsk[id]);
3570 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3571 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3572 return 0;
3575 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3577 enum intel_engine_id id;
3578 IGT_TIMEOUT(end_time);
3579 unsigned long count;
3581 count = 0;
3582 do {
3583 for_each_engine(smoke->engine, smoke->gt, id) {
3584 struct i915_gem_context *ctx = smoke_context(smoke);
3585 int err;
3587 err = smoke_submit(smoke,
3588 ctx, random_priority(&smoke->prng),
3589 flags & BATCH ? smoke->batch : NULL);
3590 if (err)
3591 return err;
3593 count++;
3595 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3597 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3598 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3599 return 0;
3602 static int live_preempt_smoke(void *arg)
3604 struct preempt_smoke smoke = {
3605 .gt = arg,
3606 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3607 .ncontext = 256,
3609 const unsigned int phase[] = { 0, BATCH };
3610 struct igt_live_test t;
3611 int err = -ENOMEM;
3612 u32 *cs;
3613 int n;
3615 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
3616 return 0;
3618 smoke.contexts = kmalloc_array(smoke.ncontext,
3619 sizeof(*smoke.contexts),
3620 GFP_KERNEL);
3621 if (!smoke.contexts)
3622 return -ENOMEM;
3624 smoke.batch =
3625 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3626 if (IS_ERR(smoke.batch)) {
3627 err = PTR_ERR(smoke.batch);
3628 goto err_free;
3631 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
3632 if (IS_ERR(cs)) {
3633 err = PTR_ERR(cs);
3634 goto err_batch;
3636 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3637 cs[n] = MI_ARB_CHECK;
3638 cs[n] = MI_BATCH_BUFFER_END;
3639 i915_gem_object_flush_map(smoke.batch);
3640 i915_gem_object_unpin_map(smoke.batch);
3642 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3643 err = -EIO;
3644 goto err_batch;
3647 for (n = 0; n < smoke.ncontext; n++) {
3648 smoke.contexts[n] = kernel_context(smoke.gt->i915);
3649 if (!smoke.contexts[n])
3650 goto err_ctx;
3653 for (n = 0; n < ARRAY_SIZE(phase); n++) {
3654 err = smoke_crescendo(&smoke, phase[n]);
3655 if (err)
3656 goto err_ctx;
3658 err = smoke_random(&smoke, phase[n]);
3659 if (err)
3660 goto err_ctx;
3663 err_ctx:
3664 if (igt_live_test_end(&t))
3665 err = -EIO;
3667 for (n = 0; n < smoke.ncontext; n++) {
3668 if (!smoke.contexts[n])
3669 break;
3670 kernel_context_close(smoke.contexts[n]);
3673 err_batch:
3674 i915_gem_object_put(smoke.batch);
3675 err_free:
3676 kfree(smoke.contexts);
3678 return err;
3681 static int nop_virtual_engine(struct intel_gt *gt,
3682 struct intel_engine_cs **siblings,
3683 unsigned int nsibling,
3684 unsigned int nctx,
3685 unsigned int flags)
3686 #define CHAIN BIT(0)
3688 IGT_TIMEOUT(end_time);
3689 struct i915_request *request[16] = {};
3690 struct intel_context *ve[16];
3691 unsigned long n, prime, nc;
3692 struct igt_live_test t;
3693 ktime_t times[2] = {};
3694 int err;
3696 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3698 for (n = 0; n < nctx; n++) {
3699 ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3700 if (IS_ERR(ve[n])) {
3701 err = PTR_ERR(ve[n]);
3702 nctx = n;
3703 goto out;
3706 err = intel_context_pin(ve[n]);
3707 if (err) {
3708 intel_context_put(ve[n]);
3709 nctx = n;
3710 goto out;
3714 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3715 if (err)
3716 goto out;
3718 for_each_prime_number_from(prime, 1, 8192) {
3719 times[1] = ktime_get_raw();
3721 if (flags & CHAIN) {
3722 for (nc = 0; nc < nctx; nc++) {
3723 for (n = 0; n < prime; n++) {
3724 struct i915_request *rq;
3726 rq = i915_request_create(ve[nc]);
3727 if (IS_ERR(rq)) {
3728 err = PTR_ERR(rq);
3729 goto out;
3732 if (request[nc])
3733 i915_request_put(request[nc]);
3734 request[nc] = i915_request_get(rq);
3735 i915_request_add(rq);
3738 } else {
3739 for (n = 0; n < prime; n++) {
3740 for (nc = 0; nc < nctx; nc++) {
3741 struct i915_request *rq;
3743 rq = i915_request_create(ve[nc]);
3744 if (IS_ERR(rq)) {
3745 err = PTR_ERR(rq);
3746 goto out;
3749 if (request[nc])
3750 i915_request_put(request[nc]);
3751 request[nc] = i915_request_get(rq);
3752 i915_request_add(rq);
3757 for (nc = 0; nc < nctx; nc++) {
3758 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3759 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3760 __func__, ve[0]->engine->name,
3761 request[nc]->fence.context,
3762 request[nc]->fence.seqno);
3764 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3765 __func__, ve[0]->engine->name,
3766 request[nc]->fence.context,
3767 request[nc]->fence.seqno);
3768 GEM_TRACE_DUMP();
3769 intel_gt_set_wedged(gt);
3770 break;
3774 times[1] = ktime_sub(ktime_get_raw(), times[1]);
3775 if (prime == 1)
3776 times[0] = times[1];
3778 for (nc = 0; nc < nctx; nc++) {
3779 i915_request_put(request[nc]);
3780 request[nc] = NULL;
3783 if (__igt_timeout(end_time, NULL))
3784 break;
3787 err = igt_live_test_end(&t);
3788 if (err)
3789 goto out;
3791 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3792 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3793 prime, div64_u64(ktime_to_ns(times[1]), prime));
3795 out:
3796 if (igt_flush_test(gt->i915))
3797 err = -EIO;
3799 for (nc = 0; nc < nctx; nc++) {
3800 i915_request_put(request[nc]);
3801 intel_context_unpin(ve[nc]);
3802 intel_context_put(ve[nc]);
3804 return err;
3807 static unsigned int
3808 __select_siblings(struct intel_gt *gt,
3809 unsigned int class,
3810 struct intel_engine_cs **siblings,
3811 bool (*filter)(const struct intel_engine_cs *))
3813 unsigned int n = 0;
3814 unsigned int inst;
3816 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3817 if (!gt->engine_class[class][inst])
3818 continue;
3820 if (filter && !filter(gt->engine_class[class][inst]))
3821 continue;
3823 siblings[n++] = gt->engine_class[class][inst];
3826 return n;
3829 static unsigned int
3830 select_siblings(struct intel_gt *gt,
3831 unsigned int class,
3832 struct intel_engine_cs **siblings)
3834 return __select_siblings(gt, class, siblings, NULL);
3837 static int live_virtual_engine(void *arg)
3839 struct intel_gt *gt = arg;
3840 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3841 struct intel_engine_cs *engine;
3842 enum intel_engine_id id;
3843 unsigned int class;
3844 int err;
3846 if (intel_uc_uses_guc_submission(&gt->uc))
3847 return 0;
3849 for_each_engine(engine, gt, id) {
3850 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3851 if (err) {
3852 pr_err("Failed to wrap engine %s: err=%d\n",
3853 engine->name, err);
3854 return err;
3858 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3859 int nsibling, n;
3861 nsibling = select_siblings(gt, class, siblings);
3862 if (nsibling < 2)
3863 continue;
3865 for (n = 1; n <= nsibling + 1; n++) {
3866 err = nop_virtual_engine(gt, siblings, nsibling,
3867 n, 0);
3868 if (err)
3869 return err;
3872 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3873 if (err)
3874 return err;
3877 return 0;
3880 static int mask_virtual_engine(struct intel_gt *gt,
3881 struct intel_engine_cs **siblings,
3882 unsigned int nsibling)
3884 struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3885 struct intel_context *ve;
3886 struct igt_live_test t;
3887 unsigned int n;
3888 int err;
3891 * Check that by setting the execution mask on a request, we can
3892 * restrict it to our desired engine within the virtual engine.
3895 ve = intel_execlists_create_virtual(siblings, nsibling);
3896 if (IS_ERR(ve)) {
3897 err = PTR_ERR(ve);
3898 goto out_close;
3901 err = intel_context_pin(ve);
3902 if (err)
3903 goto out_put;
3905 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3906 if (err)
3907 goto out_unpin;
3909 for (n = 0; n < nsibling; n++) {
3910 request[n] = i915_request_create(ve);
3911 if (IS_ERR(request[n])) {
3912 err = PTR_ERR(request[n]);
3913 nsibling = n;
3914 goto out;
3917 /* Reverse order as it's more likely to be unnatural */
3918 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3920 i915_request_get(request[n]);
3921 i915_request_add(request[n]);
3924 for (n = 0; n < nsibling; n++) {
3925 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3926 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3927 __func__, ve->engine->name,
3928 request[n]->fence.context,
3929 request[n]->fence.seqno);
3931 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3932 __func__, ve->engine->name,
3933 request[n]->fence.context,
3934 request[n]->fence.seqno);
3935 GEM_TRACE_DUMP();
3936 intel_gt_set_wedged(gt);
3937 err = -EIO;
3938 goto out;
3941 if (request[n]->engine != siblings[nsibling - n - 1]) {
3942 pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3943 request[n]->engine->name,
3944 siblings[nsibling - n - 1]->name);
3945 err = -EINVAL;
3946 goto out;
3950 err = igt_live_test_end(&t);
3951 out:
3952 if (igt_flush_test(gt->i915))
3953 err = -EIO;
3955 for (n = 0; n < nsibling; n++)
3956 i915_request_put(request[n]);
3958 out_unpin:
3959 intel_context_unpin(ve);
3960 out_put:
3961 intel_context_put(ve);
3962 out_close:
3963 return err;
3966 static int live_virtual_mask(void *arg)
3968 struct intel_gt *gt = arg;
3969 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3970 unsigned int class;
3971 int err;
3973 if (intel_uc_uses_guc_submission(&gt->uc))
3974 return 0;
3976 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3977 unsigned int nsibling;
3979 nsibling = select_siblings(gt, class, siblings);
3980 if (nsibling < 2)
3981 continue;
3983 err = mask_virtual_engine(gt, siblings, nsibling);
3984 if (err)
3985 return err;
3988 return 0;
3991 static int slicein_virtual_engine(struct intel_gt *gt,
3992 struct intel_engine_cs **siblings,
3993 unsigned int nsibling)
3995 const long timeout = slice_timeout(siblings[0]);
3996 struct intel_context *ce;
3997 struct i915_request *rq;
3998 struct igt_spinner spin;
3999 unsigned int n;
4000 int err = 0;
4003 * Virtual requests must take part in timeslicing on the target engines.
4006 if (igt_spinner_init(&spin, gt))
4007 return -ENOMEM;
4009 for (n = 0; n < nsibling; n++) {
4010 ce = intel_context_create(siblings[n]);
4011 if (IS_ERR(ce)) {
4012 err = PTR_ERR(ce);
4013 goto out;
4016 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4017 intel_context_put(ce);
4018 if (IS_ERR(rq)) {
4019 err = PTR_ERR(rq);
4020 goto out;
4023 i915_request_add(rq);
4026 ce = intel_execlists_create_virtual(siblings, nsibling);
4027 if (IS_ERR(ce)) {
4028 err = PTR_ERR(ce);
4029 goto out;
4032 rq = intel_context_create_request(ce);
4033 intel_context_put(ce);
4034 if (IS_ERR(rq)) {
4035 err = PTR_ERR(rq);
4036 goto out;
4039 i915_request_get(rq);
4040 i915_request_add(rq);
4041 if (i915_request_wait(rq, 0, timeout) < 0) {
4042 GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4043 __func__, rq->engine->name);
4044 GEM_TRACE_DUMP();
4045 intel_gt_set_wedged(gt);
4046 err = -EIO;
4048 i915_request_put(rq);
4050 out:
4051 igt_spinner_end(&spin);
4052 if (igt_flush_test(gt->i915))
4053 err = -EIO;
4054 igt_spinner_fini(&spin);
4055 return err;
4058 static int sliceout_virtual_engine(struct intel_gt *gt,
4059 struct intel_engine_cs **siblings,
4060 unsigned int nsibling)
4062 const long timeout = slice_timeout(siblings[0]);
4063 struct intel_context *ce;
4064 struct i915_request *rq;
4065 struct igt_spinner spin;
4066 unsigned int n;
4067 int err = 0;
4070 * Virtual requests must allow others a fair timeslice.
4073 if (igt_spinner_init(&spin, gt))
4074 return -ENOMEM;
4076 /* XXX We do not handle oversubscription and fairness with normal rq */
4077 for (n = 0; n < nsibling; n++) {
4078 ce = intel_execlists_create_virtual(siblings, nsibling);
4079 if (IS_ERR(ce)) {
4080 err = PTR_ERR(ce);
4081 goto out;
4084 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4085 intel_context_put(ce);
4086 if (IS_ERR(rq)) {
4087 err = PTR_ERR(rq);
4088 goto out;
4091 i915_request_add(rq);
4094 for (n = 0; !err && n < nsibling; n++) {
4095 ce = intel_context_create(siblings[n]);
4096 if (IS_ERR(ce)) {
4097 err = PTR_ERR(ce);
4098 goto out;
4101 rq = intel_context_create_request(ce);
4102 intel_context_put(ce);
4103 if (IS_ERR(rq)) {
4104 err = PTR_ERR(rq);
4105 goto out;
4108 i915_request_get(rq);
4109 i915_request_add(rq);
4110 if (i915_request_wait(rq, 0, timeout) < 0) {
4111 GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4112 __func__, siblings[n]->name);
4113 GEM_TRACE_DUMP();
4114 intel_gt_set_wedged(gt);
4115 err = -EIO;
4117 i915_request_put(rq);
4120 out:
4121 igt_spinner_end(&spin);
4122 if (igt_flush_test(gt->i915))
4123 err = -EIO;
4124 igt_spinner_fini(&spin);
4125 return err;
4128 static int live_virtual_slice(void *arg)
4130 struct intel_gt *gt = arg;
4131 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4132 unsigned int class;
4133 int err;
4135 if (intel_uc_uses_guc_submission(&gt->uc))
4136 return 0;
4138 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4139 unsigned int nsibling;
4141 nsibling = __select_siblings(gt, class, siblings,
4142 intel_engine_has_timeslices);
4143 if (nsibling < 2)
4144 continue;
4146 err = slicein_virtual_engine(gt, siblings, nsibling);
4147 if (err)
4148 return err;
4150 err = sliceout_virtual_engine(gt, siblings, nsibling);
4151 if (err)
4152 return err;
4155 return 0;
4158 static int preserved_virtual_engine(struct intel_gt *gt,
4159 struct intel_engine_cs **siblings,
4160 unsigned int nsibling)
4162 struct i915_request *last = NULL;
4163 struct intel_context *ve;
4164 struct i915_vma *scratch;
4165 struct igt_live_test t;
4166 unsigned int n;
4167 int err = 0;
4168 u32 *cs;
4170 scratch = create_scratch(siblings[0]->gt);
4171 if (IS_ERR(scratch))
4172 return PTR_ERR(scratch);
4174 err = i915_vma_sync(scratch);
4175 if (err)
4176 goto out_scratch;
4178 ve = intel_execlists_create_virtual(siblings, nsibling);
4179 if (IS_ERR(ve)) {
4180 err = PTR_ERR(ve);
4181 goto out_scratch;
4184 err = intel_context_pin(ve);
4185 if (err)
4186 goto out_put;
4188 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
4189 if (err)
4190 goto out_unpin;
4192 for (n = 0; n < NUM_GPR_DW; n++) {
4193 struct intel_engine_cs *engine = siblings[n % nsibling];
4194 struct i915_request *rq;
4196 rq = i915_request_create(ve);
4197 if (IS_ERR(rq)) {
4198 err = PTR_ERR(rq);
4199 goto out_end;
4202 i915_request_put(last);
4203 last = i915_request_get(rq);
4205 cs = intel_ring_begin(rq, 8);
4206 if (IS_ERR(cs)) {
4207 i915_request_add(rq);
4208 err = PTR_ERR(cs);
4209 goto out_end;
4212 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4213 *cs++ = CS_GPR(engine, n);
4214 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4215 *cs++ = 0;
4217 *cs++ = MI_LOAD_REGISTER_IMM(1);
4218 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
4219 *cs++ = n + 1;
4221 *cs++ = MI_NOOP;
4222 intel_ring_advance(rq, cs);
4224 /* Restrict this request to run on a particular engine */
4225 rq->execution_mask = engine->mask;
4226 i915_request_add(rq);
4229 if (i915_request_wait(last, 0, HZ / 5) < 0) {
4230 err = -ETIME;
4231 goto out_end;
4234 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4235 if (IS_ERR(cs)) {
4236 err = PTR_ERR(cs);
4237 goto out_end;
4240 for (n = 0; n < NUM_GPR_DW; n++) {
4241 if (cs[n] != n) {
4242 pr_err("Incorrect value[%d] found for GPR[%d]\n",
4243 cs[n], n);
4244 err = -EINVAL;
4245 break;
4249 i915_gem_object_unpin_map(scratch->obj);
4251 out_end:
4252 if (igt_live_test_end(&t))
4253 err = -EIO;
4254 i915_request_put(last);
4255 out_unpin:
4256 intel_context_unpin(ve);
4257 out_put:
4258 intel_context_put(ve);
4259 out_scratch:
4260 i915_vma_unpin_and_release(&scratch, 0);
4261 return err;
4264 static int live_virtual_preserved(void *arg)
4266 struct intel_gt *gt = arg;
4267 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4268 unsigned int class;
4271 * Check that the context image retains non-privileged (user) registers
4272 * from one engine to the next. For this we check that the CS_GPR
4273 * are preserved.
4276 if (intel_uc_uses_guc_submission(&gt->uc))
4277 return 0;
4279 /* As we use CS_GPR we cannot run before they existed on all engines. */
4280 if (INTEL_GEN(gt->i915) < 9)
4281 return 0;
4283 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4284 int nsibling, err;
4286 nsibling = select_siblings(gt, class, siblings);
4287 if (nsibling < 2)
4288 continue;
4290 err = preserved_virtual_engine(gt, siblings, nsibling);
4291 if (err)
4292 return err;
4295 return 0;
4298 static int bond_virtual_engine(struct intel_gt *gt,
4299 unsigned int class,
4300 struct intel_engine_cs **siblings,
4301 unsigned int nsibling,
4302 unsigned int flags)
4303 #define BOND_SCHEDULE BIT(0)
4305 struct intel_engine_cs *master;
4306 struct i915_request *rq[16];
4307 enum intel_engine_id id;
4308 struct igt_spinner spin;
4309 unsigned long n;
4310 int err;
4313 * A set of bonded requests is intended to be run concurrently
4314 * across a number of engines. We use one request per-engine
4315 * and a magic fence to schedule each of the bonded requests
4316 * at the same time. A consequence of our current scheduler is that
4317 * we only move requests to the HW ready queue when the request
4318 * becomes ready, that is when all of its prerequisite fences have
4319 * been signaled. As one of those fences is the master submit fence,
4320 * there is a delay on all secondary fences as the HW may be
4321 * currently busy. Equally, as all the requests are independent,
4322 * they may have other fences that delay individual request
4323 * submission to HW. Ergo, we do not guarantee that all requests are
4324 * immediately submitted to HW at the same time, just that if the
4325 * rules are abided by, they are ready at the same time as the
4326 * first is submitted. Userspace can embed semaphores in its batch
4327 * to ensure parallel execution of its phases as it requires.
4328 * Though naturally it gets requested that perhaps the scheduler should
4329 * take care of parallel execution, even across preemption events on
4330 * different HW. (The proper answer is of course "lalalala".)
4332 * With the submit-fence, we have identified three possible phases
4333 * of synchronisation depending on the master fence: queued (not
4334 * ready), executing, and signaled. The first two are quite simple
4335 * and checked below. However, the signaled master fence handling is
4336 * contentious. Currently we do not distinguish between a signaled
4337 * fence and an expired fence, as once signaled it does not convey
4338 * any information about the previous execution. It may even be freed
4339 * and hence checking later it may not exist at all. Ergo we currently
4340 * do not apply the bonding constraint for an already signaled fence,
4341 * as our expectation is that it should not constrain the secondaries
4342 * and is outside of the scope of the bonded request API (i.e. all
4343 * userspace requests are meant to be running in parallel). As
4344 * it imposes no constraint, and is effectively a no-op, we do not
4345 * check below as normal execution flows are checked extensively above.
4347 * XXX Is the degenerate handling of signaled submit fences the
4348 * expected behaviour for userpace?
4351 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
4353 if (igt_spinner_init(&spin, gt))
4354 return -ENOMEM;
4356 err = 0;
4357 rq[0] = ERR_PTR(-ENOMEM);
4358 for_each_engine(master, gt, id) {
4359 struct i915_sw_fence fence = {};
4360 struct intel_context *ce;
4362 if (master->class == class)
4363 continue;
4365 ce = intel_context_create(master);
4366 if (IS_ERR(ce)) {
4367 err = PTR_ERR(ce);
4368 goto out;
4371 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
4373 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
4374 intel_context_put(ce);
4375 if (IS_ERR(rq[0])) {
4376 err = PTR_ERR(rq[0]);
4377 goto out;
4379 i915_request_get(rq[0]);
4381 if (flags & BOND_SCHEDULE) {
4382 onstack_fence_init(&fence);
4383 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
4384 &fence,
4385 GFP_KERNEL);
4388 i915_request_add(rq[0]);
4389 if (err < 0)
4390 goto out;
4392 if (!(flags & BOND_SCHEDULE) &&
4393 !igt_wait_for_spinner(&spin, rq[0])) {
4394 err = -EIO;
4395 goto out;
4398 for (n = 0; n < nsibling; n++) {
4399 struct intel_context *ve;
4401 ve = intel_execlists_create_virtual(siblings, nsibling);
4402 if (IS_ERR(ve)) {
4403 err = PTR_ERR(ve);
4404 onstack_fence_fini(&fence);
4405 goto out;
4408 err = intel_virtual_engine_attach_bond(ve->engine,
4409 master,
4410 siblings[n]);
4411 if (err) {
4412 intel_context_put(ve);
4413 onstack_fence_fini(&fence);
4414 goto out;
4417 err = intel_context_pin(ve);
4418 intel_context_put(ve);
4419 if (err) {
4420 onstack_fence_fini(&fence);
4421 goto out;
4424 rq[n + 1] = i915_request_create(ve);
4425 intel_context_unpin(ve);
4426 if (IS_ERR(rq[n + 1])) {
4427 err = PTR_ERR(rq[n + 1]);
4428 onstack_fence_fini(&fence);
4429 goto out;
4431 i915_request_get(rq[n + 1]);
4433 err = i915_request_await_execution(rq[n + 1],
4434 &rq[0]->fence,
4435 ve->engine->bond_execute);
4436 i915_request_add(rq[n + 1]);
4437 if (err < 0) {
4438 onstack_fence_fini(&fence);
4439 goto out;
4442 onstack_fence_fini(&fence);
4443 intel_engine_flush_submission(master);
4444 igt_spinner_end(&spin);
4446 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
4447 pr_err("Master request did not execute (on %s)!\n",
4448 rq[0]->engine->name);
4449 err = -EIO;
4450 goto out;
4453 for (n = 0; n < nsibling; n++) {
4454 if (i915_request_wait(rq[n + 1], 0,
4455 MAX_SCHEDULE_TIMEOUT) < 0) {
4456 err = -EIO;
4457 goto out;
4460 if (rq[n + 1]->engine != siblings[n]) {
4461 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
4462 siblings[n]->name,
4463 rq[n + 1]->engine->name,
4464 rq[0]->engine->name);
4465 err = -EINVAL;
4466 goto out;
4470 for (n = 0; !IS_ERR(rq[n]); n++)
4471 i915_request_put(rq[n]);
4472 rq[0] = ERR_PTR(-ENOMEM);
4475 out:
4476 for (n = 0; !IS_ERR(rq[n]); n++)
4477 i915_request_put(rq[n]);
4478 if (igt_flush_test(gt->i915))
4479 err = -EIO;
4481 igt_spinner_fini(&spin);
4482 return err;
4485 static int live_virtual_bond(void *arg)
4487 static const struct phase {
4488 const char *name;
4489 unsigned int flags;
4490 } phases[] = {
4491 { "", 0 },
4492 { "schedule", BOND_SCHEDULE },
4493 { },
4495 struct intel_gt *gt = arg;
4496 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4497 unsigned int class;
4498 int err;
4500 if (intel_uc_uses_guc_submission(&gt->uc))
4501 return 0;
4503 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4504 const struct phase *p;
4505 int nsibling;
4507 nsibling = select_siblings(gt, class, siblings);
4508 if (nsibling < 2)
4509 continue;
4511 for (p = phases; p->name; p++) {
4512 err = bond_virtual_engine(gt,
4513 class, siblings, nsibling,
4514 p->flags);
4515 if (err) {
4516 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4517 __func__, p->name, class, nsibling, err);
4518 return err;
4523 return 0;
4526 static int reset_virtual_engine(struct intel_gt *gt,
4527 struct intel_engine_cs **siblings,
4528 unsigned int nsibling)
4530 struct intel_engine_cs *engine;
4531 struct intel_context *ve;
4532 struct igt_spinner spin;
4533 struct i915_request *rq;
4534 unsigned int n;
4535 int err = 0;
4538 * In order to support offline error capture for fast preempt reset,
4539 * we need to decouple the guilty request and ensure that it and its
4540 * descendents are not executed while the capture is in progress.
4543 if (igt_spinner_init(&spin, gt))
4544 return -ENOMEM;
4546 ve = intel_execlists_create_virtual(siblings, nsibling);
4547 if (IS_ERR(ve)) {
4548 err = PTR_ERR(ve);
4549 goto out_spin;
4552 for (n = 0; n < nsibling; n++)
4553 st_engine_heartbeat_disable(siblings[n]);
4555 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4556 if (IS_ERR(rq)) {
4557 err = PTR_ERR(rq);
4558 goto out_heartbeat;
4560 i915_request_add(rq);
4562 if (!igt_wait_for_spinner(&spin, rq)) {
4563 intel_gt_set_wedged(gt);
4564 err = -ETIME;
4565 goto out_heartbeat;
4568 engine = rq->engine;
4569 GEM_BUG_ON(engine == ve->engine);
4571 /* Take ownership of the reset and tasklet */
4572 if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
4573 &gt->reset.flags)) {
4574 intel_gt_set_wedged(gt);
4575 err = -EBUSY;
4576 goto out_heartbeat;
4578 tasklet_disable(&engine->execlists.tasklet);
4580 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
4581 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4583 /* Fake a preemption event; failed of course */
4584 spin_lock_irq(&engine->active.lock);
4585 __unwind_incomplete_requests(engine);
4586 spin_unlock_irq(&engine->active.lock);
4587 GEM_BUG_ON(rq->engine != ve->engine);
4589 /* Reset the engine while keeping our active request on hold */
4590 execlists_hold(engine, rq);
4591 GEM_BUG_ON(!i915_request_on_hold(rq));
4593 intel_engine_reset(engine, NULL);
4594 GEM_BUG_ON(rq->fence.error != -EIO);
4596 /* Release our grasp on the engine, letting CS flow again */
4597 tasklet_enable(&engine->execlists.tasklet);
4598 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
4600 /* Check that we do not resubmit the held request */
4601 i915_request_get(rq);
4602 if (!i915_request_wait(rq, 0, HZ / 5)) {
4603 pr_err("%s: on hold request completed!\n",
4604 engine->name);
4605 intel_gt_set_wedged(gt);
4606 err = -EIO;
4607 goto out_rq;
4609 GEM_BUG_ON(!i915_request_on_hold(rq));
4611 /* But is resubmitted on release */
4612 execlists_unhold(engine, rq);
4613 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4614 pr_err("%s: held request did not complete!\n",
4615 engine->name);
4616 intel_gt_set_wedged(gt);
4617 err = -ETIME;
4620 out_rq:
4621 i915_request_put(rq);
4622 out_heartbeat:
4623 for (n = 0; n < nsibling; n++)
4624 st_engine_heartbeat_enable(siblings[n]);
4626 intel_context_put(ve);
4627 out_spin:
4628 igt_spinner_fini(&spin);
4629 return err;
4632 static int live_virtual_reset(void *arg)
4634 struct intel_gt *gt = arg;
4635 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4636 unsigned int class;
4639 * Check that we handle a reset event within a virtual engine.
4640 * Only the physical engine is reset, but we have to check the flow
4641 * of the virtual requests around the reset, and make sure it is not
4642 * forgotten.
4645 if (intel_uc_uses_guc_submission(&gt->uc))
4646 return 0;
4648 if (!intel_has_reset_engine(gt))
4649 return 0;
4651 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4652 int nsibling, err;
4654 nsibling = select_siblings(gt, class, siblings);
4655 if (nsibling < 2)
4656 continue;
4658 err = reset_virtual_engine(gt, siblings, nsibling);
4659 if (err)
4660 return err;
4663 return 0;
4666 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4668 static const struct i915_subtest tests[] = {
4669 SUBTEST(live_sanitycheck),
4670 SUBTEST(live_unlite_switch),
4671 SUBTEST(live_unlite_preempt),
4672 SUBTEST(live_unlite_ring),
4673 SUBTEST(live_pin_rewind),
4674 SUBTEST(live_hold_reset),
4675 SUBTEST(live_error_interrupt),
4676 SUBTEST(live_timeslice_preempt),
4677 SUBTEST(live_timeslice_rewind),
4678 SUBTEST(live_timeslice_queue),
4679 SUBTEST(live_timeslice_nopreempt),
4680 SUBTEST(live_busywait_preempt),
4681 SUBTEST(live_preempt),
4682 SUBTEST(live_late_preempt),
4683 SUBTEST(live_nopreempt),
4684 SUBTEST(live_preempt_cancel),
4685 SUBTEST(live_suppress_self_preempt),
4686 SUBTEST(live_chain_preempt),
4687 SUBTEST(live_preempt_ring),
4688 SUBTEST(live_preempt_gang),
4689 SUBTEST(live_preempt_timeout),
4690 SUBTEST(live_preempt_user),
4691 SUBTEST(live_preempt_smoke),
4692 SUBTEST(live_virtual_engine),
4693 SUBTEST(live_virtual_mask),
4694 SUBTEST(live_virtual_preserved),
4695 SUBTEST(live_virtual_slice),
4696 SUBTEST(live_virtual_bond),
4697 SUBTEST(live_virtual_reset),
4700 if (!HAS_EXECLISTS(i915))
4701 return 0;
4703 if (intel_gt_is_wedged(&i915->gt))
4704 return 0;
4706 return intel_gt_live_subtests(tests, &i915->gt);
4709 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
4711 const u32 offset =
4712 i915_ggtt_offset(ce->engine->status_page.vma) +
4713 offset_in_page(slot);
4714 struct i915_request *rq;
4715 u32 *cs;
4717 rq = intel_context_create_request(ce);
4718 if (IS_ERR(rq))
4719 return PTR_ERR(rq);
4721 cs = intel_ring_begin(rq, 4);
4722 if (IS_ERR(cs)) {
4723 i915_request_add(rq);
4724 return PTR_ERR(cs);
4727 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
4728 *cs++ = offset;
4729 *cs++ = 0;
4730 *cs++ = 1;
4732 intel_ring_advance(rq, cs);
4734 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4735 i915_request_add(rq);
4736 return 0;
4739 static int context_flush(struct intel_context *ce, long timeout)
4741 struct i915_request *rq;
4742 struct dma_fence *fence;
4743 int err = 0;
4745 rq = intel_engine_create_kernel_request(ce->engine);
4746 if (IS_ERR(rq))
4747 return PTR_ERR(rq);
4749 fence = i915_active_fence_get(&ce->timeline->last_request);
4750 if (fence) {
4751 i915_request_await_dma_fence(rq, fence);
4752 dma_fence_put(fence);
4755 rq = i915_request_get(rq);
4756 i915_request_add(rq);
4757 if (i915_request_wait(rq, 0, timeout) < 0)
4758 err = -ETIME;
4759 i915_request_put(rq);
4761 rmb(); /* We know the request is written, make sure all state is too! */
4762 return err;
4765 static int live_lrc_layout(void *arg)
4767 struct intel_gt *gt = arg;
4768 struct intel_engine_cs *engine;
4769 enum intel_engine_id id;
4770 u32 *lrc;
4771 int err;
4774 * Check the registers offsets we use to create the initial reg state
4775 * match the layout saved by HW.
4778 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
4779 if (!lrc)
4780 return -ENOMEM;
4782 err = 0;
4783 for_each_engine(engine, gt, id) {
4784 u32 *hw;
4785 int dw;
4787 if (!engine->default_state)
4788 continue;
4790 hw = shmem_pin_map(engine->default_state);
4791 if (IS_ERR(hw)) {
4792 err = PTR_ERR(hw);
4793 break;
4795 hw += LRC_STATE_OFFSET / sizeof(*hw);
4797 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
4798 engine->kernel_context,
4799 engine,
4800 engine->kernel_context->ring,
4801 true);
4803 dw = 0;
4804 do {
4805 u32 lri = hw[dw];
4807 if (lri == 0) {
4808 dw++;
4809 continue;
4812 if (lrc[dw] == 0) {
4813 pr_debug("%s: skipped instruction %x at dword %d\n",
4814 engine->name, lri, dw);
4815 dw++;
4816 continue;
4819 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
4820 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
4821 engine->name, dw, lri);
4822 err = -EINVAL;
4823 break;
4826 if (lrc[dw] != lri) {
4827 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
4828 engine->name, dw, lri, lrc[dw]);
4829 err = -EINVAL;
4830 break;
4833 lri &= 0x7f;
4834 lri++;
4835 dw++;
4837 while (lri) {
4838 if (hw[dw] != lrc[dw]) {
4839 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
4840 engine->name, dw, hw[dw], lrc[dw]);
4841 err = -EINVAL;
4842 break;
4846 * Skip over the actual register value as we
4847 * expect that to differ.
4849 dw += 2;
4850 lri -= 2;
4852 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
4854 if (err) {
4855 pr_info("%s: HW register image:\n", engine->name);
4856 igt_hexdump(hw, PAGE_SIZE);
4858 pr_info("%s: SW register image:\n", engine->name);
4859 igt_hexdump(lrc, PAGE_SIZE);
4862 shmem_unpin_map(engine->default_state, hw);
4863 if (err)
4864 break;
4867 kfree(lrc);
4868 return err;
/*
 * Return the index of the first dword in the PAGE_SIZE-byte array @lri that
 * equals @offset, or -1 if none does.
 */
static int find_offset(const u32 *lri, u32 offset)
{
	int i;

	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
		if (lri[i] == offset)
			return i;

	return -1;
}
4882 static int live_lrc_fixed(void *arg)
4884 struct intel_gt *gt = arg;
4885 struct intel_engine_cs *engine;
4886 enum intel_engine_id id;
4887 int err = 0;
4890 * Check the assumed register offsets match the actual locations in
4891 * the context image.
4894 for_each_engine(engine, gt, id) {
4895 const struct {
4896 u32 reg;
4897 u32 offset;
4898 const char *name;
4899 } tbl[] = {
4901 i915_mmio_reg_offset(RING_START(engine->mmio_base)),
4902 CTX_RING_START - 1,
4903 "RING_START"
4906 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
4907 CTX_RING_CTL - 1,
4908 "RING_CTL"
4911 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
4912 CTX_RING_HEAD - 1,
4913 "RING_HEAD"
4916 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
4917 CTX_RING_TAIL - 1,
4918 "RING_TAIL"
4921 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
4922 lrc_ring_mi_mode(engine),
4923 "RING_MI_MODE"
4926 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
4927 CTX_BB_STATE - 1,
4928 "BB_STATE"
4931 i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
4932 lrc_ring_wa_bb_per_ctx(engine),
4933 "RING_BB_PER_CTX_PTR"
4936 i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
4937 lrc_ring_indirect_ptr(engine),
4938 "RING_INDIRECT_CTX_PTR"
4941 i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
4942 lrc_ring_indirect_offset(engine),
4943 "RING_INDIRECT_CTX_OFFSET"
4946 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
4947 CTX_TIMESTAMP - 1,
4948 "RING_CTX_TIMESTAMP"
4951 i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
4952 lrc_ring_gpr0(engine),
4953 "RING_CS_GPR0"
4956 i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
4957 lrc_ring_cmd_buf_cctl(engine),
4958 "RING_CMD_BUF_CCTL"
4960 { },
4961 }, *t;
4962 u32 *hw;
4964 if (!engine->default_state)
4965 continue;
4967 hw = shmem_pin_map(engine->default_state);
4968 if (IS_ERR(hw)) {
4969 err = PTR_ERR(hw);
4970 break;
4972 hw += LRC_STATE_OFFSET / sizeof(*hw);
4974 for (t = tbl; t->name; t++) {
4975 int dw = find_offset(hw, t->reg);
4977 if (dw != t->offset) {
4978 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
4979 engine->name,
4980 t->name,
4981 t->reg,
4983 t->offset);
4984 err = -EINVAL;
4988 shmem_unpin_map(engine->default_state, hw);
4991 return err;
4994 static int __live_lrc_state(struct intel_engine_cs *engine,
4995 struct i915_vma *scratch)
4997 struct intel_context *ce;
4998 struct i915_request *rq;
4999 struct i915_gem_ww_ctx ww;
5000 enum {
5001 RING_START_IDX = 0,
5002 RING_TAIL_IDX,
5003 MAX_IDX
5005 u32 expected[MAX_IDX];
5006 u32 *cs;
5007 int err;
5008 int n;
5010 ce = intel_context_create(engine);
5011 if (IS_ERR(ce))
5012 return PTR_ERR(ce);
5014 i915_gem_ww_ctx_init(&ww, false);
5015 retry:
5016 err = i915_gem_object_lock(scratch->obj, &ww);
5017 if (!err)
5018 err = intel_context_pin_ww(ce, &ww);
5019 if (err)
5020 goto err_put;
5022 rq = i915_request_create(ce);
5023 if (IS_ERR(rq)) {
5024 err = PTR_ERR(rq);
5025 goto err_unpin;
5028 cs = intel_ring_begin(rq, 4 * MAX_IDX);
5029 if (IS_ERR(cs)) {
5030 err = PTR_ERR(cs);
5031 i915_request_add(rq);
5032 goto err_unpin;
5035 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5036 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
5037 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
5038 *cs++ = 0;
5040 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
5042 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5043 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
5044 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
5045 *cs++ = 0;
5047 err = i915_request_await_object(rq, scratch->obj, true);
5048 if (!err)
5049 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
5051 i915_request_get(rq);
5052 i915_request_add(rq);
5053 if (err)
5054 goto err_rq;
5056 intel_engine_flush_submission(engine);
5057 expected[RING_TAIL_IDX] = ce->ring->tail;
5059 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
5060 err = -ETIME;
5061 goto err_rq;
5064 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
5065 if (IS_ERR(cs)) {
5066 err = PTR_ERR(cs);
5067 goto err_rq;
5070 for (n = 0; n < MAX_IDX; n++) {
5071 if (cs[n] != expected[n]) {
5072 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
5073 engine->name, n, cs[n], expected[n]);
5074 err = -EINVAL;
5075 break;
5079 i915_gem_object_unpin_map(scratch->obj);
5081 err_rq:
5082 i915_request_put(rq);
5083 err_unpin:
5084 intel_context_unpin(ce);
5085 err_put:
5086 if (err == -EDEADLK) {
5087 err = i915_gem_ww_ctx_backoff(&ww);
5088 if (!err)
5089 goto retry;
5091 i915_gem_ww_ctx_fini(&ww);
5092 intel_context_put(ce);
5093 return err;
5096 static int live_lrc_state(void *arg)
5098 struct intel_gt *gt = arg;
5099 struct intel_engine_cs *engine;
5100 struct i915_vma *scratch;
5101 enum intel_engine_id id;
5102 int err = 0;
5105 * Check the live register state matches what we expect for this
5106 * intel_context.
5109 scratch = create_scratch(gt);
5110 if (IS_ERR(scratch))
5111 return PTR_ERR(scratch);
5113 for_each_engine(engine, gt, id) {
5114 err = __live_lrc_state(engine, scratch);
5115 if (err)
5116 break;
5119 if (igt_flush_test(gt->i915))
5120 err = -EIO;
5122 i915_vma_unpin_and_release(&scratch, 0);
5123 return err;
5126 static int gpr_make_dirty(struct intel_context *ce)
5128 struct i915_request *rq;
5129 u32 *cs;
5130 int n;
5132 rq = intel_context_create_request(ce);
5133 if (IS_ERR(rq))
5134 return PTR_ERR(rq);
5136 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
5137 if (IS_ERR(cs)) {
5138 i915_request_add(rq);
5139 return PTR_ERR(cs);
5142 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
5143 for (n = 0; n < NUM_GPR_DW; n++) {
5144 *cs++ = CS_GPR(ce->engine, n);
5145 *cs++ = STACK_MAGIC;
5147 *cs++ = MI_NOOP;
5149 intel_ring_advance(rq, cs);
5151 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
5152 i915_request_add(rq);
5154 return 0;
5157 static struct i915_request *
5158 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
5160 const u32 offset =
5161 i915_ggtt_offset(ce->engine->status_page.vma) +
5162 offset_in_page(slot);
5163 struct i915_request *rq;
5164 u32 *cs;
5165 int err;
5166 int n;
5168 rq = intel_context_create_request(ce);
5169 if (IS_ERR(rq))
5170 return rq;
5172 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
5173 if (IS_ERR(cs)) {
5174 i915_request_add(rq);
5175 return ERR_CAST(cs);
5178 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5179 *cs++ = MI_NOOP;
5181 *cs++ = MI_SEMAPHORE_WAIT |
5182 MI_SEMAPHORE_GLOBAL_GTT |
5183 MI_SEMAPHORE_POLL |
5184 MI_SEMAPHORE_SAD_NEQ_SDD;
5185 *cs++ = 0;
5186 *cs++ = offset;
5187 *cs++ = 0;
5189 for (n = 0; n < NUM_GPR_DW; n++) {
5190 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5191 *cs++ = CS_GPR(ce->engine, n);
5192 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
5193 *cs++ = 0;
5196 i915_vma_lock(scratch);
5197 err = i915_request_await_object(rq, scratch->obj, true);
5198 if (!err)
5199 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
5200 i915_vma_unlock(scratch);
5202 i915_request_get(rq);
5203 i915_request_add(rq);
5204 if (err) {
5205 i915_request_put(rq);
5206 rq = ERR_PTR(err);
5209 return rq;
5212 static int __live_lrc_gpr(struct intel_engine_cs *engine,
5213 struct i915_vma *scratch,
5214 bool preempt)
5216 u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
5217 struct intel_context *ce;
5218 struct i915_request *rq;
5219 u32 *cs;
5220 int err;
5221 int n;
5223 if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
5224 return 0; /* GPR only on rcs0 for gen8 */
5226 err = gpr_make_dirty(engine->kernel_context);
5227 if (err)
5228 return err;
5230 ce = intel_context_create(engine);
5231 if (IS_ERR(ce))
5232 return PTR_ERR(ce);
5234 rq = __gpr_read(ce, scratch, slot);
5235 if (IS_ERR(rq)) {
5236 err = PTR_ERR(rq);
5237 goto err_put;
5240 err = wait_for_submit(engine, rq, HZ / 2);
5241 if (err)
5242 goto err_rq;
5244 if (preempt) {
5245 err = gpr_make_dirty(engine->kernel_context);
5246 if (err)
5247 goto err_rq;
5249 err = emit_semaphore_signal(engine->kernel_context, slot);
5250 if (err)
5251 goto err_rq;
5252 } else {
5253 slot[0] = 1;
5254 wmb();
5257 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
5258 err = -ETIME;
5259 goto err_rq;
5262 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
5263 if (IS_ERR(cs)) {
5264 err = PTR_ERR(cs);
5265 goto err_rq;
5268 for (n = 0; n < NUM_GPR_DW; n++) {
5269 if (cs[n]) {
5270 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
5271 engine->name,
5272 n / 2, n & 1 ? "udw" : "ldw",
5273 cs[n]);
5274 err = -EINVAL;
5275 break;
5279 i915_gem_object_unpin_map(scratch->obj);
5281 err_rq:
5282 memset32(&slot[0], -1, 4);
5283 wmb();
5284 i915_request_put(rq);
5285 err_put:
5286 intel_context_put(ce);
5287 return err;
5290 static int live_lrc_gpr(void *arg)
5292 struct intel_gt *gt = arg;
5293 struct intel_engine_cs *engine;
5294 struct i915_vma *scratch;
5295 enum intel_engine_id id;
5296 int err = 0;
5299 * Check that GPR registers are cleared in new contexts as we need
5300 * to avoid leaking any information from previous contexts.
5303 scratch = create_scratch(gt);
5304 if (IS_ERR(scratch))
5305 return PTR_ERR(scratch);
5307 for_each_engine(engine, gt, id) {
5308 st_engine_heartbeat_disable(engine);
5310 err = __live_lrc_gpr(engine, scratch, false);
5311 if (err)
5312 goto err;
5314 err = __live_lrc_gpr(engine, scratch, true);
5315 if (err)
5316 goto err;
5318 err:
5319 st_engine_heartbeat_enable(engine);
5320 if (igt_flush_test(gt->i915))
5321 err = -EIO;
5322 if (err)
5323 break;
5326 i915_vma_unpin_and_release(&scratch, 0);
5327 return err;
5330 static struct i915_request *
5331 create_timestamp(struct intel_context *ce, void *slot, int idx)
5333 const u32 offset =
5334 i915_ggtt_offset(ce->engine->status_page.vma) +
5335 offset_in_page(slot);
5336 struct i915_request *rq;
5337 u32 *cs;
5338 int err;
5340 rq = intel_context_create_request(ce);
5341 if (IS_ERR(rq))
5342 return rq;
5344 cs = intel_ring_begin(rq, 10);
5345 if (IS_ERR(cs)) {
5346 err = PTR_ERR(cs);
5347 goto err;
5350 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5351 *cs++ = MI_NOOP;
5353 *cs++ = MI_SEMAPHORE_WAIT |
5354 MI_SEMAPHORE_GLOBAL_GTT |
5355 MI_SEMAPHORE_POLL |
5356 MI_SEMAPHORE_SAD_NEQ_SDD;
5357 *cs++ = 0;
5358 *cs++ = offset;
5359 *cs++ = 0;
5361 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5362 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
5363 *cs++ = offset + idx * sizeof(u32);
5364 *cs++ = 0;
5366 intel_ring_advance(rq, cs);
5368 rq->sched.attr.priority = I915_PRIORITY_MASK;
5369 err = 0;
5370 err:
5371 i915_request_get(rq);
5372 i915_request_add(rq);
5373 if (err) {
5374 i915_request_put(rq);
5375 return ERR_PTR(err);
5378 return rq;
5381 struct lrc_timestamp {
5382 struct intel_engine_cs *engine;
5383 struct intel_context *ce[2];
5384 u32 poison;
/*
 * Return true if @end is strictly later than @start, treating both as
 * wrapping 32-bit counters (the difference is interpreted as signed).
 */
static bool timestamp_advanced(u32 start, u32 end)
{
	return (s32)(end - start) > 0;
}
5392 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
5394 u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
5395 struct i915_request *rq;
5396 u32 timestamp;
5397 int err = 0;
5399 arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
5400 rq = create_timestamp(arg->ce[0], slot, 1);
5401 if (IS_ERR(rq))
5402 return PTR_ERR(rq);
5404 err = wait_for_submit(rq->engine, rq, HZ / 2);
5405 if (err)
5406 goto err;
5408 if (preempt) {
5409 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
5410 err = emit_semaphore_signal(arg->ce[1], slot);
5411 if (err)
5412 goto err;
5413 } else {
5414 slot[0] = 1;
5415 wmb();
5418 /* And wait for switch to kernel (to save our context to memory) */
5419 err = context_flush(arg->ce[0], HZ / 2);
5420 if (err)
5421 goto err;
5423 if (!timestamp_advanced(arg->poison, slot[1])) {
5424 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
5425 arg->engine->name, preempt ? "preempt" : "simple",
5426 arg->poison, slot[1]);
5427 err = -EINVAL;
5430 timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
5431 if (!timestamp_advanced(slot[1], timestamp)) {
5432 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
5433 arg->engine->name, preempt ? "preempt" : "simple",
5434 slot[1], timestamp);
5435 err = -EINVAL;
5438 err:
5439 memset32(slot, -1, 4);
5440 i915_request_put(rq);
5441 return err;
5444 static int live_lrc_timestamp(void *arg)
5446 struct lrc_timestamp data = {};
5447 struct intel_gt *gt = arg;
5448 enum intel_engine_id id;
5449 const u32 poison[] = {
5451 S32_MAX,
5452 (u32)S32_MAX + 1,
5453 U32_MAX,
5457 * We want to verify that the timestamp is saved and restore across
5458 * context switches and is monotonic.
5460 * So we do this with a little bit of LRC poisoning to check various
5461 * boundary conditions, and see what happens if we preempt the context
5462 * with a second request (carrying more poison into the timestamp).
5465 for_each_engine(data.engine, gt, id) {
5466 int i, err = 0;
5468 st_engine_heartbeat_disable(data.engine);
5470 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5471 struct intel_context *tmp;
5473 tmp = intel_context_create(data.engine);
5474 if (IS_ERR(tmp)) {
5475 err = PTR_ERR(tmp);
5476 goto err;
5479 err = intel_context_pin(tmp);
5480 if (err) {
5481 intel_context_put(tmp);
5482 goto err;
5485 data.ce[i] = tmp;
5488 for (i = 0; i < ARRAY_SIZE(poison); i++) {
5489 data.poison = poison[i];
5491 err = __lrc_timestamp(&data, false);
5492 if (err)
5493 break;
5495 err = __lrc_timestamp(&data, true);
5496 if (err)
5497 break;
5500 err:
5501 st_engine_heartbeat_enable(data.engine);
5502 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5503 if (!data.ce[i])
5504 break;
5506 intel_context_unpin(data.ce[i]);
5507 intel_context_put(data.ce[i]);
5510 if (igt_flush_test(gt->i915))
5511 err = -EIO;
5512 if (err)
5513 return err;
5516 return 0;
5519 static struct i915_vma *
5520 create_user_vma(struct i915_address_space *vm, unsigned long size)
5522 struct drm_i915_gem_object *obj;
5523 struct i915_vma *vma;
5524 int err;
5526 obj = i915_gem_object_create_internal(vm->i915, size);
5527 if (IS_ERR(obj))
5528 return ERR_CAST(obj);
5530 vma = i915_vma_instance(obj, vm, NULL);
5531 if (IS_ERR(vma)) {
5532 i915_gem_object_put(obj);
5533 return vma;
5536 err = i915_vma_pin(vma, 0, 0, PIN_USER);
5537 if (err) {
5538 i915_gem_object_put(obj);
5539 return ERR_PTR(err);
5542 return vma;
5545 static struct i915_vma *
5546 store_context(struct intel_context *ce, struct i915_vma *scratch)
5548 struct i915_vma *batch;
5549 u32 dw, x, *cs, *hw;
5550 u32 *defaults;
5552 batch = create_user_vma(ce->vm, SZ_64K);
5553 if (IS_ERR(batch))
5554 return batch;
5556 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
5557 if (IS_ERR(cs)) {
5558 i915_vma_put(batch);
5559 return ERR_CAST(cs);
5562 defaults = shmem_pin_map(ce->engine->default_state);
5563 if (!defaults) {
5564 i915_gem_object_unpin_map(batch->obj);
5565 i915_vma_put(batch);
5566 return ERR_PTR(-ENOMEM);
5569 x = 0;
5570 dw = 0;
5571 hw = defaults;
5572 hw += LRC_STATE_OFFSET / sizeof(*hw);
5573 do {
5574 u32 len = hw[dw] & 0x7f;
5576 if (hw[dw] == 0) {
5577 dw++;
5578 continue;
5581 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5582 dw += len + 2;
5583 continue;
5586 dw++;
5587 len = (len + 1) / 2;
5588 while (len--) {
5589 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
5590 *cs++ = hw[dw];
5591 *cs++ = lower_32_bits(scratch->node.start + x);
5592 *cs++ = upper_32_bits(scratch->node.start + x);
5594 dw += 2;
5595 x += 4;
5597 } while (dw < PAGE_SIZE / sizeof(u32) &&
5598 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5600 *cs++ = MI_BATCH_BUFFER_END;
5602 shmem_unpin_map(ce->engine->default_state, defaults);
5604 i915_gem_object_flush_map(batch->obj);
5605 i915_gem_object_unpin_map(batch->obj);
5607 return batch;
5610 static int move_to_active(struct i915_request *rq,
5611 struct i915_vma *vma,
5612 unsigned int flags)
5614 int err;
5616 i915_vma_lock(vma);
5617 err = i915_request_await_object(rq, vma->obj, flags);
5618 if (!err)
5619 err = i915_vma_move_to_active(vma, rq, flags);
5620 i915_vma_unlock(vma);
5622 return err;
5625 static struct i915_request *
5626 record_registers(struct intel_context *ce,
5627 struct i915_vma *before,
5628 struct i915_vma *after,
5629 u32 *sema)
5631 struct i915_vma *b_before, *b_after;
5632 struct i915_request *rq;
5633 u32 *cs;
5634 int err;
5636 b_before = store_context(ce, before);
5637 if (IS_ERR(b_before))
5638 return ERR_CAST(b_before);
5640 b_after = store_context(ce, after);
5641 if (IS_ERR(b_after)) {
5642 rq = ERR_CAST(b_after);
5643 goto err_before;
5646 rq = intel_context_create_request(ce);
5647 if (IS_ERR(rq))
5648 goto err_after;
5650 err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
5651 if (err)
5652 goto err_rq;
5654 err = move_to_active(rq, b_before, 0);
5655 if (err)
5656 goto err_rq;
5658 err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
5659 if (err)
5660 goto err_rq;
5662 err = move_to_active(rq, b_after, 0);
5663 if (err)
5664 goto err_rq;
5666 cs = intel_ring_begin(rq, 14);
5667 if (IS_ERR(cs)) {
5668 err = PTR_ERR(cs);
5669 goto err_rq;
5672 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5673 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5674 *cs++ = lower_32_bits(b_before->node.start);
5675 *cs++ = upper_32_bits(b_before->node.start);
5677 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5678 *cs++ = MI_SEMAPHORE_WAIT |
5679 MI_SEMAPHORE_GLOBAL_GTT |
5680 MI_SEMAPHORE_POLL |
5681 MI_SEMAPHORE_SAD_NEQ_SDD;
5682 *cs++ = 0;
5683 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5684 offset_in_page(sema);
5685 *cs++ = 0;
5686 *cs++ = MI_NOOP;
5688 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5689 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5690 *cs++ = lower_32_bits(b_after->node.start);
5691 *cs++ = upper_32_bits(b_after->node.start);
5693 intel_ring_advance(rq, cs);
5695 WRITE_ONCE(*sema, 0);
5696 i915_request_get(rq);
5697 i915_request_add(rq);
5698 err_after:
5699 i915_vma_put(b_after);
5700 err_before:
5701 i915_vma_put(b_before);
5702 return rq;
5704 err_rq:
5705 i915_request_add(rq);
5706 rq = ERR_PTR(err);
5707 goto err_after;
5710 static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
5712 struct i915_vma *batch;
5713 u32 dw, *cs, *hw;
5714 u32 *defaults;
5716 batch = create_user_vma(ce->vm, SZ_64K);
5717 if (IS_ERR(batch))
5718 return batch;
5720 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
5721 if (IS_ERR(cs)) {
5722 i915_vma_put(batch);
5723 return ERR_CAST(cs);
5726 defaults = shmem_pin_map(ce->engine->default_state);
5727 if (!defaults) {
5728 i915_gem_object_unpin_map(batch->obj);
5729 i915_vma_put(batch);
5730 return ERR_PTR(-ENOMEM);
5733 dw = 0;
5734 hw = defaults;
5735 hw += LRC_STATE_OFFSET / sizeof(*hw);
5736 do {
5737 u32 len = hw[dw] & 0x7f;
5739 if (hw[dw] == 0) {
5740 dw++;
5741 continue;
5744 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5745 dw += len + 2;
5746 continue;
5749 dw++;
5750 len = (len + 1) / 2;
5751 *cs++ = MI_LOAD_REGISTER_IMM(len);
5752 while (len--) {
5753 *cs++ = hw[dw];
5754 *cs++ = poison;
5755 dw += 2;
5757 } while (dw < PAGE_SIZE / sizeof(u32) &&
5758 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5760 *cs++ = MI_BATCH_BUFFER_END;
5762 shmem_unpin_map(ce->engine->default_state, defaults);
5764 i915_gem_object_flush_map(batch->obj);
5765 i915_gem_object_unpin_map(batch->obj);
5767 return batch;
5770 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
5772 struct i915_request *rq;
5773 struct i915_vma *batch;
5774 u32 *cs;
5775 int err;
5777 batch = load_context(ce, poison);
5778 if (IS_ERR(batch))
5779 return PTR_ERR(batch);
5781 rq = intel_context_create_request(ce);
5782 if (IS_ERR(rq)) {
5783 err = PTR_ERR(rq);
5784 goto err_batch;
5787 err = move_to_active(rq, batch, 0);
5788 if (err)
5789 goto err_rq;
5791 cs = intel_ring_begin(rq, 8);
5792 if (IS_ERR(cs)) {
5793 err = PTR_ERR(cs);
5794 goto err_rq;
5797 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5798 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5799 *cs++ = lower_32_bits(batch->node.start);
5800 *cs++ = upper_32_bits(batch->node.start);
5802 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
5803 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5804 offset_in_page(sema);
5805 *cs++ = 0;
5806 *cs++ = 1;
5808 intel_ring_advance(rq, cs);
5810 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
5811 err_rq:
5812 i915_request_add(rq);
5813 err_batch:
5814 i915_vma_put(batch);
5815 return err;
5818 static bool is_moving(u32 a, u32 b)
5820 return a != b;
5823 static int compare_isolation(struct intel_engine_cs *engine,
5824 struct i915_vma *ref[2],
5825 struct i915_vma *result[2],
5826 struct intel_context *ce,
5827 u32 poison)
5829 u32 x, dw, *hw, *lrc;
5830 u32 *A[2], *B[2];
5831 u32 *defaults;
5832 int err = 0;
5834 A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
5835 if (IS_ERR(A[0]))
5836 return PTR_ERR(A[0]);
5838 A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
5839 if (IS_ERR(A[1])) {
5840 err = PTR_ERR(A[1]);
5841 goto err_A0;
5844 B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
5845 if (IS_ERR(B[0])) {
5846 err = PTR_ERR(B[0]);
5847 goto err_A1;
5850 B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
5851 if (IS_ERR(B[1])) {
5852 err = PTR_ERR(B[1]);
5853 goto err_B0;
5856 lrc = i915_gem_object_pin_map(ce->state->obj,
5857 i915_coherent_map_type(engine->i915));
5858 if (IS_ERR(lrc)) {
5859 err = PTR_ERR(lrc);
5860 goto err_B1;
5862 lrc += LRC_STATE_OFFSET / sizeof(*hw);
5864 defaults = shmem_pin_map(ce->engine->default_state);
5865 if (!defaults) {
5866 err = -ENOMEM;
5867 goto err_lrc;
5870 x = 0;
5871 dw = 0;
5872 hw = defaults;
5873 hw += LRC_STATE_OFFSET / sizeof(*hw);
5874 do {
5875 u32 len = hw[dw] & 0x7f;
5877 if (hw[dw] == 0) {
5878 dw++;
5879 continue;
5882 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5883 dw += len + 2;
5884 continue;
5887 dw++;
5888 len = (len + 1) / 2;
5889 while (len--) {
5890 if (!is_moving(A[0][x], A[1][x]) &&
5891 (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
5892 switch (hw[dw] & 4095) {
5893 case 0x30: /* RING_HEAD */
5894 case 0x34: /* RING_TAIL */
5895 break;
5897 default:
5898 pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
5899 engine->name, dw,
5900 hw[dw], hw[dw + 1],
5901 A[0][x], B[0][x], B[1][x],
5902 poison, lrc[dw + 1]);
5903 err = -EINVAL;
5906 dw += 2;
5907 x++;
5909 } while (dw < PAGE_SIZE / sizeof(u32) &&
5910 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5912 shmem_unpin_map(ce->engine->default_state, defaults);
5913 err_lrc:
5914 i915_gem_object_unpin_map(ce->state->obj);
5915 err_B1:
5916 i915_gem_object_unpin_map(result[1]->obj);
5917 err_B0:
5918 i915_gem_object_unpin_map(result[0]->obj);
5919 err_A1:
5920 i915_gem_object_unpin_map(ref[1]->obj);
5921 err_A0:
5922 i915_gem_object_unpin_map(ref[0]->obj);
5923 return err;
5926 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
5928 u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
5929 struct i915_vma *ref[2], *result[2];
5930 struct intel_context *A, *B;
5931 struct i915_request *rq;
5932 int err;
5934 A = intel_context_create(engine);
5935 if (IS_ERR(A))
5936 return PTR_ERR(A);
5938 B = intel_context_create(engine);
5939 if (IS_ERR(B)) {
5940 err = PTR_ERR(B);
5941 goto err_A;
5944 ref[0] = create_user_vma(A->vm, SZ_64K);
5945 if (IS_ERR(ref[0])) {
5946 err = PTR_ERR(ref[0]);
5947 goto err_B;
5950 ref[1] = create_user_vma(A->vm, SZ_64K);
5951 if (IS_ERR(ref[1])) {
5952 err = PTR_ERR(ref[1]);
5953 goto err_ref0;
5956 rq = record_registers(A, ref[0], ref[1], sema);
5957 if (IS_ERR(rq)) {
5958 err = PTR_ERR(rq);
5959 goto err_ref1;
5962 WRITE_ONCE(*sema, 1);
5963 wmb();
5965 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5966 i915_request_put(rq);
5967 err = -ETIME;
5968 goto err_ref1;
5970 i915_request_put(rq);
5972 result[0] = create_user_vma(A->vm, SZ_64K);
5973 if (IS_ERR(result[0])) {
5974 err = PTR_ERR(result[0]);
5975 goto err_ref1;
5978 result[1] = create_user_vma(A->vm, SZ_64K);
5979 if (IS_ERR(result[1])) {
5980 err = PTR_ERR(result[1]);
5981 goto err_result0;
5984 rq = record_registers(A, result[0], result[1], sema);
5985 if (IS_ERR(rq)) {
5986 err = PTR_ERR(rq);
5987 goto err_result1;
5990 err = poison_registers(B, poison, sema);
5991 if (err) {
5992 WRITE_ONCE(*sema, -1);
5993 i915_request_put(rq);
5994 goto err_result1;
5997 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5998 i915_request_put(rq);
5999 err = -ETIME;
6000 goto err_result1;
6002 i915_request_put(rq);
6004 err = compare_isolation(engine, ref, result, A, poison);
6006 err_result1:
6007 i915_vma_put(result[1]);
6008 err_result0:
6009 i915_vma_put(result[0]);
6010 err_ref1:
6011 i915_vma_put(ref[1]);
6012 err_ref0:
6013 i915_vma_put(ref[0]);
6014 err_B:
6015 intel_context_put(B);
6016 err_A:
6017 intel_context_put(A);
6018 return err;
6021 static bool skip_isolation(const struct intel_engine_cs *engine)
6023 if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
6024 return true;
6026 if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
6027 return true;
6029 return false;
6032 static int live_lrc_isolation(void *arg)
6034 struct intel_gt *gt = arg;
6035 struct intel_engine_cs *engine;
6036 enum intel_engine_id id;
6037 const u32 poison[] = {
6038 STACK_MAGIC,
6039 0x3a3a3a3a,
6040 0x5c5c5c5c,
6041 0xffffffff,
6042 0xffff0000,
6044 int err = 0;
6047 * Our goal is try and verify that per-context state cannot be
6048 * tampered with by another non-privileged client.
6050 * We take the list of context registers from the LRI in the default
6051 * context image and attempt to modify that list from a remote context.
6054 for_each_engine(engine, gt, id) {
6055 int i;
6057 /* Just don't even ask */
6058 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
6059 skip_isolation(engine))
6060 continue;
6062 intel_engine_pm_get(engine);
6063 for (i = 0; i < ARRAY_SIZE(poison); i++) {
6064 int result;
6066 result = __lrc_isolation(engine, poison[i]);
6067 if (result && !err)
6068 err = result;
6070 result = __lrc_isolation(engine, ~poison[i]);
6071 if (result && !err)
6072 err = result;
6074 intel_engine_pm_put(engine);
6075 if (igt_flush_test(gt->i915)) {
6076 err = -EIO;
6077 break;
6081 return err;
6084 static int indirect_ctx_submit_req(struct intel_context *ce)
6086 struct i915_request *rq;
6087 int err = 0;
6089 rq = intel_context_create_request(ce);
6090 if (IS_ERR(rq))
6091 return PTR_ERR(rq);
6093 i915_request_get(rq);
6094 i915_request_add(rq);
6096 if (i915_request_wait(rq, 0, HZ / 5) < 0)
6097 err = -ETIME;
6099 i915_request_put(rq);
6101 return err;
6104 #define CTX_BB_CANARY_OFFSET (3 * 1024)
6105 #define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32))
6107 static u32 *
6108 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
6110 *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
6111 MI_SRM_LRM_GLOBAL_GTT |
6112 MI_LRI_LRM_CS_MMIO;
6113 *cs++ = i915_mmio_reg_offset(RING_START(0));
6114 *cs++ = i915_ggtt_offset(ce->state) +
6115 context_wa_bb_offset(ce) +
6116 CTX_BB_CANARY_OFFSET;
6117 *cs++ = 0;
6119 return cs;
6122 static void
6123 indirect_ctx_bb_setup(struct intel_context *ce)
6125 u32 *cs = context_indirect_bb(ce);
6127 cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
6129 setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
6132 static bool check_ring_start(struct intel_context *ce)
6134 const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
6135 LRC_STATE_OFFSET + context_wa_bb_offset(ce);
6137 if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
6138 return true;
6140 pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
6141 ctx_bb[CTX_BB_CANARY_INDEX],
6142 ce->lrc_reg_state[CTX_RING_START]);
6144 return false;
6147 static int indirect_ctx_bb_check(struct intel_context *ce)
6149 int err;
6151 err = indirect_ctx_submit_req(ce);
6152 if (err)
6153 return err;
6155 if (!check_ring_start(ce))
6156 return -EINVAL;
6158 return 0;
6161 static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
6163 struct intel_context *a, *b;
6164 int err;
6166 a = intel_context_create(engine);
6167 if (IS_ERR(a))
6168 return PTR_ERR(a);
6169 err = intel_context_pin(a);
6170 if (err)
6171 goto put_a;
6173 b = intel_context_create(engine);
6174 if (IS_ERR(b)) {
6175 err = PTR_ERR(b);
6176 goto unpin_a;
6178 err = intel_context_pin(b);
6179 if (err)
6180 goto put_b;
6182 /* We use the already reserved extra page in context state */
6183 if (!a->wa_bb_page) {
6184 GEM_BUG_ON(b->wa_bb_page);
6185 GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
6186 goto unpin_b;
6190 * In order to test that our per context bb is truly per context,
6191 * and executes at the intended spot on context restoring process,
6192 * make the batch store the ring start value to memory.
6193 * As ring start is restored apriori of starting the indirect ctx bb and
6194 * as it will be different for each context, it fits to this purpose.
6196 indirect_ctx_bb_setup(a);
6197 indirect_ctx_bb_setup(b);
6199 err = indirect_ctx_bb_check(a);
6200 if (err)
6201 goto unpin_b;
6203 err = indirect_ctx_bb_check(b);
6205 unpin_b:
6206 intel_context_unpin(b);
6207 put_b:
6208 intel_context_put(b);
6209 unpin_a:
6210 intel_context_unpin(a);
6211 put_a:
6212 intel_context_put(a);
6214 return err;
6217 static int live_lrc_indirect_ctx_bb(void *arg)
6219 struct intel_gt *gt = arg;
6220 struct intel_engine_cs *engine;
6221 enum intel_engine_id id;
6222 int err = 0;
6224 for_each_engine(engine, gt, id) {
6225 intel_engine_pm_get(engine);
6226 err = __live_lrc_indirect_ctx_bb(engine);
6227 intel_engine_pm_put(engine);
6229 if (igt_flush_test(gt->i915))
6230 err = -EIO;
6232 if (err)
6233 break;
6236 return err;
6239 static void garbage_reset(struct intel_engine_cs *engine,
6240 struct i915_request *rq)
6242 const unsigned int bit = I915_RESET_ENGINE + engine->id;
6243 unsigned long *lock = &engine->gt->reset.flags;
6245 if (test_and_set_bit(bit, lock))
6246 return;
6248 tasklet_disable(&engine->execlists.tasklet);
6250 if (!rq->fence.error)
6251 intel_engine_reset(engine, NULL);
6253 tasklet_enable(&engine->execlists.tasklet);
6254 clear_and_wake_up_bit(bit, lock);
6257 static struct i915_request *garbage(struct intel_context *ce,
6258 struct rnd_state *prng)
6260 struct i915_request *rq;
6261 int err;
6263 err = intel_context_pin(ce);
6264 if (err)
6265 return ERR_PTR(err);
6267 prandom_bytes_state(prng,
6268 ce->lrc_reg_state,
6269 ce->engine->context_size -
6270 LRC_STATE_OFFSET);
6272 rq = intel_context_create_request(ce);
6273 if (IS_ERR(rq)) {
6274 err = PTR_ERR(rq);
6275 goto err_unpin;
6278 i915_request_get(rq);
6279 i915_request_add(rq);
6280 return rq;
6282 err_unpin:
6283 intel_context_unpin(ce);
6284 return ERR_PTR(err);
6287 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
6289 struct intel_context *ce;
6290 struct i915_request *hang;
6291 int err = 0;
6293 ce = intel_context_create(engine);
6294 if (IS_ERR(ce))
6295 return PTR_ERR(ce);
6297 hang = garbage(ce, prng);
6298 if (IS_ERR(hang)) {
6299 err = PTR_ERR(hang);
6300 goto err_ce;
6303 if (wait_for_submit(engine, hang, HZ / 2)) {
6304 i915_request_put(hang);
6305 err = -ETIME;
6306 goto err_ce;
6309 intel_context_set_banned(ce);
6310 garbage_reset(engine, hang);
6312 intel_engine_flush_submission(engine);
6313 if (!hang->fence.error) {
6314 i915_request_put(hang);
6315 pr_err("%s: corrupted context was not reset\n",
6316 engine->name);
6317 err = -EINVAL;
6318 goto err_ce;
6321 if (i915_request_wait(hang, 0, HZ / 2) < 0) {
6322 pr_err("%s: corrupted context did not recover\n",
6323 engine->name);
6324 i915_request_put(hang);
6325 err = -EIO;
6326 goto err_ce;
6328 i915_request_put(hang);
6330 err_ce:
6331 intel_context_put(ce);
6332 return err;
6335 static int live_lrc_garbage(void *arg)
6337 struct intel_gt *gt = arg;
6338 struct intel_engine_cs *engine;
6339 enum intel_engine_id id;
6342 * Verify that we can recover if one context state is completely
6343 * corrupted.
6346 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
6347 return 0;
6349 for_each_engine(engine, gt, id) {
6350 I915_RND_STATE(prng);
6351 int err = 0, i;
6353 if (!intel_has_reset_engine(engine->gt))
6354 continue;
6356 intel_engine_pm_get(engine);
6357 for (i = 0; i < 3; i++) {
6358 err = __lrc_garbage(engine, &prng);
6359 if (err)
6360 break;
6362 intel_engine_pm_put(engine);
6364 if (igt_flush_test(gt->i915))
6365 err = -EIO;
6366 if (err)
6367 return err;
6370 return 0;
6373 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
6375 struct intel_context *ce;
6376 struct i915_request *rq;
6377 IGT_TIMEOUT(end_time);
6378 int err;
6380 ce = intel_context_create(engine);
6381 if (IS_ERR(ce))
6382 return PTR_ERR(ce);
6384 ce->runtime.num_underflow = 0;
6385 ce->runtime.max_underflow = 0;
6387 do {
6388 unsigned int loop = 1024;
6390 while (loop) {
6391 rq = intel_context_create_request(ce);
6392 if (IS_ERR(rq)) {
6393 err = PTR_ERR(rq);
6394 goto err_rq;
6397 if (--loop == 0)
6398 i915_request_get(rq);
6400 i915_request_add(rq);
6403 if (__igt_timeout(end_time, NULL))
6404 break;
6406 i915_request_put(rq);
6407 } while (1);
6409 err = i915_request_wait(rq, 0, HZ / 5);
6410 if (err < 0) {
6411 pr_err("%s: request not completed!\n", engine->name);
6412 goto err_wait;
6415 igt_flush_test(engine->i915);
6417 pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
6418 engine->name,
6419 intel_context_get_total_runtime_ns(ce),
6420 intel_context_get_avg_runtime_ns(ce));
6422 err = 0;
6423 if (ce->runtime.num_underflow) {
6424 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
6425 engine->name,
6426 ce->runtime.num_underflow,
6427 ce->runtime.max_underflow);
6428 GEM_TRACE_DUMP();
6429 err = -EOVERFLOW;
6432 err_wait:
6433 i915_request_put(rq);
6434 err_rq:
6435 intel_context_put(ce);
6436 return err;
6439 static int live_pphwsp_runtime(void *arg)
6441 struct intel_gt *gt = arg;
6442 struct intel_engine_cs *engine;
6443 enum intel_engine_id id;
6444 int err = 0;
6447 * Check that cumulative context runtime as stored in the pphwsp[16]
6448 * is monotonic.
6451 for_each_engine(engine, gt, id) {
6452 err = __live_pphwsp_runtime(engine);
6453 if (err)
6454 break;
6457 if (igt_flush_test(gt->i915))
6458 err = -EIO;
6460 return err;
6463 int intel_lrc_live_selftests(struct drm_i915_private *i915)
6465 static const struct i915_subtest tests[] = {
6466 SUBTEST(live_lrc_layout),
6467 SUBTEST(live_lrc_fixed),
6468 SUBTEST(live_lrc_state),
6469 SUBTEST(live_lrc_gpr),
6470 SUBTEST(live_lrc_isolation),
6471 SUBTEST(live_lrc_timestamp),
6472 SUBTEST(live_lrc_garbage),
6473 SUBTEST(live_pphwsp_runtime),
6474 SUBTEST(live_lrc_indirect_ctx_bb),
6477 if (!HAS_LOGICAL_RING_CONTEXTS(i915))
6478 return 0;
6480 return intel_gt_live_subtests(tests, &i915->gt);