/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"
#include "selftest_engine_heartbeat.h"

#include "../selftests/i915_random.h"
#include "../i915_selftest.h"

#include "selftests/igt_flush_test.h"
#include "selftests/lib_sw_fence.h"
#include "selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"
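
/*
 * hwsp_page() returns the page backing a timeline's HWSP, and
 * hwsp_cacheline() converts that into a global cacheline index. The mock
 * freelist test below uses the index to spot two timelines being handed
 * the same HWSP cacheline.
 */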
static struct page *hwsp_page(struct intel_timeline *tl)
{
	struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

static unsigned long hwsp_cacheline(struct intel_timeline *tl)
{
	unsigned long address = (unsigned long)page_address(hwsp_page(tl));

	return (address + tl->hwsp_offset) / CACHELINE_BYTES;
}

#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)

struct mock_hwsp_freelist {
	struct intel_gt *gt;
	struct radix_tree_root cachelines;
	struct intel_timeline **history;
	unsigned long count, max;
	struct rnd_state prng;
};

enum {
	SHUFFLE = BIT(0),
};

static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
			       unsigned int idx,
			       struct intel_timeline *tl)
{
	tl = xchg(&state->history[idx], tl);
	if (tl) {
		radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
		intel_timeline_put(tl);
	}
}
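
/*
 * Allocate @count timelines, recording each HWSP cacheline in the radix
 * tree (a duplicate insert means two timelines share a cacheline, which is
 * reported as an error), then release a random subset so the next pass
 * exercises the freelist.
 */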
static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
				unsigned int count,
				unsigned int flags)
{
	struct intel_timeline *tl;
	unsigned int idx;

	while (count--) {
		unsigned long cacheline;
		int err;

		tl = intel_timeline_create(state->gt);
		if (IS_ERR(tl))
			return PTR_ERR(tl);

		cacheline = hwsp_cacheline(tl);
		err = radix_tree_insert(&state->cachelines, cacheline, tl);
		if (err) {
			if (err == -EEXIST) {
				pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
				       cacheline);
			}
			intel_timeline_put(tl);
			return err;
		}

		idx = state->count++ % state->max;
		__mock_hwsp_record(state, idx, tl);
	}

	if (flags & SHUFFLE)
		i915_prandom_shuffle(state->history,
				     sizeof(*state->history),
				     min(state->count, state->max),
				     &state->prng);

	count = i915_prandom_u32_max_state(min(state->count, state->max),
					   &state->prng);
	while (count--) {
		idx = --state->count % state->max;
		__mock_hwsp_record(state, idx, NULL);
	}

	return 0;
}

static int mock_hwsp_freelist(void *arg)
{
	struct mock_hwsp_freelist state;
	struct drm_i915_private *i915;
	const struct {
		const char *name;
		unsigned int flags;
	} phases[] = {
		{ "linear", 0 },
		{ "shuffled", SHUFFLE },
		{ },
	}, *p;
	unsigned int na;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);

	state.gt = &i915->gt;

	/*
	 * Create a bunch of timelines and check that their HWSP do not overlap.
	 * Free some, and try again.
	 */

	state.max = PAGE_SIZE / sizeof(*state.history);
	state.count = 0;
	state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
	if (!state.history) {
		err = -ENOMEM;
		goto err_put;
	}

	for (p = phases; p->name; p++) {
		pr_debug("%s(%s)\n", __func__, p->name);
		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
			err = __mock_hwsp_timeline(&state, na, p->flags);
			if (err)
				goto out;
		}
	}

out:
	for (na = 0; na < state.max; na++)
		__mock_hwsp_record(&state, na, NULL);
	kfree(state.history);
err_put:
	mock_destroy_device(i915);
	return err;
}

struct __igt_sync {
	const char *name;
	u32 seqno;
	bool expected;
	bool set;
};

static int __igt_sync(struct intel_timeline *tl,
		      u64 ctx,
		      const struct __igt_sync *p,
		      const char *name)
{
	int ret;

	if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
		       name, p->name, ctx, p->seqno, yesno(p->expected));
		return -EINVAL;
	}

	if (p->set) {
		ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
		if (ret)
			return ret;
	}

	return 0;
}

static int igt_sync(void *arg)
{
	const struct __igt_sync pass[] = {
		{ "unset", 0, false, false },
		{ "new", 0, false, true },
		{ "0a", 0, true, true },
		{ "1a", 1, false, true },
		{ "1b", 1, true, true },
		{ "0b", 0, true, false },
		{ "2a", 2, false, true },
		{ "4", 4, false, true },
		{ "INT_MAX", INT_MAX, false, true },
		{ "INT_MAX-1", INT_MAX-1, true, false },
		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
		{ "INT_MAX", INT_MAX, true, false },
		{ "UINT_MAX", UINT_MAX, false, true },
		{ "wrap", 0, false, true },
		{ "unwrap", UINT_MAX, true, false },
		{},
	}, *p;
	struct intel_timeline tl;
	int order, offset;
	int ret = -ENODEV;

	mock_timeline_init(&tl, 0);
	for (p = pass; p->name; p++) {
		for (order = 1; order < 64; order++) {
			for (offset = -1; offset <= (order > 1); offset++) {
				u64 ctx = BIT_ULL(order) + offset;

				ret = __igt_sync(&tl, ctx, p, "1");
				if (ret)
					goto out;
			}
		}
	}
	mock_timeline_fini(&tl);

	mock_timeline_init(&tl, 0);
	for (order = 1; order < 64; order++) {
		for (offset = -1; offset <= (order > 1); offset++) {
			u64 ctx = BIT_ULL(order) + offset;

			for (p = pass; p->name; p++) {
				ret = __igt_sync(&tl, ctx, p, "2");
				if (ret)
					goto out;
			}
		}
	}

out:
	mock_timeline_fini(&tl);
	return ret;
}

static unsigned int random_engine(struct rnd_state *rnd)
{
	return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}
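
/*
 * Rough microbenchmarks of the timeline sync map: random and in-order
 * __intel_timeline_sync_set()/__intel_timeline_sync_is_later() calls, plus
 * a mixed and a cyclic access pattern. The cost of the prng itself is
 * measured first and subtracted from the random-access results.
 */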
static int bench_sync(void *arg)
{
	struct rnd_state prng;
	struct intel_timeline tl;
	unsigned long end_time, count;
	u64 prng32_1M;
	ktime_t kt;
	int order, last_order;

	mock_timeline_init(&tl, 0);

	/* Lookups from cache are very fast and so the random number generation
	 * and the loop itself becomes a significant factor in the per-iteration
	 * timings. We try to compensate the results by measuring the overhead
	 * of the prng and subtract it from the reported results.
	 */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 x;

		/* Make sure the compiler doesn't optimise away the prng call */
		WRITE_ONCE(x, prandom_u32_state(&prng));

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

	/* Benchmark (only) setting random context ids */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u64 id = i915_prandom_u64_state(&prng);

		__intel_timeline_sync_set(&tl, id, 0);
		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		u64 id = i915_prandom_u64_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
			mock_timeline_fini(&tl);
			pr_err("Lookup of %llu failed\n", id);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark setting the first N (in order) contexts */
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		__intel_timeline_sync_set(&tl, count++, 0);
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
			pr_err("Lookup of %lu failed\n", end_time);
			mock_timeline_fini(&tl);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark searching for a random context id and maybe changing it */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 id = random_engine(&prng);
		u32 seqno = prandom_u32_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, seqno))
			__intel_timeline_sync_set(&tl, id, seqno);

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	mock_timeline_fini(&tl);
	cond_resched();

	/* Benchmark searching for a known context id and changing the seqno */
	for (last_order = 1, order = 1; order < 32;
	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
		unsigned int mask = BIT(order) - 1;

		mock_timeline_init(&tl, 0);

		count = 0;
		kt = ktime_get();
		end_time = jiffies + HZ/10;
		do {
			/* Without assuming too many details of the underlying
			 * implementation, try to identify its phase-changes
			 * (if any)!
			 */
			u64 id = (u64)(count & mask) << order;

			__intel_timeline_sync_is_later(&tl, id, 0);
			__intel_timeline_sync_set(&tl, id, 0);

			count++;
		} while (!time_after(jiffies, end_time));
		kt = ktime_sub(ktime_get(), kt);
		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
			__func__, count, order,
			(long long)div64_ul(ktime_to_ns(kt), count));
		mock_timeline_fini(&tl);
		cond_resched();
	}

	return 0;
}

int intel_timeline_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(mock_hwsp_freelist),
		SUBTEST(igt_sync),
		SUBTEST(bench_sync),
	};

	return i915_subtests(tests, NULL);
}
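
/*
 * Emit a store of @value to GGTT offset @addr using the MI_STORE_DWORD_IMM
 * dword ordering each generation expects: (cmd, addr, 0, value) on gen8+,
 * (cmd, 0, addr, value) on gen4+, and (cmd, addr, value, MI_NOOP) with
 * MI_MEM_VIRTUAL on earlier generations.
 */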
static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (INTEL_GEN(rq->engine->i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = addr;
		*cs++ = 0;
		*cs++ = value;
	} else if (INTEL_GEN(rq->engine->i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = addr;
		*cs++ = value;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = addr;
		*cs++ = value;
		*cs++ = MI_NOOP;
	}

	intel_ring_advance(rq, cs);

	return 0;
}

static struct i915_request *
tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
	struct i915_request *rq;
	int err;

	err = intel_timeline_pin(tl, NULL);
	if (err) {
		rq = ERR_PTR(err);
		goto out;
	}

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		goto out_unpin;

	i915_request_get(rq);

	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		rq = ERR_PTR(err);
	}

out_unpin:
	intel_timeline_unpin(tl);
out:
	if (IS_ERR(rq))
		pr_err("Failed to write to timeline!\n");
	return rq;
}
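
/*
 * Create a timeline and check that its HWSP slot starts out holding the
 * timeline's current seqno, before any of the tests write to it.
 */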
static struct intel_timeline *
checked_intel_timeline_create(struct intel_gt *gt)
{
	struct intel_timeline *tl;

	tl = intel_timeline_create(gt);
	if (IS_ERR(tl))
		return tl;

	if (READ_ONCE(*tl->hwsp_seqno) != tl->seqno) {
		pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
		       *tl->hwsp_seqno, tl->seqno);
		intel_timeline_put(tl);
		return ERR_PTR(-EINVAL);
	}

	return tl;
}

static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		for (n = 0; n < NUM_TIMELINES; n++) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
			GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
				      n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
			GEM_TRACE_DUMP();
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots with adjacent
	 * engines.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for (n = 0; n < NUM_TIMELINES; n++) {
		for_each_engine(engine, gt, id) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			if (!intel_engine_can_store_dword(engine))
				continue;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				goto out;
			}

			intel_engine_pm_get(engine);
			rq = tl_write(tl, engine, count);
			intel_engine_pm_put(engine);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				goto out;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
			GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
				      n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
			GEM_TRACE_DUMP();
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_wrap(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct intel_timeline *tl;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Across a seqno wrap, we need to keep the old cacheline alive for
	 * foreign GPU references.
	 */

	tl = intel_timeline_create(gt);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
		goto out_free;

	err = intel_timeline_pin(tl, NULL);
	if (err)
		goto out_free;

	for_each_engine(engine, gt, id) {
		const u32 *hwsp_seqno[2];
		struct i915_request *rq;
		u32 seqno[2];

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}

		tl->seqno = -4u;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
			 seqno[0], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[0] = tl->hwsp_seqno;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
			 seqno[1], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[1] = tl->hwsp_seqno;

		/* With wrap should come a new hwsp */
		GEM_BUG_ON(seqno[1] >= seqno[0]);
		GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);

		i915_request_add(rq);

		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("Wait for timeline writes timed out!\n");
			err = -EIO;
			goto out;
		}

		if (READ_ONCE(*hwsp_seqno[0]) != seqno[0] ||
		    READ_ONCE(*hwsp_seqno[1]) != seqno[1]) {
			pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
			       *hwsp_seqno[0], *hwsp_seqno[1],
			       seqno[0], seqno[1]);
			err = -EINVAL;
			goto out;
		}

		intel_gt_retire_requests(gt); /* recycle HWSP */
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	intel_timeline_unpin(tl);
out_free:
	intel_timeline_put(tl);
	return err;
}
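
/*
 * Record a (seqno, HWSP value) pair in the watcher buffer: store @seqno at
 * *addr, load the dword at @hwsp into a CS GPR, then store that GPR at
 * *addr + 4. check_watcher() later walks the buffer comparing each pair.
 */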
static int emit_read_hwsp(struct i915_request *rq,
			  u32 seqno, u32 hwsp,
			  u32 *addr)
{
	const u32 gpr = i915_mmio_reg_offset(GEN8_RING_CS_GPR(rq->engine->mmio_base, 0));
	u32 *cs;

	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = *addr;
	*cs++ = 0;
	*cs++ = seqno;
	*addr += 4;

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = gpr;
	*cs++ = hwsp;
	*cs++ = 0;

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = gpr;
	*cs++ = *addr;
	*cs++ = 0;
	*addr += 4;

	intel_ring_advance(rq, cs);

	return 0;
}
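
/*
 * A watcher owns a buffer of (seqno, HWSP value) pairs written by
 * emit_read_hwsp() on behalf of the long-running request @rq; @addr tracks
 * the next free slot within @vma.
 */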
struct hwsp_watcher {
	struct i915_vma *vma;
	struct i915_request *rq;
	u32 addr;
	u32 *map;
};

static bool cmp_lt(u32 a, u32 b)
{
	return a < b;
}

static bool cmp_gte(u32 a, u32 b)
{
	return a >= b;
}

static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(gt->i915, SZ_2M);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	w->map = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(w->map)) {
		i915_gem_object_put(obj);
		return PTR_ERR(w->map);
	}

	vma = i915_gem_object_ggtt_pin_ww(obj, NULL, NULL, 0, 0, 0);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return PTR_ERR(vma);
	}

	w->vma = vma;
	w->addr = i915_ggtt_offset(vma);
	return 0;
}

static int create_watcher(struct hwsp_watcher *w,
			  struct intel_engine_cs *engine,
			  int ringsz)
{
	struct intel_context *ce;
	struct intel_timeline *tl;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	ce->ring = __intel_context_ring_size(ringsz);
	w->rq = intel_context_create_request(ce);
	intel_context_put(ce);
	if (IS_ERR(w->rq))
		return PTR_ERR(w->rq);

	w->addr = i915_ggtt_offset(w->vma);
	tl = w->rq->context->timeline;

	/* some light mutex juggling required; think co-routines */
	lockdep_unpin_lock(&tl->mutex, w->rq->cookie);
	mutex_unlock(&tl->mutex);

	return 0;
}

static int check_watcher(struct hwsp_watcher *w, const char *name,
			 bool (*op)(u32 hwsp, u32 seqno))
{
	struct i915_request *rq = fetch_and_zero(&w->rq);
	struct intel_timeline *tl = rq->context->timeline;
	u32 offset, end;
	int err;

	GEM_BUG_ON(w->addr - i915_ggtt_offset(w->vma) > w->vma->size);

	i915_request_get(rq);
	mutex_lock(&tl->mutex);
	rq->cookie = lockdep_pin_lock(&tl->mutex);
	i915_request_add(rq);

	if (i915_request_wait(rq, 0, HZ) < 0) {
		err = -ETIME;
		goto out;
	}

	err = 0;
	offset = 0;
	end = (w->addr - i915_ggtt_offset(w->vma)) / sizeof(*w->map);
	while (offset < end) {
		if (!op(w->map[offset + 1], w->map[offset])) {
			pr_err("Watcher '%s' found HWSP value %x for seqno %x\n",
			       name, w->map[offset + 1], w->map[offset]);
			err = -EINVAL;
		}

		offset += 2;
	}

out:
	i915_request_put(rq);
	return err;
}

static void cleanup_watcher(struct hwsp_watcher *w)
{
	if (w->rq) {
		struct intel_timeline *tl = w->rq->context->timeline;

		mutex_lock(&tl->mutex);
		w->rq->cookie = lockdep_pin_lock(&tl->mutex);

		i915_request_add(w->rq);
	}

	i915_vma_unpin_and_release(&w->vma, I915_VMA_RELEASE_MAP);
}

static bool retire_requests(struct intel_timeline *tl)
{
	struct i915_request *rq, *rn;

	mutex_lock(&tl->mutex);
	list_for_each_entry_safe(rq, rn, &tl->requests, link)
		if (!i915_request_retire(rq))
			break;
	mutex_unlock(&tl->mutex);

	return !i915_active_fence_isset(&tl->last_request);
}
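
/*
 * Keep submitting requests on @rq's context until the timeline seqno wraps
 * back past the original fence seqno, then return one further request
 * issued after the wrap.
 */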
static struct i915_request *wrap_timeline(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;
	struct intel_timeline *tl = ce->timeline;
	u32 seqno = rq->fence.seqno;

	while (tl->seqno >= seqno) { /* Cause a wrap */
		i915_request_put(rq);
		rq = intel_context_create_request(ce);
		if (IS_ERR(rq))
			return rq;

		i915_request_get(rq);
		i915_request_add(rq);
	}

	i915_request_put(rq);
	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return rq;

	i915_request_get(rq);
	i915_request_add(rq);

	return rq;
}

static int live_hwsp_read(void *arg)
{
	struct intel_gt *gt = arg;
	struct hwsp_watcher watcher[2] = {};
	struct intel_engine_cs *engine;
	struct intel_timeline *tl;
	enum intel_engine_id id;
	int err = 0;
	int i;

	/*
	 * If we take a reference to the HWSP for reading on the GPU, that
	 * read may be arbitrarily delayed (either by foreign fence or
	 * priority saturation) and a wrap can happen within 30 minutes.
	 * When the GPU read is finally submitted it should be correct,
	 * even across multiple wraps.
	 */

	if (INTEL_GEN(gt->i915) < 8) /* CS convenience [SRM/LRM] */
		return 0;

	tl = intel_timeline_create(gt);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	if (!tl->hwsp_cacheline)
		goto out_free;

	for (i = 0; i < ARRAY_SIZE(watcher); i++) {
		err = setup_watcher(&watcher[i], gt);
		if (err)
			goto out;
	}

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		unsigned long count = 0;
		IGT_TIMEOUT(end_time);

		/* Create a request we can use for remote reading of the HWSP */
		err = create_watcher(&watcher[1], engine, SZ_512K);
		if (err)
			goto out;

		do {
			struct i915_sw_fence *submit;
			struct i915_request *rq;
			u32 hwsp;

			submit = heap_fence_create(GFP_KERNEL);
			if (!submit) {
				err = -ENOMEM;
				goto out;
			}

			err = create_watcher(&watcher[0], engine, SZ_4K);
			if (err)
				goto out;

			ce = intel_context_create(engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}

			/* Skip to the end, saving 30 minutes of nops */
			tl->seqno = -10u + 2 * (count & 3);
			WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
			ce->timeline = intel_timeline_get(tl);

			rq = intel_context_create_request(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				intel_context_put(ce);
				goto out;
			}

			err = i915_sw_fence_await_dma_fence(&rq->submit,
							    &watcher[0].rq->fence, 0,
							    GFP_KERNEL);
			if (err < 0) {
				i915_request_add(rq);
				intel_context_put(ce);
				goto out;
			}

			mutex_lock(&watcher[0].rq->context->timeline->mutex);
			err = intel_timeline_read_hwsp(rq, watcher[0].rq, &hwsp);
			if (err == 0)
				err = emit_read_hwsp(watcher[0].rq, /* before */
						     rq->fence.seqno, hwsp,
						     &watcher[0].addr);
			mutex_unlock(&watcher[0].rq->context->timeline->mutex);
			if (err) {
				i915_request_add(rq);
				intel_context_put(ce);
				goto out;
			}

			mutex_lock(&watcher[1].rq->context->timeline->mutex);
			err = intel_timeline_read_hwsp(rq, watcher[1].rq, &hwsp);
			if (err == 0)
				err = emit_read_hwsp(watcher[1].rq, /* after */
						     rq->fence.seqno, hwsp,
						     &watcher[1].addr);
			mutex_unlock(&watcher[1].rq->context->timeline->mutex);
			if (err) {
				i915_request_add(rq);
				intel_context_put(ce);
				goto out;
			}

			i915_request_get(rq);
			i915_request_add(rq);

			rq = wrap_timeline(rq);
			intel_context_put(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto out;
			}

			err = i915_sw_fence_await_dma_fence(&watcher[1].rq->submit,
							    &rq->fence, 0,
							    GFP_KERNEL);
			if (err < 0) {
				i915_request_put(rq);
				goto out;
			}

			err = check_watcher(&watcher[0], "before", cmp_lt);
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			if (err) {
				i915_request_put(rq);
				goto out;
			}
			count++;

			if (8 * watcher[1].rq->ring->emit >
			    3 * watcher[1].rq->ring->size) {
				i915_request_put(rq);
				break;
			}

			/* Flush the timeline before manually wrapping again */
			if (i915_request_wait(rq,
					      I915_WAIT_INTERRUPTIBLE,
					      HZ) < 0) {
				err = -ETIME;
				i915_request_put(rq);
				goto out;
			}

			retire_requests(tl);
			i915_request_put(rq);
		} while (!__igt_timeout(end_time, NULL));
		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, 0xdeadbeef);

		pr_info("%s: simulated %lu wraps\n", engine->name, count);
		err = check_watcher(&watcher[1], "after", cmp_gte);
		if (err)
			goto out;
	}

out:
	for (i = 0; i < ARRAY_SIZE(watcher); i++)
		cleanup_watcher(&watcher[i]);

	if (igt_flush_test(gt->i915))
		err = -EIO;

out_free:
	intel_timeline_put(tl);
	return err;
}

static int live_hwsp_rollover_kernel(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Run the host for long enough, and even the kernel context will
	 * see a seqno rollover.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct intel_timeline *tl = ce->timeline;
		struct i915_request *rq[3] = {};
		int i;

		st_engine_heartbeat_disable(engine);
		if (intel_gt_wait_for_idle(gt, HZ / 2)) {
			err = -EIO;
			goto out;
		}

		GEM_BUG_ON(i915_active_fence_isset(&tl->last_request));
		tl->seqno = 0;
		timeline_rollback(tl);
		timeline_rollback(tl);
		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			struct i915_request *this;

			this = i915_request_create(ce);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out;
			}

			pr_debug("%s: create fence.seqno:%d\n",
				 engine->name,
				 lower_32_bits(this->fence.seqno));

			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

			rq[i] = i915_request_get(this);
			i915_request_add(this);
		}

		/* We expected a wrap! */
		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
			pr_err("Wait for timeline wrap timed out!\n");
			err = -EIO;
			goto out;
		}

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			if (!i915_request_completed(rq[i])) {
				pr_err("Pre-wrap request not completed!\n");
				err = -EINVAL;
				goto out;
			}
		}

out:
		for (i = 0; i < ARRAY_SIZE(rq); i++)
			i915_request_put(rq[i]);
		st_engine_heartbeat_enable(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

static int live_hwsp_rollover_user(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Simulate a long running user context, and force the seqno wrap
	 * on the user's timeline.
	 */

	for_each_engine(engine, gt, id) {
		struct i915_request *rq[3] = {};
		struct intel_timeline *tl;
		struct intel_context *ce;
		int i;

		ce = intel_context_create(engine);
		if (IS_ERR(ce))
			return PTR_ERR(ce);

		err = intel_context_alloc_state(ce);
		if (err)
			goto out;

		tl = ce->timeline;
		if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
			goto out;

		timeline_rollback(tl);
		timeline_rollback(tl);
		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			struct i915_request *this;

			this = intel_context_create_request(ce);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out;
			}

			pr_debug("%s: create fence.seqno:%d\n",
				 engine->name,
				 lower_32_bits(this->fence.seqno));

			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

			rq[i] = i915_request_get(this);
			i915_request_add(this);
		}

		/* We expected a wrap! */
		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
			pr_err("Wait for timeline wrap timed out!\n");
			err = -EIO;
			goto out;
		}

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			if (!i915_request_completed(rq[i])) {
				pr_err("Pre-wrap request not completed!\n");
				err = -EINVAL;
				goto out;
			}
		}

out:
		for (i = 0; i < ARRAY_SIZE(rq); i++)
			i915_request_put(rq[i]);
		intel_context_put(ce);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

static int live_hwsp_recycle(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count;
	int err = 0;

	/*
	 * Check seqno writes into one timeline at a time. We expect to
	 * recycle the breadcrumb slot between iterations and neither
	 * want to confuse ourselves or the GPU.
	 */

	count = 0;
	for_each_engine(engine, gt, id) {
		IGT_TIMEOUT(end_time);

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		do {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Wait for timeline writes timed out!\n");
				i915_request_put(rq);
				intel_timeline_put(tl);
				err = -EIO;
				break;
			}

			if (READ_ONCE(*tl->hwsp_seqno) != count) {
				GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x found 0x%x\n",
					      count, tl->fence_context,
					      tl->hwsp_offset, *tl->hwsp_seqno);
				GEM_TRACE_DUMP();
				err = -EINVAL;
			}

			i915_request_put(rq);
			intel_timeline_put(tl);
			count++;

			if (err)
				break;
		} while (!__igt_timeout(end_time, NULL));

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	return err;
}

int intel_timeline_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_hwsp_recycle),
		SUBTEST(live_hwsp_engine),
		SUBTEST(live_hwsp_alternate),
		SUBTEST(live_hwsp_wrap),
		SUBTEST(live_hwsp_read),
		SUBTEST(live_hwsp_rollover_kernel),
		SUBTEST(live_hwsp_rollover_user),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}