/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/kthread.h>

#include "../i915_selftest.h"

#include "mock_context.h"
#include "mock_drm.h" /* mock_file()/mock_file_free() used by active_engine() below */

struct hang {
	struct drm_i915_private *i915;
	struct drm_i915_gem_object *hws;
	struct drm_i915_gem_object *obj;
	u32 *seqno;
	u32 *batch;
};

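/*
 * hang_init() sets up the shared state for the hang tests: a page of seqno
 * slots (hws) mapped write-back, and a second internal object into which the
 * self-referencing "hang" batch is written.
 */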
static int hang_init(struct hang *h, struct drm_i915_private *i915)
{
	void *vaddr;
	int err;

	memset(h, 0, sizeof(*h));
	h->i915 = i915;

	h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->hws))
		return PTR_ERR(h->hws);

	h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->obj)) {
		err = PTR_ERR(h->obj);
		goto err_hws;
	}

	i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC);
	vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}
	h->seqno = memset(vaddr, 0xff, PAGE_SIZE);

	vaddr = i915_gem_object_pin_map(h->obj,
					HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_unpin_hws;
	}
	h->batch = vaddr;

	return 0;

err_unpin_hws:
	i915_gem_object_unpin_map(h->hws);
err_obj:
	i915_gem_object_put(h->obj);
err_hws:
	i915_gem_object_put(h->hws);
	return err;
}

static u64 hws_address(const struct i915_vma *hws,
		       const struct drm_i915_gem_request *rq)
{
	return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context);
}

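/*
 * emit_recurse_batch() writes a batch that reports the request's seqno into
 * the hws page and then jumps back into itself via MI_BATCH_BUFFER_START, so
 * the request never completes on its own; the trailing MI_BATCH_BUFFER_END is
 * only reached once the batch is later rewritten.
 */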
static int emit_recurse_batch(struct hang *h,
			      struct drm_i915_gem_request *rq)
{
	struct drm_i915_private *i915 = h->i915;
	struct i915_address_space *vm = rq->ctx->ppgtt ?
		&rq->ctx->ppgtt->base : &i915->ggtt.base;
	struct i915_vma *hws, *vma;
	unsigned int flags;
	u32 *batch;
	int err;

	vma = i915_vma_instance(h->obj, vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	hws = i915_vma_instance(h->hws, vm, NULL);
	if (IS_ERR(hws))
		return PTR_ERR(hws);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	err = i915_vma_pin(hws, 0, 0, PIN_USER);
	if (err)
		goto unpin_vma;

	i915_vma_move_to_active(vma, rq, 0);
	if (!i915_gem_object_has_active_reference(vma->obj)) {
		i915_gem_object_get(vma->obj);
		i915_gem_object_set_active_reference(vma->obj);
	}

	i915_vma_move_to_active(hws, rq, 0);
	if (!i915_gem_object_has_active_reference(hws->obj)) {
		i915_gem_object_get(hws->obj);
		i915_gem_object_set_active_reference(hws->obj);
	}

	batch = h->batch;
	if (INTEL_GEN(i915) >= 8) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = upper_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_ARB_CHECK;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_ARB_CHECK;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*batch++ = lower_32_bits(vma->node.start);
		*batch++ = upper_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 6) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_ARB_CHECK;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_ARB_CHECK;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
		*batch++ = lower_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 4) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_ARB_CHECK;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_ARB_CHECK;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
		*batch++ = lower_32_bits(vma->node.start);
	} else {
		*batch++ = MI_STORE_DWORD_IMM;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_ARB_CHECK;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_ARB_CHECK;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1;
		*batch++ = lower_32_bits(vma->node.start);
	}
	*batch++ = MI_BATCH_BUFFER_END; /* not reached */
	i915_gem_chipset_flush(h->i915);

	flags = 0;
	if (INTEL_GEN(vm->i915) <= 5)
		flags |= I915_DISPATCH_SECURE;

	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);

	i915_vma_unpin(hws);
unpin_vma:
	i915_vma_unpin(vma);
	return err;
}

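/*
 * hang_create_request() allocates a request carrying the recursing batch. If
 * the previous batch object is still active on the GPU, a fresh object is
 * swapped in first so that we never rewrite a batch the hardware may still be
 * executing.
 */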
static struct drm_i915_gem_request *
hang_create_request(struct hang *h,
		    struct intel_engine_cs *engine,
		    struct i915_gem_context *ctx)
{
	struct drm_i915_gem_request *rq;
	int err;

	if (i915_gem_object_is_active(h->obj)) {
		struct drm_i915_gem_object *obj;
		void *vaddr;

		obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE);
		if (IS_ERR(obj))
			return ERR_CAST(obj);

		vaddr = i915_gem_object_pin_map(obj,
						HAS_LLC(h->i915) ? I915_MAP_WB : I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			i915_gem_object_put(obj);
			return ERR_CAST(vaddr);
		}

		i915_gem_object_unpin_map(h->obj);
		i915_gem_object_put(h->obj);

		h->obj = obj;
		h->batch = vaddr;
	}

	rq = i915_gem_request_alloc(engine, ctx);
	if (IS_ERR(rq))
		return rq;

	err = emit_recurse_batch(h, rq);
	if (err) {
		__i915_add_request(rq, false);
		return ERR_PTR(err);
	}

	return rq;
}

static u32 hws_seqno(const struct hang *h,
		     const struct drm_i915_gem_request *rq)
{
	return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
}

static void hang_fini(struct hang *h)
{
	*h->batch = MI_BATCH_BUFFER_END;
	i915_gem_chipset_flush(h->i915);

	i915_gem_object_unpin_map(h->obj);
	i915_gem_object_put(h->obj);

	i915_gem_object_unpin_map(h->hws);
	i915_gem_object_put(h->hws);

	i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED);
}

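/*
 * wait_for_hang() polls the hws slot (a short busy-wait followed by a longer
 * sleeping wait) until the batch has reported its seqno, i.e. the "hang" is
 * actually executing on the GPU.
 */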
static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq)
{
	return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
					       rq->fence.seqno),
			     10) &&
		 wait_for(i915_seqno_passed(hws_seqno(h, rq),
					    rq->fence.seqno),
			  1000));
}

static int igt_hang_sanitycheck(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_request *rq;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Basic check that we can execute our hanging batch */

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		long timeout;

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = hang_create_request(&h, engine, i915->kernel_context);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			pr_err("Failed to create request for %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}

		i915_gem_request_get(rq);

		*h.batch = MI_BATCH_BUFFER_END;
		i915_gem_chipset_flush(i915);

		__i915_add_request(rq, true);

		timeout = i915_wait_request(rq,
					    I915_WAIT_LOCKED,
					    MAX_SCHEDULE_TIMEOUT);
		i915_gem_request_put(rq);

		if (timeout < 0) {
			err = timeout;
			pr_err("Wait for request failed on %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

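/*
 * global_reset_lock()/global_reset_unlock() serialise the selftest against
 * the driver's own reset paths by claiming the backoff bit and every
 * per-engine reset bit in gpu_error.flags.
 */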
static void global_reset_lock(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	pr_debug("%s: current gpu_error=%08lx\n",
		 __func__, i915->gpu_error.flags);

	while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags))
		wait_event(i915->gpu_error.reset_queue,
			   !test_bit(I915_RESET_BACKOFF,
				     &i915->gpu_error.flags));

	for_each_engine(engine, i915, id) {
		while (test_and_set_bit(I915_RESET_ENGINE + id,
					&i915->gpu_error.flags))
			wait_on_bit(&i915->gpu_error.flags,
				    I915_RESET_ENGINE + id,
				    TASK_UNINTERRUPTIBLE);
	}
}

static void global_reset_unlock(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id)
		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);

	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
	wake_up_all(&i915->gpu_error.reset_queue);
}

static int igt_global_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	unsigned int reset_count;
	int err = 0;

	/* Check that we can issue a global GPU reset */

	global_reset_lock(i915);
	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);

	mutex_lock(&i915->drm.struct_mutex);
	reset_count = i915_reset_count(&i915->gpu_error);

	i915_reset(i915, I915_RESET_QUIET);

	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
	}
	mutex_unlock(&i915->drm.struct_mutex);

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}

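/*
 * Common body for the idle/active engine-reset tests: repeatedly reset one
 * engine (optionally with a hanging request on it) and check that only the
 * per-engine reset count advances, never the global one.
 */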
static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err = 0;

	/* Check that we can issue an engine reset on an idle engine (no-op) */

	if (!intel_has_reset_engine(i915))
		return 0;

	if (active) {
		mutex_lock(&i915->drm.struct_mutex);
		err = hang_init(&h, i915);
		mutex_unlock(&i915->drm.struct_mutex);
		if (err)
			return err;
	}

	for_each_engine(engine, i915, id) {
		unsigned int reset_count, reset_engine_count;
		IGT_TIMEOUT(end_time);

		if (active && !intel_engine_can_store_dword(engine))
			continue;

		reset_count = i915_reset_count(&i915->gpu_error);
		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
							     engine);

		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
		do {
			if (active) {
				struct drm_i915_gem_request *rq;

				mutex_lock(&i915->drm.struct_mutex);
				rq = hang_create_request(&h, engine,
							 i915->kernel_context);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					mutex_unlock(&i915->drm.struct_mutex);
					break;
				}

				i915_gem_request_get(rq);
				__i915_add_request(rq, true);
				mutex_unlock(&i915->drm.struct_mutex);

				if (!wait_for_hang(&h, rq)) {
					struct drm_printer p = drm_info_printer(i915->drm.dev);

					pr_err("%s: Failed to start request %x, at %x\n",
					       __func__, rq->fence.seqno, hws_seqno(&h, rq));
					intel_engine_dump(engine, &p,
							  "%s\n", engine->name);

					i915_gem_request_put(rq);
					err = -EIO;
					break;
				}

				i915_gem_request_put(rq);
			}

			engine->hangcheck.stalled = true;
			engine->hangcheck.seqno =
				intel_engine_get_seqno(engine);

			err = i915_reset_engine(engine, I915_RESET_QUIET);
			if (err) {
				pr_err("i915_reset_engine failed\n");
				break;
			}

			if (i915_reset_count(&i915->gpu_error) != reset_count) {
				pr_err("Full GPU reset recorded! (engine reset expected)\n");
				err = -EINVAL;
				break;
			}

			reset_engine_count += active;
			if (i915_reset_engine_count(&i915->gpu_error, engine) !=
			    reset_engine_count) {
				pr_err("%s engine reset %srecorded!\n",
				       engine->name, active ? "not " : "");
				err = -EINVAL;
				break;
			}

			engine->hangcheck.stalled = false;
		} while (time_before(jiffies, end_time));
		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);

		if (err)
			break;
	}

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	if (active) {
		mutex_lock(&i915->drm.struct_mutex);
		hang_fini(&h);
		mutex_unlock(&i915->drm.struct_mutex);
	}

	return err;
}

static int igt_reset_idle_engine(void *arg)
{
	return __igt_reset_engine(arg, false);
}

static int igt_reset_active_engine(void *arg)
{
	return __igt_reset_engine(arg, true);
}

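/*
 * active_engine() is the kthread body used to keep an "innocent" engine busy
 * with a stream of trivial requests, alternating between two contexts, while
 * another engine is being reset.
 */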
static int active_engine(void *data)
{
	struct intel_engine_cs *engine = data;
	struct drm_i915_gem_request *rq[2] = {};
	struct i915_gem_context *ctx[2];
	struct drm_file *file;
	unsigned long count = 0;
	int err = 0;

	file = mock_file(engine->i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	mutex_lock(&engine->i915->drm.struct_mutex);
	ctx[0] = live_context(engine->i915, file);
	mutex_unlock(&engine->i915->drm.struct_mutex);
	if (IS_ERR(ctx[0])) {
		err = PTR_ERR(ctx[0]);
		goto err_file;
	}

	mutex_lock(&engine->i915->drm.struct_mutex);
	ctx[1] = live_context(engine->i915, file);
	mutex_unlock(&engine->i915->drm.struct_mutex);
	if (IS_ERR(ctx[1])) {
		err = PTR_ERR(ctx[1]);
		i915_gem_context_put(ctx[0]);
		goto err_file;
	}

	while (!kthread_should_stop()) {
		unsigned int idx = count++ & 1;
		struct drm_i915_gem_request *old = rq[idx];
		struct drm_i915_gem_request *new;

		mutex_lock(&engine->i915->drm.struct_mutex);
		new = i915_gem_request_alloc(engine, ctx[idx]);
		if (IS_ERR(new)) {
			mutex_unlock(&engine->i915->drm.struct_mutex);
			err = PTR_ERR(new);
			break;
		}

		rq[idx] = i915_gem_request_get(new);
		i915_add_request(new);
		mutex_unlock(&engine->i915->drm.struct_mutex);

		if (old) {
			i915_wait_request(old, 0, MAX_SCHEDULE_TIMEOUT);
			i915_gem_request_put(old);
		}
	}

	for (count = 0; count < ARRAY_SIZE(rq); count++)
		i915_gem_request_put(rq[count]);

err_file:
	mock_file_free(engine->i915, file);
	return err;
}

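/*
 * Reset one engine while every other engine is kept busy by active_engine()
 * threads, then verify that no innocent engine was reset and that no full GPU
 * reset was recorded.
 */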
static int __igt_reset_engine_others(struct drm_i915_private *i915,
				     bool active)
{
	struct intel_engine_cs *engine, *other;
	enum intel_engine_id id, tmp;
	struct hang h;
	int err = 0;

	/* Check that issuing a reset on one engine does not interfere
	 * with any other engine.
	 */

	if (!intel_has_reset_engine(i915))
		return 0;

	if (active) {
		mutex_lock(&i915->drm.struct_mutex);
		err = hang_init(&h, i915);
		mutex_unlock(&i915->drm.struct_mutex);
		if (err)
			return err;
	}

	for_each_engine(engine, i915, id) {
		struct task_struct *threads[I915_NUM_ENGINES] = {};
		unsigned long resets[I915_NUM_ENGINES];
		unsigned long global = i915_reset_count(&i915->gpu_error);
		unsigned long count = 0;
		IGT_TIMEOUT(end_time);

		if (active && !intel_engine_can_store_dword(engine))
			continue;

		memset(threads, 0, sizeof(threads));
		for_each_engine(other, i915, tmp) {
			struct task_struct *tsk;

			resets[tmp] = i915_reset_engine_count(&i915->gpu_error,
							      other);

			if (other == engine)
				continue;

			tsk = kthread_run(active_engine, other,
					  "igt/%s", other->name);
			if (IS_ERR(tsk)) {
				err = PTR_ERR(tsk);
				goto unwind;
			}

			threads[tmp] = tsk;
			get_task_struct(tsk);
		}

		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
		do {
			if (active) {
				struct drm_i915_gem_request *rq;

				mutex_lock(&i915->drm.struct_mutex);
				rq = hang_create_request(&h, engine,
							 i915->kernel_context);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					mutex_unlock(&i915->drm.struct_mutex);
					break;
				}

				i915_gem_request_get(rq);
				__i915_add_request(rq, true);
				mutex_unlock(&i915->drm.struct_mutex);

				if (!wait_for_hang(&h, rq)) {
					struct drm_printer p = drm_info_printer(i915->drm.dev);

					pr_err("%s: Failed to start request %x, at %x\n",
					       __func__, rq->fence.seqno, hws_seqno(&h, rq));
					intel_engine_dump(engine, &p,
							  "%s\n", engine->name);

					i915_gem_request_put(rq);
					err = -EIO;
					break;
				}

				i915_gem_request_put(rq);
			}

			engine->hangcheck.stalled = true;
			engine->hangcheck.seqno =
				intel_engine_get_seqno(engine);

			err = i915_reset_engine(engine, I915_RESET_QUIET);
			if (err) {
				pr_err("i915_reset_engine(%s:%s) failed, err=%d\n",
				       engine->name, active ? "active" : "idle", err);
				break;
			}

			engine->hangcheck.stalled = false;
			count++;
		} while (time_before(jiffies, end_time));
		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
		pr_info("i915_reset_engine(%s:%s): %lu resets\n",
			engine->name, active ? "active" : "idle", count);

		if (i915_reset_engine_count(&i915->gpu_error, engine) -
		    resets[engine->id] != (active ? count : 0)) {
			pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n",
			       engine->name, active ? "active" : "idle", count,
			       i915_reset_engine_count(&i915->gpu_error,
						       engine) - resets[engine->id]);
			if (!err)
				err = -EINVAL;
		}

unwind:
		for_each_engine(other, i915, tmp) {
			int ret;

			if (!threads[tmp])
				continue;

			ret = kthread_stop(threads[tmp]);
			if (ret) {
				pr_err("kthread for other engine %s failed, err=%d\n",
				       other->name, ret);
				if (!err)
					err = ret;
			}
			put_task_struct(threads[tmp]);

			if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error,
								   other)) {
				pr_err("Innocent engine %s was reset (count=%ld)\n",
				       other->name,
				       i915_reset_engine_count(&i915->gpu_error,
							       other) - resets[tmp]);
				if (!err)
					err = -EINVAL;
			}
		}

		if (global != i915_reset_count(&i915->gpu_error)) {
			pr_err("Global reset (count=%ld)!\n",
			       i915_reset_count(&i915->gpu_error) - global);
			if (!err)
				err = -EINVAL;
		}

		if (err)
			break;
	}

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	if (active) {
		mutex_lock(&i915->drm.struct_mutex);
		hang_fini(&h);
		mutex_unlock(&i915->drm.struct_mutex);
	}

	return err;
}

static int igt_reset_idle_engine_others(void *arg)
{
	return __igt_reset_engine_others(arg, false);
}

static int igt_reset_active_engine_others(void *arg)
{
	return __igt_reset_engine_others(arg, true);
}

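/*
 * fake_hangcheck() mimics the hangcheck worker: mark the request's engine as
 * stalled, raise I915_RESET_HANDOFF and kick any waiters, returning the reset
 * count sampled beforehand for later comparison.
 */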
static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
{
	u32 reset_count;

	rq->engine->hangcheck.stalled = true;
	rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine);

	reset_count = i915_reset_count(&rq->i915->gpu_error);

	set_bit(I915_RESET_HANDOFF, &rq->i915->gpu_error.flags);
	wake_up_all(&rq->i915->gpu_error.wait_queue);

	return reset_count;
}

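/*
 * igt_wait_reset() checks that a waiter stuck on a hung request is released
 * by the GPU reset triggered through the handoff bit.
 */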
static int igt_wait_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_request *rq;
	unsigned int reset_count;
	struct hang h;
	long timeout;
	int err;

	if (!intel_engine_can_store_dword(i915->engine[RCS]))
		return 0;

	/* Check that we detect a stuck waiter and issue a reset */

	global_reset_lock(i915);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto fini;
	}

	i915_gem_request_get(rq);
	__i915_add_request(rq, true);

	if (!wait_for_hang(&h, rq)) {
		struct drm_printer p = drm_info_printer(i915->drm.dev);

		pr_err("%s: Failed to start request %x, at %x\n",
		       __func__, rq->fence.seqno, hws_seqno(&h, rq));
		intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);

		i915_gem_set_wedged(i915);

		err = -EIO;
		goto out_rq;
	}

	reset_count = fake_hangcheck(rq);

	timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10);
	if (timeout < 0) {
		pr_err("i915_wait_request failed on a stuck request: err=%ld\n",
		       timeout);
		err = timeout;
		goto out_rq;
	}

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
		goto out_rq;
	}

out_rq:
	i915_gem_request_put(rq);
fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}

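/*
 * igt_reset_queue() queues a second request behind a hang: after the reset
 * the guilty request must be marked -EIO while the innocent one is replayed
 * with no error status.
 */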
static int igt_reset_queue(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Check that we replay pending requests following a hang */

	global_reset_lock(i915);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		struct drm_i915_gem_request *prev;
		IGT_TIMEOUT(end_time);
		unsigned int count;

		if (!intel_engine_can_store_dword(engine))
			continue;

		prev = hang_create_request(&h, engine, i915->kernel_context);
		if (IS_ERR(prev)) {
			err = PTR_ERR(prev);
			goto fini;
		}

		i915_gem_request_get(prev);
		__i915_add_request(prev, true);

		count = 0;
		do {
			struct drm_i915_gem_request *rq;
			unsigned int reset_count;

			rq = hang_create_request(&h,
						 engine,
						 i915->kernel_context);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto fini;
			}

			i915_gem_request_get(rq);
			__i915_add_request(rq, true);

			if (!wait_for_hang(&h, prev)) {
				struct drm_printer p = drm_info_printer(i915->drm.dev);

				pr_err("%s: Failed to start request %x, at %x\n",
				       __func__, prev->fence.seqno, hws_seqno(&h, prev));
				intel_engine_dump(prev->engine, &p,
						  "%s\n", prev->engine->name);

				i915_gem_request_put(rq);
				i915_gem_request_put(prev);

				i915_gem_set_wedged(i915);

				err = -EIO;
				goto fini;
			}

			reset_count = fake_hangcheck(prev);

			i915_reset(i915, I915_RESET_QUIET);

			GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
					    &i915->gpu_error.flags));

			if (prev->fence.error != -EIO) {
				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
				       prev->fence.error);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (rq->fence.error) {
				pr_err("Fence error status not zero [%d] after unrelated reset\n",
				       rq->fence.error);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (i915_reset_count(&i915->gpu_error) == reset_count) {
				pr_err("No GPU reset recorded!\n");
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			i915_gem_request_put(prev);
			prev = rq;
			count++;
		} while (time_before(jiffies, end_time));
		pr_info("%s: Completed %d resets\n", engine->name, count);

		*h.batch = MI_BATCH_BUFFER_END;
		i915_gem_chipset_flush(i915);

		i915_gem_request_put(prev);
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}

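/*
 * igt_handle_error() exercises i915_handle_error() directly (with error
 * capture temporarily disabled) and checks that the guilty request ends up
 * with fence.error == -EIO.
 */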
static int igt_handle_error(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine = i915->engine[RCS];
	struct hang h;
	struct drm_i915_gem_request *rq;
	struct i915_gpu_state *error;
	int err;

	/* Check that we can issue a global GPU and engine reset */

	if (!intel_has_reset_engine(i915))
		return 0;

	if (!intel_engine_can_store_dword(i915->engine[RCS]))
		return 0;

	mutex_lock(&i915->drm.struct_mutex);

	err = hang_init(&h, i915);
	if (err)
		goto err_unlock;

	rq = hang_create_request(&h, engine, i915->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_fini;
	}

	i915_gem_request_get(rq);
	__i915_add_request(rq, true);

	if (!wait_for_hang(&h, rq)) {
		struct drm_printer p = drm_info_printer(i915->drm.dev);

		pr_err("%s: Failed to start request %x, at %x\n",
		       __func__, rq->fence.seqno, hws_seqno(&h, rq));
		intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);

		i915_reset(i915, 0);
		i915_gem_set_wedged(i915);

		err = -EIO;
		goto err_request;
	}

	mutex_unlock(&i915->drm.struct_mutex);

	/* Temporarily disable error capture */
	error = xchg(&i915->gpu_error.first_error, (void *)-1);

	engine->hangcheck.stalled = true;
	engine->hangcheck.seqno = intel_engine_get_seqno(engine);

	i915_handle_error(i915, intel_engine_flag(engine), "%s", __func__);

	xchg(&i915->gpu_error.first_error, error);

	mutex_lock(&i915->drm.struct_mutex);

	if (rq->fence.error != -EIO) {
		pr_err("Guilty request not identified!\n");
		err = -EINVAL;
		goto err_request;
	}

err_request:
	i915_gem_request_put(rq);
err_fini:
	hang_fini(&h);
err_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_global_reset), /* attempt to recover GPU first */
		SUBTEST(igt_hang_sanitycheck),
		SUBTEST(igt_reset_idle_engine),
		SUBTEST(igt_reset_active_engine),
		SUBTEST(igt_reset_idle_engine_others),
		SUBTEST(igt_reset_active_engine_others),
		SUBTEST(igt_wait_reset),
		SUBTEST(igt_reset_queue),
		SUBTEST(igt_handle_error),
	};
	bool saved_hangcheck;
	int err;

	if (!intel_has_gpu_reset(i915))
		return 0;

	intel_runtime_pm_get(i915);
	saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);

	err = i915_subtests(tests, i915);

	i915_modparams.enable_hangcheck = saved_hangcheck;
	intel_runtime_pm_put(i915);

	return err;
}