/*
 * Copyright (c) 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Mika Kuoppala <mika.kuoppala@intel.com>
 */

#include <generated/utsrelease.h>
#include <linux/stop_machine.h>
#include <linux/zlib.h>
#include <drm/drm_print.h>

#include "i915_drv.h"

static const char *engine_str(int engine)
{
	switch (engine) {
	case RCS: return "render";
	case VCS: return "bsd";
	case BCS: return "blt";
	case VECS: return "vebox";
	case VCS2: return "bsd2";
	default: return "";
	}
}

static const char *tiling_flag(int tiling)
{
	switch (tiling) {
	default:
	case I915_TILING_NONE: return "";
	case I915_TILING_X: return " X";
	case I915_TILING_Y: return " Y";
	}
}

static const char *dirty_flag(int dirty)
{
	return dirty ? " dirty" : "";
}

static const char *purgeable_flag(int purgeable)
{
	return purgeable ? " purgeable" : "";
}

static bool __i915_error_ok(struct drm_i915_error_state_buf *e)
{
	if (!e->err && WARN(e->bytes > (e->size - 1), "overflow")) {
		e->err = -ENOSPC;
		return false;
	}

	if (e->bytes == e->size - 1 || e->err)
		return false;

	return true;
}

static bool __i915_error_seek(struct drm_i915_error_state_buf *e,
			      unsigned len)
{
	if (e->pos + len <= e->start) {
		e->pos += len;
		return false;
	}

	/* First vsnprintf needs to fit in its entirety for memmove */
	if (len >= e->size) {
		e->err = -EIO;
		return false;
	}

	return true;
}

static void __i915_error_advance(struct drm_i915_error_state_buf *e,
				 unsigned len)
{
	/* If this is the first printf in this window, adjust it so that
	 * the start position matches the start of the buffer.
	 */
	if (e->pos < e->start) {
		const size_t off = e->start - e->pos;

		/* Should not happen but be paranoid */
		if (off > len || e->bytes) {
			e->err = -EIO;
			return;
		}

		memmove(e->buf, e->buf + off, len - off);
		e->bytes = len - off;
		e->pos = e->start;
		return;
	}

	e->bytes += len;
	e->pos += len;
}

static void i915_error_vprintf(struct drm_i915_error_state_buf *e,
			       const char *f, va_list args)
{
	unsigned len;

	if (!__i915_error_ok(e))
		return;

	/* Seek to the first printf which hits the start position */
	if (e->pos < e->start) {
		va_list tmp;

		va_copy(tmp, args);
		len = vsnprintf(NULL, 0, f, tmp);
		va_end(tmp);

		if (!__i915_error_seek(e, len))
			return;
	}

	len = vsnprintf(e->buf + e->bytes, e->size - e->bytes, f, args);
	if (len >= e->size - e->bytes)
		len = e->size - e->bytes - 1;

	__i915_error_advance(e, len);
}

static void i915_error_puts(struct drm_i915_error_state_buf *e,
			    const char *str)
{
	unsigned len;

	if (!__i915_error_ok(e))
		return;

	len = strlen(str);

	/* Seek to the first output which hits the start position */
	if (e->pos < e->start) {
		if (!__i915_error_seek(e, len))
			return;
	}

	if (len >= e->size - e->bytes)
		len = e->size - e->bytes - 1;
	memcpy(e->buf + e->bytes, str, len);

	__i915_error_advance(e, len);
}

#define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
#define err_puts(e, s) i915_error_puts(e, s)

static void __i915_printfn_error(struct drm_printer *p, struct va_format *vaf)
{
	i915_error_vprintf(p->arg, vaf->fmt, *vaf->va);
}

static inline struct drm_printer
i915_error_printer(struct drm_i915_error_state_buf *e)
{
	struct drm_printer p = {
		.printfn = __i915_printfn_error,
		.arg = e,
	};
	return p;
}

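/*
 * Captured page contents are either deflate-compressed (when
 * CONFIG_DRM_I915_COMPRESS_ERROR is enabled) or copied verbatim. Both
 * variants provide the same compress_init/compress_page/compress_fini
 * interface so the capture path is identical either way.
 */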
#ifdef CONFIG_DRM_I915_COMPRESS_ERROR

struct compress {
	struct z_stream_s zstream;
	void *tmp;
};

static bool compress_init(struct compress *c)
{
	struct z_stream_s *zstream = memset(&c->zstream, 0, sizeof(c->zstream));

	zstream->workspace =
		kmalloc(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
			GFP_ATOMIC | __GFP_NOWARN);
	if (!zstream->workspace)
		return false;

	if (zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) != Z_OK) {
		kfree(zstream->workspace);
		return false;
	}

	c->tmp = NULL;
	if (i915_has_memcpy_from_wc())
		c->tmp = (void *)__get_free_page(GFP_ATOMIC | __GFP_NOWARN);

	return true;
}

static int compress_page(struct compress *c,
			 void *src,
			 struct drm_i915_error_object *dst)
{
	struct z_stream_s *zstream = &c->zstream;

	zstream->next_in = src;
	if (c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
		zstream->next_in = c->tmp;
	zstream->avail_in = PAGE_SIZE;

	do {
		if (zstream->avail_out == 0) {
			unsigned long page;

			page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
			if (!page)
				return -ENOMEM;

			dst->pages[dst->page_count++] = (void *)page;

			zstream->next_out = (void *)page;
			zstream->avail_out = PAGE_SIZE;
		}

		if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK)
			return -EIO;
	} while (zstream->avail_in);

	/* Fallback to uncompressed if we increase size? */
	if (0 && zstream->total_out > zstream->total_in)
		return -E2BIG;

	return 0;
}

static void compress_fini(struct compress *c,
			  struct drm_i915_error_object *dst)
{
	struct z_stream_s *zstream = &c->zstream;

	if (dst) {
		zlib_deflate(zstream, Z_FINISH);
		dst->unused = zstream->avail_out;
	}

	zlib_deflateEnd(zstream);
	kfree(zstream->workspace);

	if (c->tmp)
		free_page((unsigned long)c->tmp);
}

static void err_compression_marker(struct drm_i915_error_state_buf *m)
{
	err_puts(m, ":");
}

#else

struct compress {
};

static bool compress_init(struct compress *c)
{
	return true;
}

static int compress_page(struct compress *c,
			 void *src,
			 struct drm_i915_error_object *dst)
{
	unsigned long page;
	void *ptr;

	page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
	if (!page)
		return -ENOMEM;

	ptr = (void *)page;
	if (!i915_memcpy_from_wc(ptr, src, PAGE_SIZE))
		memcpy(ptr, src, PAGE_SIZE);
	dst->pages[dst->page_count++] = ptr;

	return 0;
}

static void compress_fini(struct compress *c,
			  struct drm_i915_error_object *dst)
{
}

static void err_compression_marker(struct drm_i915_error_state_buf *m)
{
	err_puts(m, "~");
}

#endif

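/*
 * Print one line per buffer object: GGTT offset, size, read/write domains
 * and per-engine seqnos, followed by flag suffixes (tiling, dirty,
 * purgeable, userptr, engine and cache level).
 */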
static void print_error_buffers(struct drm_i915_error_state_buf *m,
				const char *name,
				struct drm_i915_error_buffer *err,
				int count)
{
	int i;

	err_printf(m, "%s [%d]:\n", name, count);

	while (count--) {
		err_printf(m, "    %08x_%08x %8u %02x %02x [ ",
			   upper_32_bits(err->gtt_offset),
			   lower_32_bits(err->gtt_offset),
			   err->size,
			   err->read_domains,
			   err->write_domain);
		for (i = 0; i < I915_NUM_ENGINES; i++)
			err_printf(m, "%02x ", err->rseqno[i]);

		err_printf(m, "] %02x", err->wseqno);
		err_puts(m, tiling_flag(err->tiling));
		err_puts(m, dirty_flag(err->dirty));
		err_puts(m, purgeable_flag(err->purgeable));
		err_puts(m, err->userptr ? " userptr" : "");
		err_puts(m, err->engine != -1 ? " " : "");
		err_puts(m, engine_str(err->engine));
		err_puts(m, i915_cache_level_str(m->i915, err->cache_level));

		if (err->name)
			err_printf(m, " (name: %d)", err->name);
		if (err->fence_reg != I915_FENCE_REG_NONE)
			err_printf(m, " (fence: %d)", err->fence_reg);

		err_puts(m, "\n");
		err++;
	}
}

static void error_print_instdone(struct drm_i915_error_state_buf *m,
				 const struct drm_i915_error_engine *ee)
{
	int slice;
	int subslice;

	err_printf(m, "  INSTDONE: 0x%08x\n",
		   ee->instdone.instdone);

	if (ee->engine_id != RCS || INTEL_GEN(m->i915) <= 3)
		return;

	err_printf(m, "  SC_INSTDONE: 0x%08x\n",
		   ee->instdone.slice_common);

	if (INTEL_GEN(m->i915) <= 6)
		return;

	for_each_instdone_slice_subslice(m->i915, slice, subslice)
		err_printf(m, "  SAMPLER_INSTDONE[%d][%d]: 0x%08x\n",
			   slice, subslice,
			   ee->instdone.sampler[slice][subslice]);

	for_each_instdone_slice_subslice(m->i915, slice, subslice)
		err_printf(m, "  ROW_INSTDONE[%d][%d]: 0x%08x\n",
			   slice, subslice,
			   ee->instdone.row[slice][subslice]);
}

static void error_print_request(struct drm_i915_error_state_buf *m,
				const char *prefix,
				const struct drm_i915_error_request *erq)
{
	if (!erq->seqno)
		return;

	err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n",
		   prefix, erq->pid, erq->ban_score,
		   erq->context, erq->seqno, erq->priority,
		   jiffies_to_msecs(jiffies - erq->jiffies),
		   erq->head, erq->tail);
}

static void error_print_context(struct drm_i915_error_state_buf *m,
				const char *header,
				const struct drm_i915_error_context *ctx)
{
	err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d guilty %d active %d\n",
		   header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
		   ctx->priority, ctx->ban_score, ctx->guilty, ctx->active);
}

static void error_print_engine(struct drm_i915_error_state_buf *m,
			       const struct drm_i915_error_engine *ee)
{
	int n;

	err_printf(m, "%s command stream:\n", engine_str(ee->engine_id));
	err_printf(m, "  IDLE?: %s\n", yesno(ee->idle));
	err_printf(m, "  START: 0x%08x\n", ee->start);
	err_printf(m, "  HEAD:  0x%08x [0x%08x]\n", ee->head, ee->rq_head);
	err_printf(m, "  TAIL:  0x%08x [0x%08x, 0x%08x]\n",
		   ee->tail, ee->rq_post, ee->rq_tail);
	err_printf(m, "  CTL:   0x%08x\n", ee->ctl);
	err_printf(m, "  MODE:  0x%08x\n", ee->mode);
	err_printf(m, "  HWS:   0x%08x\n", ee->hws);
	err_printf(m, "  ACTHD: 0x%08x %08x\n",
		   (u32)(ee->acthd >> 32), (u32)ee->acthd);
	err_printf(m, "  IPEIR: 0x%08x\n", ee->ipeir);
	err_printf(m, "  IPEHR: 0x%08x\n", ee->ipehr);

	error_print_instdone(m, ee);

	if (ee->batchbuffer) {
		u64 start = ee->batchbuffer->gtt_offset;
		u64 end = start + ee->batchbuffer->gtt_size;

		err_printf(m, "  batch: [0x%08x_%08x, 0x%08x_%08x]\n",
			   upper_32_bits(start), lower_32_bits(start),
			   upper_32_bits(end), lower_32_bits(end));
	}
	if (INTEL_GEN(m->i915) >= 4) {
		err_printf(m, "  BBADDR: 0x%08x_%08x\n",
			   (u32)(ee->bbaddr >> 32), (u32)ee->bbaddr);
		err_printf(m, "  BB_STATE: 0x%08x\n", ee->bbstate);
		err_printf(m, "  INSTPS: 0x%08x\n", ee->instps);
	}
	err_printf(m, "  INSTPM: 0x%08x\n", ee->instpm);
	err_printf(m, "  FADDR: 0x%08x %08x\n", upper_32_bits(ee->faddr),
		   lower_32_bits(ee->faddr));
	if (INTEL_GEN(m->i915) >= 6) {
		err_printf(m, "  RC PSMI: 0x%08x\n", ee->rc_psmi);
		err_printf(m, "  FAULT_REG: 0x%08x\n", ee->fault_reg);
		err_printf(m, "  SYNC_0: 0x%08x\n",
			   ee->semaphore_mboxes[0]);
		err_printf(m, "  SYNC_1: 0x%08x\n",
			   ee->semaphore_mboxes[1]);
		if (HAS_VEBOX(m->i915))
			err_printf(m, "  SYNC_2: 0x%08x\n",
				   ee->semaphore_mboxes[2]);
	}
	if (USES_PPGTT(m->i915)) {
		err_printf(m, "  GFX_MODE: 0x%08x\n", ee->vm_info.gfx_mode);

		if (INTEL_GEN(m->i915) >= 8) {
			int i;

			for (i = 0; i < 4; i++)
				err_printf(m, "  PDP%d: 0x%016llx\n",
					   i, ee->vm_info.pdp[i]);
		} else {
			err_printf(m, "  PP_DIR_BASE: 0x%08x\n",
				   ee->vm_info.pp_dir_base);
		}
	}
	err_printf(m, "  seqno: 0x%08x\n", ee->seqno);
	err_printf(m, "  last_seqno: 0x%08x\n", ee->last_seqno);
	err_printf(m, "  waiting: %s\n", yesno(ee->waiting));
	err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);
	err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail);
	err_printf(m, "  hangcheck stall: %s\n", yesno(ee->hangcheck_stalled));
	err_printf(m, "  hangcheck action: %s\n",
		   hangcheck_action_to_str(ee->hangcheck_action));
	err_printf(m, "  hangcheck action timestamp: %lu, %u ms ago\n",
		   ee->hangcheck_timestamp,
		   jiffies_to_msecs(jiffies - ee->hangcheck_timestamp));
	err_printf(m, "  engine reset count: %u\n", ee->reset_count);

	for (n = 0; n < ee->num_ports; n++) {
		err_printf(m, "  ELSP[%d]:", n);
		error_print_request(m, " ", &ee->execlist[n]);
	}

	error_print_context(m, "  Active context: ", &ee->context);
}

void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
{
	va_list args;

	va_start(args, f);
	i915_error_vprintf(e, f, args);
	va_end(args);
}

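/*
 * Page contents are emitted as ascii85: each 32-bit word becomes five
 * printable characters ('!' + base-85 digits), with an all-zero word
 * shortened to "z", keeping the dump compact and text-safe.
 */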
static int
ascii85_encode_len(int len)
{
	return DIV_ROUND_UP(len, 4);
}

static bool
ascii85_encode(u32 in, char *out)
{
	int i;

	if (in == 0)
		return false;

	out[5] = '\0';
	for (i = 5; i--; ) {
		out[i] = '!' + in % 85;
		in /= 85;
	}

	return true;
}

static void print_error_obj(struct drm_i915_error_state_buf *m,
			    struct intel_engine_cs *engine,
			    const char *name,
			    struct drm_i915_error_object *obj)
{
	char out[6];
	int page;

	if (!obj)
		return;

	if (name) {
		err_printf(m, "%s --- %s = 0x%08x %08x\n",
			   engine ? engine->name : "global", name,
			   upper_32_bits(obj->gtt_offset),
			   lower_32_bits(obj->gtt_offset));
	}

	err_compression_marker(m);
	for (page = 0; page < obj->page_count; page++) {
		int i, len;

		len = PAGE_SIZE;
		if (page == obj->page_count - 1)
			len -= obj->unused;
		len = ascii85_encode_len(len);

		for (i = 0; i < len; i++) {
			if (ascii85_encode(obj->pages[page][i], out))
				err_puts(m, out);
			else
				err_puts(m, "z");
		}
	}
	err_puts(m, "\n");
}

static void err_print_capabilities(struct drm_i915_error_state_buf *m,
				   const struct intel_device_info *info)
{
	struct drm_printer p = i915_error_printer(m);

	intel_device_info_dump_flags(info, &p);
}

static void err_print_params(struct drm_i915_error_state_buf *m,
			     const struct i915_params *params)
{
	struct drm_printer p = i915_error_printer(m);

	i915_params_dump(params, &p);
}

static void err_print_pciid(struct drm_i915_error_state_buf *m,
			    struct drm_i915_private *i915)
{
	struct pci_dev *pdev = i915->drm.pdev;

	err_printf(m, "PCI ID: 0x%04x\n", pdev->device);
	err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision);
	err_printf(m, "PCI Subsystem: %04x:%04x\n",
		   pdev->subsystem_vendor,
		   pdev->subsystem_device);
}

static void err_print_uc(struct drm_i915_error_state_buf *m,
			 const struct i915_error_uc *error_uc)
{
	struct drm_printer p = i915_error_printer(m);
	const struct i915_gpu_state *error =
		container_of(error_uc, typeof(*error), uc);

	if (!error->device_info.has_guc)
		return;

	intel_uc_fw_dump(&error_uc->guc_fw, &p);
	intel_uc_fw_dump(&error_uc->huc_fw, &p);
	print_error_obj(m, NULL, "GuC log buffer", error_uc->guc_log);
}

int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
			    const struct i915_gpu_state *error)
{
	struct drm_i915_private *dev_priv = m->i915;
	struct drm_i915_error_object *obj;
	int i, j;

	if (!error) {
		err_printf(m, "No error state collected\n");
		return 0;
	}

	if (*error->error_msg)
		err_printf(m, "%s\n", error->error_msg);
	err_printf(m, "Kernel: " UTS_RELEASE "\n");
	err_printf(m, "Time: %ld s %ld us\n",
		   error->time.tv_sec, error->time.tv_usec);
	err_printf(m, "Boottime: %ld s %ld us\n",
		   error->boottime.tv_sec, error->boottime.tv_usec);
	err_printf(m, "Uptime: %ld s %ld us\n",
		   error->uptime.tv_sec, error->uptime.tv_usec);

	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		if (error->engine[i].hangcheck_stalled &&
		    error->engine[i].context.pid) {
			err_printf(m, "Active process (on ring %s): %s [%d], score %d\n",
				   engine_str(i),
				   error->engine[i].context.comm,
				   error->engine[i].context.pid,
				   error->engine[i].context.ban_score);
		}
	}
	err_printf(m, "Reset count: %u\n", error->reset_count);
	err_printf(m, "Suspend count: %u\n", error->suspend_count);
	err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform));
	err_print_pciid(m, error->i915);

	err_printf(m, "IOMMU enabled?: %d\n", error->iommu);

	if (HAS_CSR(dev_priv)) {
		struct intel_csr *csr = &dev_priv->csr;

		err_printf(m, "DMC loaded: %s\n",
			   yesno(csr->dmc_payload != NULL));
		err_printf(m, "DMC fw version: %d.%d\n",
			   CSR_VERSION_MAJOR(csr->version),
			   CSR_VERSION_MINOR(csr->version));
	}

	err_printf(m, "GT awake: %s\n", yesno(error->awake));
	err_printf(m, "RPM wakelock: %s\n", yesno(error->wakelock));
	err_printf(m, "PM suspended: %s\n", yesno(error->suspended));
	err_printf(m, "EIR: 0x%08x\n", error->eir);
	err_printf(m, "IER: 0x%08x\n", error->ier);
	for (i = 0; i < error->ngtier; i++)
		err_printf(m, "GTIER[%d]: 0x%08x\n", i, error->gtier[i]);
	err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er);
	err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
	err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
	err_printf(m, "CCID: 0x%08x\n", error->ccid);
	err_printf(m, "Missed interrupts: 0x%08lx\n", dev_priv->gpu_error.missed_irq_rings);

	for (i = 0; i < error->nfence; i++)
		err_printf(m, "  fence[%d] = %08llx\n", i, error->fence[i]);

	if (INTEL_GEN(dev_priv) >= 6) {
		err_printf(m, "ERROR: 0x%08x\n", error->error);

		if (INTEL_GEN(dev_priv) >= 8)
			err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n",
				   error->fault_data1, error->fault_data0);

		err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg);
	}

	if (IS_GEN7(dev_priv))
		err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);

	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		if (error->engine[i].engine_id != -1)
			error_print_engine(m, &error->engine[i]);
	}

	for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) {
		char buf[128];
		int len, first = 1;

		if (!error->active_vm[i])
			break;

		len = scnprintf(buf, sizeof(buf), "Active (");
		for (j = 0; j < ARRAY_SIZE(error->engine); j++) {
			if (error->engine[j].vm != error->active_vm[i])
				continue;

			len += scnprintf(buf + len, sizeof(buf), "%s%s",
					 first ? "" : ", ",
					 dev_priv->engine[j]->name);
			first = 0;
		}
		scnprintf(buf + len, sizeof(buf), ")");
		print_error_buffers(m, buf,
				    error->active_bo[i],
				    error->active_bo_count[i]);
	}

	print_error_buffers(m, "Pinned (global)",
			    error->pinned_bo,
			    error->pinned_bo_count);

	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		const struct drm_i915_error_engine *ee = &error->engine[i];

		obj = ee->batchbuffer;
		if (obj) {
			err_puts(m, dev_priv->engine[i]->name);
			if (ee->context.pid)
				err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d)",
					   ee->context.comm,
					   ee->context.pid,
					   ee->context.handle,
					   ee->context.hw_id,
					   ee->context.ban_score);
			err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
				   upper_32_bits(obj->gtt_offset),
				   lower_32_bits(obj->gtt_offset));
			print_error_obj(m, dev_priv->engine[i], NULL, obj);
		}

		for (j = 0; j < ee->user_bo_count; j++)
			print_error_obj(m, dev_priv->engine[i],
					"user", ee->user_bo[j]);

		if (ee->num_requests) {
			err_printf(m, "%s --- %d requests\n",
				   dev_priv->engine[i]->name,
				   ee->num_requests);
			for (j = 0; j < ee->num_requests; j++)
				error_print_request(m, " ", &ee->requests[j]);
		}

		if (IS_ERR(ee->waiters)) {
			err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n",
				   dev_priv->engine[i]->name);
		} else if (ee->num_waiters) {
			err_printf(m, "%s --- %d waiters\n",
				   dev_priv->engine[i]->name,
				   ee->num_waiters);
			for (j = 0; j < ee->num_waiters; j++) {
				err_printf(m, " seqno 0x%08x for %s [%d]\n",
					   ee->waiters[j].seqno,
					   ee->waiters[j].comm,
					   ee->waiters[j].pid);
			}
		}

		print_error_obj(m, dev_priv->engine[i],
				"ringbuffer", ee->ringbuffer);

		print_error_obj(m, dev_priv->engine[i],
				"HW Status", ee->hws_page);

		print_error_obj(m, dev_priv->engine[i],
				"HW context", ee->ctx);

		print_error_obj(m, dev_priv->engine[i],
				"WA context", ee->wa_ctx);

		print_error_obj(m, dev_priv->engine[i],
				"WA batchbuffer", ee->wa_batchbuffer);

		print_error_obj(m, dev_priv->engine[i],
				"NULL context", ee->default_state);
	}

	if (error->overlay)
		intel_overlay_print_error_state(m, error->overlay);

	if (error->display)
		intel_display_print_error_state(m, error->display);

	err_print_capabilities(m, &error->device_info);
	err_print_params(m, &error->params);
	err_print_uc(m, &error->uc);

	if (m->bytes == 0 && m->err)
		return m->err;

	return 0;
}

int i915_error_state_buf_init(struct drm_i915_error_state_buf *ebuf,
			      struct drm_i915_private *i915,
			      size_t count, loff_t pos)
{
	memset(ebuf, 0, sizeof(*ebuf));
	ebuf->i915 = i915;

	/* We need to have enough room to store any i915_error_state printf
	 * so that we can move it to the start position.
	 */
	ebuf->size = count + 1 > PAGE_SIZE ? count + 1 : PAGE_SIZE;
	ebuf->buf = kmalloc(ebuf->size,
			    GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);

	if (ebuf->buf == NULL) {
		ebuf->size = PAGE_SIZE;
		ebuf->buf = kmalloc(ebuf->size, GFP_KERNEL);
	}

	if (ebuf->buf == NULL) {
		ebuf->size = 128;
		ebuf->buf = kmalloc(ebuf->size, GFP_KERNEL);
	}

	if (ebuf->buf == NULL)
		return -ENOMEM;

	ebuf->start = pos;

	return 0;
}

static void i915_error_object_free(struct drm_i915_error_object *obj)
{
	int page;

	if (obj == NULL)
		return;

	for (page = 0; page < obj->page_count; page++)
		free_page((unsigned long)obj->pages[page]);

	kfree(obj);
}

static __always_inline void free_param(const char *type, void *x)
{
	if (!__builtin_strcmp(type, "char *"))
		kfree(*(void **)x);
}

static void cleanup_params(struct i915_gpu_state *error)
{
#define FREE(T, x, ...) free_param(#T, &error->params.x);
	I915_PARAMS_FOR_EACH(FREE);
#undef FREE
}

static void cleanup_uc_state(struct i915_gpu_state *error)
{
	struct i915_error_uc *error_uc = &error->uc;

	kfree(error_uc->guc_fw.path);
	kfree(error_uc->huc_fw.path);
	i915_error_object_free(error_uc->guc_log);
}

void __i915_gpu_state_free(struct kref *error_ref)
{
	struct i915_gpu_state *error =
		container_of(error_ref, typeof(*error), ref);
	long i, j;

	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		struct drm_i915_error_engine *ee = &error->engine[i];

		for (j = 0; j < ee->user_bo_count; j++)
			i915_error_object_free(ee->user_bo[j]);
		kfree(ee->user_bo);

		i915_error_object_free(ee->batchbuffer);
		i915_error_object_free(ee->wa_batchbuffer);
		i915_error_object_free(ee->ringbuffer);
		i915_error_object_free(ee->hws_page);
		i915_error_object_free(ee->ctx);
		i915_error_object_free(ee->wa_ctx);

		kfree(ee->requests);
		if (!IS_ERR_OR_NULL(ee->waiters))
			kfree(ee->waiters);
	}

	for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
		kfree(error->active_bo[i]);
	kfree(error->pinned_bo);

	kfree(error->overlay);
	kfree(error->display);

	cleanup_params(error);
	cleanup_uc_state(error);

	kfree(error);
}

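/*
 * Snapshot the contents of a vma by copying its backing pages one at a
 * time through a reserved GGTT slot, compressing them on the fly.
 */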
static struct drm_i915_error_object *
i915_error_object_create(struct drm_i915_private *i915,
			 struct i915_vma *vma)
{
	struct i915_ggtt *ggtt = &i915->ggtt;
	const u64 slot = ggtt->error_capture.start;
	struct drm_i915_error_object *dst;
	struct compress compress;
	unsigned long num_pages;
	struct sgt_iter iter;
	dma_addr_t dma;

	if (!vma)
		return NULL;

	num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
	num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */
	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *),
		      GFP_ATOMIC | __GFP_NOWARN);
	if (!dst)
		return NULL;

	dst->gtt_offset = vma->node.start;
	dst->gtt_size = vma->node.size;
	dst->page_count = 0;
	dst->unused = 0;

	if (!compress_init(&compress)) {
		kfree(dst);
		return NULL;
	}

	for_each_sgt_dma(dma, iter, vma->pages) {
		void __iomem *s;
		int ret;

		ggtt->base.insert_page(&ggtt->base, dma, slot,
				       I915_CACHE_NONE, 0);

		s = io_mapping_map_atomic_wc(&ggtt->iomap, slot);
		ret = compress_page(&compress, (void __force *)s, dst);
		io_mapping_unmap_atomic(s);

		if (ret)
			goto unwind;
	}
	goto out;

unwind:
	while (dst->page_count--)
		free_page((unsigned long)dst->pages[dst->page_count]);
	kfree(dst);
	dst = NULL;

out:
	compress_fini(&compress, dst);
	ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE);
	return dst;
}

/* The error capture is special as it tries to run underneath the normal
 * locking rules - so we use the raw version of the i915_gem_active lookup.
 */
static inline uint32_t
__active_get_seqno(struct i915_gem_active *active)
{
	struct drm_i915_gem_request *request;

	request = __i915_gem_active_peek(active);
	return request ? request->global_seqno : 0;
}

static inline int
__active_get_engine_id(struct i915_gem_active *active)
{
	struct drm_i915_gem_request *request;

	request = __i915_gem_active_peek(active);
	return request ? request->engine->id : -1;
}

static void capture_bo(struct drm_i915_error_buffer *err,
		       struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj = vma->obj;
	int i;

	err->size = obj->base.size;
	err->name = obj->base.name;

	for (i = 0; i < I915_NUM_ENGINES; i++)
		err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);
	err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
	err->engine = __active_get_engine_id(&obj->frontbuffer_write);

	err->gtt_offset = vma->node.start;
	err->read_domains = obj->base.read_domains;
	err->write_domain = obj->base.write_domain;
	err->fence_reg = vma->fence ? vma->fence->id : -1;
	err->tiling = i915_gem_object_get_tiling(obj);
	err->dirty = obj->mm.dirty;
	err->purgeable = obj->mm.madv != I915_MADV_WILLNEED;
	err->userptr = obj->userptr.mm != NULL;
	err->cache_level = obj->cache_level;
}

static u32 capture_error_bo(struct drm_i915_error_buffer *err,
			    int count, struct list_head *head,
			    bool pinned_only)
{
	struct i915_vma *vma;
	int i = 0;

	list_for_each_entry(vma, head, vm_link) {
		if (pinned_only && !i915_vma_is_pinned(vma))
			continue;

		capture_bo(err++, vma);
		if (++i == count)
			break;
	}

	return i;
}

/* Generate a semi-unique error code. The code is not meant to have meaning;
 * its only purpose is to try to prevent false duplicated bug reports by
 * grossly estimating a GPU error state.
 *
 * TODO Ideally, hashing the batchbuffer would be a very nice way to determine
 * the hang if we could strip the GTT offset information from it.
 *
 * It's only a small step better than a random number in its current form.
 */
static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv,
					 struct i915_gpu_state *error,
					 int *engine_id)
{
	uint32_t error_code = 0;
	int i;

	/* IPEHR would be an ideal way to detect errors, as it's the gross
	 * measure of "the command that hung." However, it has some very common
	 * synchronization commands which almost always appear in the case of,
	 * strictly speaking, a client bug. Use instdone to differentiate those
	 * somewhat.
	 */
	for (i = 0; i < I915_NUM_ENGINES; i++) {
		if (error->engine[i].hangcheck_stalled) {
			if (engine_id)
				*engine_id = i;

			return error->engine[i].ipehr ^
			       error->engine[i].instdone.instdone;
		}
	}

	return error_code;
}

static void i915_gem_record_fences(struct drm_i915_private *dev_priv,
				   struct i915_gpu_state *error)
{
	int i;

	if (INTEL_GEN(dev_priv) >= 6) {
		for (i = 0; i < dev_priv->num_fence_regs; i++)
			error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i));
	} else if (INTEL_GEN(dev_priv) >= 4) {
		for (i = 0; i < dev_priv->num_fence_regs; i++)
			error->fence[i] = I915_READ64(FENCE_REG_965_LO(i));
	} else {
		for (i = 0; i < dev_priv->num_fence_regs; i++)
			error->fence[i] = I915_READ(FENCE_REG(i));
	}
	error->nfence = i;
}

static inline u32
gen8_engine_sync_index(struct intel_engine_cs *engine,
		       struct intel_engine_cs *other)
{
	int idx;

	/*
	 * rcs -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2;
	 * vcs -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs;
	 * bcs -> 0 = vecs, 1 = vcs2, 2 = rcs, 3 = vcs;
	 * vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs;
	 * vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs;
	 */

	idx = (other - engine) - 1;
	if (idx < 0)
		idx += I915_NUM_ENGINES;

	return idx;
}

static void gen6_record_semaphore_state(struct intel_engine_cs *engine,
					struct drm_i915_error_engine *ee)
{
	struct drm_i915_private *dev_priv = engine->i915;

	ee->semaphore_mboxes[0] = I915_READ(RING_SYNC_0(engine->mmio_base));
	ee->semaphore_mboxes[1] = I915_READ(RING_SYNC_1(engine->mmio_base));
	if (HAS_VEBOX(dev_priv))
		ee->semaphore_mboxes[2] =
			I915_READ(RING_SYNC_2(engine->mmio_base));
}

static void error_record_engine_waiters(struct intel_engine_cs *engine,
					struct drm_i915_error_engine *ee)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct drm_i915_error_waiter *waiter;
	struct rb_node *rb;
	int count;

	ee->num_waiters = 0;
	ee->waiters = NULL;

	if (RB_EMPTY_ROOT(&b->waiters))
		return;

	if (!spin_trylock_irq(&b->rb_lock)) {
		ee->waiters = ERR_PTR(-EDEADLK);
		return;
	}

	count = 0;
	for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb))
		count++;
	spin_unlock_irq(&b->rb_lock);

	waiter = NULL;
	if (count)
		waiter = kmalloc_array(count,
				       sizeof(struct drm_i915_error_waiter),
				       GFP_ATOMIC);
	if (!waiter)
		return;

	if (!spin_trylock_irq(&b->rb_lock)) {
		kfree(waiter);
		ee->waiters = ERR_PTR(-EDEADLK);
		return;
	}

	ee->waiters = waiter;
	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
		struct intel_wait *w = rb_entry(rb, typeof(*w), node);

		strcpy(waiter->comm, w->tsk->comm);
		waiter->pid = w->tsk->pid;
		waiter->seqno = w->seqno;
		waiter++;

		if (++ee->num_waiters == count)
			break;
	}
	spin_unlock_irq(&b->rb_lock);
}

static void error_record_engine_registers(struct i915_gpu_state *error,
					  struct intel_engine_cs *engine,
					  struct drm_i915_error_engine *ee)
{
	struct drm_i915_private *dev_priv = engine->i915;

	if (INTEL_GEN(dev_priv) >= 6) {
		ee->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base));
		if (INTEL_GEN(dev_priv) >= 8) {
			ee->fault_reg = I915_READ(GEN8_RING_FAULT_REG);
		} else {
			gen6_record_semaphore_state(engine, ee);
			ee->fault_reg = I915_READ(RING_FAULT_REG(engine));
		}
	}

	if (INTEL_GEN(dev_priv) >= 4) {
		ee->faddr = I915_READ(RING_DMA_FADD(engine->mmio_base));
		ee->ipeir = I915_READ(RING_IPEIR(engine->mmio_base));
		ee->ipehr = I915_READ(RING_IPEHR(engine->mmio_base));
		ee->instps = I915_READ(RING_INSTPS(engine->mmio_base));
		ee->bbaddr = I915_READ(RING_BBADDR(engine->mmio_base));
		if (INTEL_GEN(dev_priv) >= 8) {
			ee->faddr |= (u64) I915_READ(RING_DMA_FADD_UDW(engine->mmio_base)) << 32;
			ee->bbaddr |= (u64) I915_READ(RING_BBADDR_UDW(engine->mmio_base)) << 32;
		}
		ee->bbstate = I915_READ(RING_BBSTATE(engine->mmio_base));
	} else {
		ee->faddr = I915_READ(DMA_FADD_I8XX);
		ee->ipeir = I915_READ(IPEIR);
		ee->ipehr = I915_READ(IPEHR);
	}

	intel_engine_get_instdone(engine, &ee->instdone);

	ee->waiting = intel_engine_has_waiter(engine);
	ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
	ee->acthd = intel_engine_get_active_head(engine);
	ee->seqno = intel_engine_get_seqno(engine);
	ee->last_seqno = intel_engine_last_submit(engine);
	ee->start = I915_READ_START(engine);
	ee->head = I915_READ_HEAD(engine);
	ee->tail = I915_READ_TAIL(engine);
	ee->ctl = I915_READ_CTL(engine);
	if (INTEL_GEN(dev_priv) > 2)
		ee->mode = I915_READ_MODE(engine);

	if (!HWS_NEEDS_PHYSICAL(dev_priv)) {
		i915_reg_t mmio;

		if (IS_GEN7(dev_priv)) {
			switch (engine->id) {
			default:
			case RCS:
				mmio = RENDER_HWS_PGA_GEN7;
				break;
			case BCS:
				mmio = BLT_HWS_PGA_GEN7;
				break;
			case VCS:
				mmio = BSD_HWS_PGA_GEN7;
				break;
			case VECS:
				mmio = VEBOX_HWS_PGA_GEN7;
				break;
			}
		} else if (IS_GEN6(engine->i915)) {
			mmio = RING_HWS_PGA_GEN6(engine->mmio_base);
		} else {
			/* XXX: gen8 returns to sanity */
			mmio = RING_HWS_PGA(engine->mmio_base);
		}

		ee->hws = I915_READ(mmio);
	}

	ee->idle = intel_engine_is_idle(engine);
	ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
	ee->hangcheck_action = engine->hangcheck.action;
	ee->hangcheck_stalled = engine->hangcheck.stalled;
	ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error,
						  engine);

	if (USES_PPGTT(dev_priv)) {
		int i;

		ee->vm_info.gfx_mode = I915_READ(RING_MODE_GEN7(engine));

		if (IS_GEN6(dev_priv))
			ee->vm_info.pp_dir_base =
				I915_READ(RING_PP_DIR_BASE_READ(engine));
		else if (IS_GEN7(dev_priv))
			ee->vm_info.pp_dir_base =
				I915_READ(RING_PP_DIR_BASE(engine));
		else if (INTEL_GEN(dev_priv) >= 8)
			for (i = 0; i < 4; i++) {
				ee->vm_info.pdp[i] =
					I915_READ(GEN8_RING_PDP_UDW(engine, i));
				ee->vm_info.pdp[i] <<= 32;
				ee->vm_info.pdp[i] |=
					I915_READ(GEN8_RING_PDP_LDW(engine, i));
			}
	}
}

static void record_request(struct drm_i915_gem_request *request,
			   struct drm_i915_error_request *erq)
{
	erq->context = request->ctx->hw_id;
	erq->priority = request->priotree.priority;
	erq->ban_score = atomic_read(&request->ctx->ban_score);
	erq->seqno = request->global_seqno;
	erq->jiffies = request->emitted_jiffies;
	erq->head = request->head;
	erq->tail = request->tail;

	rcu_read_lock();
	erq->pid = request->ctx->pid ? pid_nr(request->ctx->pid) : 0;
	rcu_read_unlock();
}

static void engine_record_requests(struct intel_engine_cs *engine,
				   struct drm_i915_gem_request *first,
				   struct drm_i915_error_engine *ee)
{
	struct drm_i915_gem_request *request;
	int count;

	count = 0;
	request = first;
	list_for_each_entry_from(request, &engine->timeline->requests, link)
		count++;
	if (!count)
		return;

	ee->requests = kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC);
	if (!ee->requests)
		return;

	ee->num_requests = count;

	count = 0;
	request = first;
	list_for_each_entry_from(request, &engine->timeline->requests, link) {
		if (count >= ee->num_requests) {
			/*
			 * If the ring request list was changed in
			 * between the point where the error request
			 * list was created and dimensioned and this
			 * point then just exit early to avoid crashes.
			 *
			 * We don't need to communicate that the
			 * request list changed state during error
			 * state capture and that the error state is
			 * slightly incorrect as a consequence since we
			 * are typically only interested in the request
			 * list state at the point of error state
			 * capture, not in any changes happening during
			 * the capture.
			 */
			break;
		}

		record_request(request, &ee->requests[count++]);
	}
	ee->num_requests = count;
}

static void error_record_engine_execlists(struct intel_engine_cs *engine,
					  struct drm_i915_error_engine *ee)
{
	const struct intel_engine_execlists * const execlists = &engine->execlists;
	unsigned int n;

	for (n = 0; n < execlists_num_ports(execlists); n++) {
		struct drm_i915_gem_request *rq = port_request(&execlists->port[n]);

		if (!rq)
			break;

		record_request(rq, &ee->execlist[n]);
	}

	ee->num_ports = n;
}

static void record_context(struct drm_i915_error_context *e,
			   struct i915_gem_context *ctx)
{
	if (ctx->pid) {
		struct task_struct *task;

		rcu_read_lock();
		task = pid_task(ctx->pid, PIDTYPE_PID);
		if (task) {
			strcpy(e->comm, task->comm);
			e->pid = task->pid;
		}
		rcu_read_unlock();
	}

	e->handle = ctx->user_handle;
	e->hw_id = ctx->hw_id;
	e->priority = ctx->priority;
	e->ban_score = atomic_read(&ctx->ban_score);
	e->guilty = atomic_read(&ctx->guilty_count);
	e->active = atomic_read(&ctx->active_count);
}

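/*
 * Snapshot the user buffers that the request explicitly asked to have
 * captured (its capture_list) alongside the error state.
 */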
static void request_record_user_bo(struct drm_i915_gem_request *request,
				   struct drm_i915_error_engine *ee)
{
	struct i915_gem_capture_list *c;
	struct drm_i915_error_object **bo;
	long count;

	count = 0;
	for (c = request->capture_list; c; c = c->next)
		count++;

	bo = NULL;
	if (count)
		bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC);
	if (!bo)
		return;

	count = 0;
	for (c = request->capture_list; c; c = c->next) {
		bo[count] = i915_error_object_create(request->i915, c->vma);
		if (!bo[count])
			break;
		count++;
	}

	ee->user_bo = bo;
	ee->user_bo_count = count;
}

static struct drm_i915_error_object *
capture_object(struct drm_i915_private *dev_priv,
	       struct drm_i915_gem_object *obj)
{
	if (obj && i915_gem_object_has_pages(obj)) {
		struct i915_vma fake = {
			.node = { .start = U64_MAX, .size = obj->base.size },
			.size = obj->base.size,
			.pages = obj->mm.pages,
			.obj = obj,
		};

		return i915_error_object_create(dev_priv, &fake);
	} else {
		return NULL;
	}
}

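/*
 * Record per-engine state: registers, waiters, execlist ports and, if an
 * active request is found, its context, batch and ring contents.
 */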
static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
				  struct i915_gpu_state *error)
{
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	int i;

	for (i = 0; i < I915_NUM_ENGINES; i++) {
		struct intel_engine_cs *engine = dev_priv->engine[i];
		struct drm_i915_error_engine *ee = &error->engine[i];
		struct drm_i915_gem_request *request;

		ee->engine_id = -1;

		if (!engine)
			continue;

		ee->engine_id = i;

		error_record_engine_registers(error, engine, ee);
		error_record_engine_waiters(engine, ee);
		error_record_engine_execlists(engine, ee);

		request = i915_gem_find_active_request(engine);
		if (request) {
			struct intel_ring *ring;

			ee->vm = request->ctx->ppgtt ?
				&request->ctx->ppgtt->base : &ggtt->base;

			record_context(&ee->context, request->ctx);

			/* We need to copy these to an anonymous buffer
			 * as the simplest method to avoid being overwritten
			 * by userspace.
			 */
			ee->batchbuffer =
				i915_error_object_create(dev_priv,
							 request->batch);

			if (HAS_BROKEN_CS_TLB(dev_priv))
				ee->wa_batchbuffer =
					i915_error_object_create(dev_priv,
								 engine->scratch);
			request_record_user_bo(request, ee);

			ee->ctx =
				i915_error_object_create(dev_priv,
							 request->ctx->engine[i].state);

			error->simulated |=
				i915_gem_context_no_error_capture(request->ctx);

			ee->rq_head = request->head;
			ee->rq_post = request->postfix;
			ee->rq_tail = request->tail;

			ring = request->ring;
			ee->cpu_ring_head = ring->head;
			ee->cpu_ring_tail = ring->tail;
			ee->ringbuffer =
				i915_error_object_create(dev_priv, ring->vma);

			engine_record_requests(engine, request, ee);
		}

		ee->hws_page =
			i915_error_object_create(dev_priv,
						 engine->status_page.vma);

		ee->wa_ctx =
			i915_error_object_create(dev_priv, engine->wa_ctx.vma);

		ee->default_state =
			capture_object(dev_priv, engine->default_state);
	}
}

static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
				struct i915_gpu_state *error,
				struct i915_address_space *vm,
				int idx)
{
	struct drm_i915_error_buffer *active_bo;
	struct i915_vma *vma;
	int count;

	count = 0;
	list_for_each_entry(vma, &vm->active_list, vm_link)
		count++;

	active_bo = NULL;
	if (count)
		active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC);
	if (active_bo)
		count = capture_error_bo(active_bo, count, &vm->active_list, false);
	else
		count = 0;

	error->active_vm[idx] = vm;
	error->active_bo[idx] = active_bo;
	error->active_bo_count[idx] = count;
}

static void i915_capture_active_buffers(struct drm_i915_private *dev_priv,
					struct i915_gpu_state *error)
{
	int cnt = 0, i, j;

	BUILD_BUG_ON(ARRAY_SIZE(error->engine) > ARRAY_SIZE(error->active_bo));
	BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_vm));
	BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_bo_count));

	/* Scan each engine looking for unique active contexts/vm */
	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		struct drm_i915_error_engine *ee = &error->engine[i];
		bool found;

		if (!ee->vm)
			continue;

		found = false;
		for (j = 0; j < i && !found; j++)
			found = error->engine[j].vm == ee->vm;
		if (!found)
			i915_gem_capture_vm(dev_priv, error, ee->vm, cnt++);
	}
}

static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv,
					struct i915_gpu_state *error)
{
	struct i915_address_space *vm = &dev_priv->ggtt.base;
	struct drm_i915_error_buffer *bo;
	struct i915_vma *vma;
	int count_inactive, count_active;

	count_inactive = 0;
	list_for_each_entry(vma, &vm->active_list, vm_link)
		count_inactive++;

	count_active = 0;
	list_for_each_entry(vma, &vm->inactive_list, vm_link)
		count_active++;

	bo = NULL;
	if (count_inactive + count_active)
		bo = kcalloc(count_inactive + count_active,
			     sizeof(*bo), GFP_ATOMIC);
	if (!bo)
		return;

	count_inactive = capture_error_bo(bo, count_inactive,
					  &vm->active_list, true);
	count_active = capture_error_bo(bo + count_inactive, count_active,
					&vm->inactive_list, true);
	error->pinned_bo_count = count_inactive + count_active;
	error->pinned_bo = bo;
}

static void capture_uc_state(struct i915_gpu_state *error)
{
	struct drm_i915_private *i915 = error->i915;
	struct i915_error_uc *error_uc = &error->uc;

	/* Capturing uC state won't be useful if there is no GuC */
	if (!error->device_info.has_guc)
		return;

	error_uc->guc_fw = i915->guc.fw;
	error_uc->huc_fw = i915->huc.fw;

	/* Non-default firmware paths will be specified by the modparam.
	 * As modparams are generally accessible from userspace, make
	 * explicit copies of the firmware paths.
	 */
	error_uc->guc_fw.path = kstrdup(i915->guc.fw.path, GFP_ATOMIC);
	error_uc->huc_fw.path = kstrdup(i915->huc.fw.path, GFP_ATOMIC);
	error_uc->guc_log = i915_error_object_create(i915, i915->guc.log.vma);
}

/* Capture all registers which don't fit into another category. */
static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
				   struct i915_gpu_state *error)
{
	int i;

	/* General organization
	 * 1. Registers specific to a single generation
	 * 2. Registers which belong to multiple generations
	 * 3. Feature specific registers.
	 * 4. Everything else
	 * Please try to follow the order.
	 */

	/* 1: Registers specific to a single generation */
	if (IS_VALLEYVIEW(dev_priv)) {
		error->gtier[0] = I915_READ(GTIER);
		error->ier = I915_READ(VLV_IER);
		error->forcewake = I915_READ_FW(FORCEWAKE_VLV);
	}

	if (IS_GEN7(dev_priv))
		error->err_int = I915_READ(GEN7_ERR_INT);

	if (INTEL_GEN(dev_priv) >= 8) {
		error->fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0);
		error->fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1);
	}

	if (IS_GEN6(dev_priv)) {
		error->forcewake = I915_READ_FW(FORCEWAKE);
		error->gab_ctl = I915_READ(GAB_CTL);
		error->gfx_mode = I915_READ(GFX_MODE);
	}

	/* 2: Registers which belong to multiple generations */
	if (INTEL_GEN(dev_priv) >= 7)
		error->forcewake = I915_READ_FW(FORCEWAKE_MT);

	if (INTEL_GEN(dev_priv) >= 6) {
		error->derrmr = I915_READ(DERRMR);
		error->error = I915_READ(ERROR_GEN6);
		error->done_reg = I915_READ(DONE_REG);
	}

	if (INTEL_GEN(dev_priv) >= 5)
		error->ccid = I915_READ(CCID);

	/* 3: Feature specific registers */
	if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
		error->gam_ecochk = I915_READ(GAM_ECOCHK);
		error->gac_eco = I915_READ(GAC_ECO_BITS);
	}

	/* 4: Everything else */
	if (INTEL_GEN(dev_priv) >= 8) {
		error->ier = I915_READ(GEN8_DE_MISC_IER);
		for (i = 0; i < 4; i++)
			error->gtier[i] = I915_READ(GEN8_GT_IER(i));
		error->ngtier = 4;
	} else if (HAS_PCH_SPLIT(dev_priv)) {
		error->ier = I915_READ(DEIER);
		error->gtier[0] = I915_READ(GTIER);
		error->ngtier = 1;
	} else if (IS_GEN2(dev_priv)) {
		error->ier = I915_READ16(IER);
	} else if (!IS_VALLEYVIEW(dev_priv)) {
		error->ier = I915_READ(IER);
	}
	error->eir = I915_READ(EIR);
	error->pgtbl_er = I915_READ(PGTBL_ER);
}

static void i915_error_capture_msg(struct drm_i915_private *dev_priv,
				   struct i915_gpu_state *error,
				   u32 engine_mask,
				   const char *error_msg)
{
	u32 ecode;
	int engine_id = -1, len;

	ecode = i915_error_generate_code(dev_priv, error, &engine_id);

	len = scnprintf(error->error_msg, sizeof(error->error_msg),
			"GPU HANG: ecode %d:%d:0x%08x",
			INTEL_GEN(dev_priv), engine_id, ecode);

	if (engine_id != -1 && error->engine[engine_id].context.pid)
		len += scnprintf(error->error_msg + len,
				 sizeof(error->error_msg) - len,
				 ", in %s [%d]",
				 error->engine[engine_id].context.comm,
				 error->engine[engine_id].context.pid);

	scnprintf(error->error_msg + len, sizeof(error->error_msg) - len,
		  ", reason: %s, action: %s",
		  error_msg,
		  engine_mask ? "reset" : "continue");
}

static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
				   struct i915_gpu_state *error)
{
	error->awake = dev_priv->gt.awake;
	error->wakelock = atomic_read(&dev_priv->runtime_pm.wakeref_count);
	error->suspended = dev_priv->runtime_pm.suspended;

	error->iommu = -1;
#ifdef CONFIG_INTEL_IOMMU
	error->iommu = intel_iommu_gfx_mapped;
#endif
	error->reset_count = i915_reset_count(&dev_priv->gpu_error);
	error->suspend_count = dev_priv->suspend_count;

	memcpy(&error->device_info,
	       INTEL_INFO(dev_priv),
	       sizeof(error->device_info));
}

static __always_inline void dup_param(const char *type, void *x)
{
	if (!__builtin_strcmp(type, "char *"))
		*(void **)x = kstrdup(*(void **)x, GFP_ATOMIC);
}

static void capture_params(struct i915_gpu_state *error)
{
	error->params = i915_modparams;
#define DUP(T, x, ...) dup_param(#T, &error->params.x);
	I915_PARAMS_FOR_EACH(DUP);
#undef DUP
}

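/*
 * Runs under stop_machine(): every other CPU is spinning, so the capture
 * sees a stable snapshot but must not sleep (hence the GFP_ATOMIC
 * allocations throughout).
 */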
static int capture(void *data)
{
	struct i915_gpu_state *error = data;

	do_gettimeofday(&error->time);
	error->boottime = ktime_to_timeval(ktime_get_boottime());
	error->uptime =
		ktime_to_timeval(ktime_sub(ktime_get(),
					   error->i915->gt.last_init_time));

	capture_params(error);
	capture_uc_state(error);

	i915_capture_gen_state(error->i915, error);
	i915_capture_reg_state(error->i915, error);
	i915_gem_record_fences(error->i915, error);
	i915_gem_record_rings(error->i915, error);
	i915_capture_active_buffers(error->i915, error);
	i915_capture_pinned_buffers(error->i915, error);

	error->overlay = intel_overlay_capture_error_state(error->i915);
	error->display = intel_display_capture_error_state(error->i915);

	return 0;
}

#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))

struct i915_gpu_state *
i915_capture_gpu_state(struct drm_i915_private *i915)
{
	struct i915_gpu_state *error;

	error = kzalloc(sizeof(*error), GFP_ATOMIC);
	if (!error)
		return NULL;

	kref_init(&error->ref);
	error->i915 = i915;

	stop_machine(capture, error, NULL);

	return error;
}

/**
 * i915_capture_error_state - capture an error record for later analysis
 * @dev_priv: i915 device private
 * @engine_mask: the mask of engines triggering the hang
 * @error_msg: a message to insert into the error capture header
 *
 * Should be called when an error is detected (either a hang or an error
 * interrupt) to capture error state from the time of the error. Fills
 * out a structure which becomes available in debugfs for user level tools
 * to pick up.
 */
void i915_capture_error_state(struct drm_i915_private *dev_priv,
			      u32 engine_mask,
			      const char *error_msg)
{
	static bool warned;
	struct i915_gpu_state *error;
	unsigned long flags;

	if (!i915_modparams.error_capture)
		return;

	if (READ_ONCE(dev_priv->gpu_error.first_error))
		return;

	error = i915_capture_gpu_state(dev_priv);
	if (!error) {
		DRM_DEBUG_DRIVER("out of memory, not capturing error state\n");
		return;
	}

	i915_error_capture_msg(dev_priv, error, engine_mask, error_msg);
	DRM_INFO("%s\n", error->error_msg);

	if (!error->simulated) {
		spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
		if (!dev_priv->gpu_error.first_error) {
			dev_priv->gpu_error.first_error = error;
			error = NULL;
		}
		spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
	}

	if (error) {
		__i915_gpu_state_free(&error->ref);
		return;
	}

	if (!warned &&
	    ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) {
		DRM_INFO("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n");
		DRM_INFO("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n");
		DRM_INFO("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n");
		DRM_INFO("The gpu crash dump is required to analyze gpu hangs, so please always attach it.\n");
		DRM_INFO("GPU crash dump saved to /sys/class/drm/card%d/error\n",
			 dev_priv->drm.primary->index);
		warned = true;
	}
}

struct i915_gpu_state *
i915_first_error_state(struct drm_i915_private *i915)
{
	struct i915_gpu_state *error;

	spin_lock_irq(&i915->gpu_error.lock);
	error = i915->gpu_error.first_error;
	if (error)
		i915_gpu_state_get(error);
	spin_unlock_irq(&i915->gpu_error.lock);

	return error;
}

void i915_reset_error_state(struct drm_i915_private *i915)
{
	struct i915_gpu_state *error;

	spin_lock_irq(&i915->gpu_error.lock);
	error = i915->gpu_error.first_error;
	i915->gpu_error.first_error = NULL;
	spin_unlock_irq(&i915->gpu_error.lock);

	if (error)
		i915_gpu_state_put(error);
}