/*
 * Copyright (c) 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Mika Kuoppala <mika.kuoppala@intel.com>
 *
 */
#include <generated/utsrelease.h>
#include <linux/stop_machine.h>
#include <linux/zlib.h>

#include "i915_drv.h"
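
/*
 * GPU error state capture: on a hang (or error interrupt) the driver
 * snapshots registers, ring state and the buffer objects involved into a
 * drm_i915_error_state, which userspace can later read back as text via
 * /sys/class/drm/card<N>/error (see i915_capture_error_state() below).
 */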
static const char *engine_str(int engine)
{
	switch (engine) {
	case RCS: return "render";
	case VCS: return "bsd";
	case BCS: return "blt";
	case VECS: return "vebox";
	case VCS2: return "bsd2";
	default: return "";
	}
}
static const char *tiling_flag(int tiling)
{
	switch (tiling) {
	case I915_TILING_NONE: return "";
	case I915_TILING_X: return " X";
	case I915_TILING_Y: return " Y";
	default: return "";
	}
}
static const char *dirty_flag(int dirty)
{
	return dirty ? " dirty" : "";
}
static const char *purgeable_flag(int purgeable)
{
	return purgeable ? " purgeable" : "";
}
static bool __i915_error_ok(struct drm_i915_error_state_buf *e)
{
	if (!e->err && WARN(e->bytes > (e->size - 1), "overflow")) {
		e->err = -ENOSPC;
		return false;
	}

	if (e->bytes == e->size - 1 || e->err)
		return false;

	return true;
}
static bool __i915_error_seek(struct drm_i915_error_state_buf *e,
			      unsigned len)
{
	if (e->pos + len <= e->start) {
		e->pos += len;
		return false;
	}

	/* First vsnprintf needs to fit in its entirety for memmove */
	if (len >= e->size) {
		e->err = -EIO;
		return false;
	}

	return true;
}
static void __i915_error_advance(struct drm_i915_error_state_buf *e,
				 unsigned len)
{
	/* If this is the first printf in this window, adjust it so that
	 * the start position matches the start of the buffer.
	 */
	if (e->pos < e->start) {
		const size_t off = e->start - e->pos;

		/* Should not happen but be paranoid */
		if (off > len || e->bytes) {
			e->err = -EIO;
			return;
		}

		memmove(e->buf, e->buf + off, len - off);
		e->bytes = len - off;
		e->start = e->pos;
	} else {
		e->bytes += len;
	}

	e->pos += len;
}
static void i915_error_vprintf(struct drm_i915_error_state_buf *e,
			       const char *f, va_list args)
{
	unsigned len;

	if (!__i915_error_ok(e))
		return;

	/* Seek to the first printf which hits the start position */
	if (e->pos < e->start) {
		va_list tmp;

		va_copy(tmp, args);
		len = vsnprintf(NULL, 0, f, tmp);
		va_end(tmp);

		if (!__i915_error_seek(e, len))
			return;
	}

	len = vsnprintf(e->buf + e->bytes, e->size - e->bytes, f, args);
	if (len >= e->size - e->bytes)
		len = e->size - e->bytes - 1;

	__i915_error_advance(e, len);
}
static void i915_error_puts(struct drm_i915_error_state_buf *e,
			    const char *str)
{
	unsigned len;

	if (!__i915_error_ok(e))
		return;

	len = strlen(str);

	/* Seek to the first printf which hits the start position */
	if (e->pos < e->start) {
		if (!__i915_error_seek(e, len))
			return;
	}

	if (len >= e->size - e->bytes)
		len = e->size - e->bytes - 1;
	memcpy(e->buf + e->bytes, str, len);

	__i915_error_advance(e, len);
}
#define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
#define err_puts(e, s) i915_error_puts(e, s)
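
/*
 * Two interchangeable page-capture backends follow: with
 * CONFIG_DRM_I915_COMPRESS_ERROR each captured page is deflated through
 * zlib, otherwise pages are copied verbatim.  err_compression_marker()
 * records which variant produced the dump so the decoder can tell them
 * apart.
 */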
#ifdef CONFIG_DRM_I915_COMPRESS_ERROR

struct compress {
	struct z_stream_s zstream;
	void *tmp;
};

static bool compress_init(struct compress *c)
{
	struct z_stream_s *zstream = memset(&c->zstream, 0, sizeof(c->zstream));

	zstream->workspace =
		kmalloc(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
			GFP_ATOMIC | __GFP_NOWARN);
	if (!zstream->workspace)
		return false;

	if (zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) != Z_OK) {
		kfree(zstream->workspace);
		return false;
	}

	c->tmp = NULL;
	if (i915_has_memcpy_from_wc())
		c->tmp = (void *)__get_free_page(GFP_ATOMIC | __GFP_NOWARN);

	return true;
}
static int compress_page(struct compress *c,
			 void *src,
			 struct drm_i915_error_object *dst)
{
	struct z_stream_s *zstream = &c->zstream;

	zstream->next_in = src;
	if (c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
		zstream->next_in = c->tmp;
	zstream->avail_in = PAGE_SIZE;

	do {
		if (zstream->avail_out == 0) {
			unsigned long page;

			page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
			if (!page)
				return -ENOMEM;

			dst->pages[dst->page_count++] = (void *)page;

			zstream->next_out = (void *)page;
			zstream->avail_out = PAGE_SIZE;
		}

		if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK)
			return -EIO;
	} while (zstream->avail_in);

	/* Fallback to uncompressed if we increase size? */
	if (0 && zstream->total_out > zstream->total_in)
		return -E2BIG;

	return 0;
}
static void compress_fini(struct compress *c,
			  struct drm_i915_error_object *dst)
{
	struct z_stream_s *zstream = &c->zstream;

	if (dst) {
		zlib_deflate(zstream, Z_FINISH);
		dst->unused = zstream->avail_out;
	}

	zlib_deflateEnd(zstream);
	kfree(zstream->workspace);

	if (c->tmp)
		free_page((unsigned long)c->tmp);
}
static void err_compression_marker(struct drm_i915_error_state_buf *m)
{
	err_puts(m, ":");
}
#else

struct compress {
};

static bool compress_init(struct compress *c)
{
	return true;
}

static int compress_page(struct compress *c,
			 void *src,
			 struct drm_i915_error_object *dst)
{
	unsigned long page;
	void *ptr;

	page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
	if (!page)
		return -ENOMEM;

	ptr = (void *)page;
	if (!i915_memcpy_from_wc(ptr, src, PAGE_SIZE))
		memcpy(ptr, src, PAGE_SIZE);
	dst->pages[dst->page_count++] = ptr;

	return 0;
}

static void compress_fini(struct compress *c,
			  struct drm_i915_error_object *dst)
{
}

static void err_compression_marker(struct drm_i915_error_state_buf *m)
{
	err_puts(m, "~");
}

#endif
static void print_error_buffers(struct drm_i915_error_state_buf *m,
				const char *name,
				struct drm_i915_error_buffer *err,
				int count)
{
	int i;

	err_printf(m, "%s [%d]:\n", name, count);

	while (count--) {
		err_printf(m, "    %08x_%08x %8u %02x %02x [ ",
			   upper_32_bits(err->gtt_offset),
			   lower_32_bits(err->gtt_offset),
			   err->size,
			   err->read_domains,
			   err->write_domain);
		for (i = 0; i < I915_NUM_ENGINES; i++)
			err_printf(m, "%02x ", err->rseqno[i]);

		err_printf(m, "] %02x", err->wseqno);
		err_puts(m, tiling_flag(err->tiling));
		err_puts(m, dirty_flag(err->dirty));
		err_puts(m, purgeable_flag(err->purgeable));
		err_puts(m, err->userptr ? " userptr" : "");
		err_puts(m, err->engine != -1 ? " " : "");
		err_puts(m, engine_str(err->engine));
		err_puts(m, i915_cache_level_str(m->i915, err->cache_level));

		if (err->name)
			err_printf(m, " (name: %d)", err->name);
		if (err->fence_reg != I915_FENCE_REG_NONE)
			err_printf(m, " (fence: %d)", err->fence_reg);

		err_puts(m, "\n");
		err++;
	}
}
static void error_print_instdone(struct drm_i915_error_state_buf *m,
				 struct drm_i915_error_engine *ee)
{
	int slice;
	int subslice;

	err_printf(m, "  INSTDONE: 0x%08x\n",
		   ee->instdone.instdone);

	if (ee->engine_id != RCS || INTEL_GEN(m->i915) <= 3)
		return;

	err_printf(m, "  SC_INSTDONE: 0x%08x\n",
		   ee->instdone.slice_common);

	if (INTEL_GEN(m->i915) <= 6)
		return;

	for_each_instdone_slice_subslice(m->i915, slice, subslice)
		err_printf(m, "  SAMPLER_INSTDONE[%d][%d]: 0x%08x\n",
			   slice, subslice,
			   ee->instdone.sampler[slice][subslice]);

	for_each_instdone_slice_subslice(m->i915, slice, subslice)
		err_printf(m, "  ROW_INSTDONE[%d][%d]: 0x%08x\n",
			   slice, subslice,
			   ee->instdone.row[slice][subslice]);
}
static void error_print_request(struct drm_i915_error_state_buf *m,
				const char *prefix,
				struct drm_i915_error_request *erq)
{
	if (!erq->seqno)
		return;

	err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, emitted %dms ago, head %08x, tail %08x\n",
		   prefix, erq->pid, erq->ban_score,
		   erq->context, erq->seqno,
		   jiffies_to_msecs(jiffies - erq->jiffies),
		   erq->head, erq->tail);
}
static void error_print_engine(struct drm_i915_error_state_buf *m,
			       struct drm_i915_error_engine *ee)
{
	err_printf(m, "%s command stream:\n", engine_str(ee->engine_id));
	err_printf(m, "  START: 0x%08x\n", ee->start);
	err_printf(m, "  HEAD:  0x%08x [0x%08x]\n", ee->head, ee->rq_head);
	err_printf(m, "  TAIL:  0x%08x [0x%08x, 0x%08x]\n",
		   ee->tail, ee->rq_post, ee->rq_tail);
	err_printf(m, "  CTL:   0x%08x\n", ee->ctl);
	err_printf(m, "  MODE:  0x%08x\n", ee->mode);
	err_printf(m, "  HWS:   0x%08x\n", ee->hws);
	err_printf(m, "  ACTHD: 0x%08x %08x\n",
		   (u32)(ee->acthd >> 32), (u32)ee->acthd);
	err_printf(m, "  IPEIR: 0x%08x\n", ee->ipeir);
	err_printf(m, "  IPEHR: 0x%08x\n", ee->ipehr);

	error_print_instdone(m, ee);

	if (ee->batchbuffer) {
		u64 start = ee->batchbuffer->gtt_offset;
		u64 end = start + ee->batchbuffer->gtt_size;

		err_printf(m, "  batch: [0x%08x_%08x, 0x%08x_%08x]\n",
			   upper_32_bits(start), lower_32_bits(start),
			   upper_32_bits(end), lower_32_bits(end));
	}
	if (INTEL_GEN(m->i915) >= 4) {
		err_printf(m, "  BBADDR: 0x%08x_%08x\n",
			   (u32)(ee->bbaddr >> 32), (u32)ee->bbaddr);
		err_printf(m, "  BB_STATE: 0x%08x\n", ee->bbstate);
		err_printf(m, "  INSTPS: 0x%08x\n", ee->instps);
	}
	err_printf(m, "  INSTPM: 0x%08x\n", ee->instpm);
	err_printf(m, "  FADDR: 0x%08x %08x\n", upper_32_bits(ee->faddr),
		   lower_32_bits(ee->faddr));
	if (INTEL_GEN(m->i915) >= 6) {
		err_printf(m, "  RC PSMI: 0x%08x\n", ee->rc_psmi);
		err_printf(m, "  FAULT_REG: 0x%08x\n", ee->fault_reg);
		err_printf(m, "  SYNC_0: 0x%08x\n",
			   ee->semaphore_mboxes[0]);
		err_printf(m, "  SYNC_1: 0x%08x\n",
			   ee->semaphore_mboxes[1]);
		if (HAS_VEBOX(m->i915))
			err_printf(m, "  SYNC_2: 0x%08x\n",
				   ee->semaphore_mboxes[2]);
	}
	if (USES_PPGTT(m->i915)) {
		err_printf(m, "  GFX_MODE: 0x%08x\n", ee->vm_info.gfx_mode);

		if (INTEL_GEN(m->i915) >= 8) {
			int i;

			for (i = 0; i < 4; i++)
				err_printf(m, "  PDP%d: 0x%016llx\n",
					   i, ee->vm_info.pdp[i]);
		} else {
			err_printf(m, "  PP_DIR_BASE: 0x%08x\n",
				   ee->vm_info.pp_dir_base);
		}
	}
	err_printf(m, "  seqno: 0x%08x\n", ee->seqno);
	err_printf(m, "  last_seqno: 0x%08x\n", ee->last_seqno);
	err_printf(m, "  waiting: %s\n", yesno(ee->waiting));
	err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);
	err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail);
	err_printf(m, "  hangcheck stall: %s\n", yesno(ee->hangcheck_stalled));
	err_printf(m, "  hangcheck action: %s\n",
		   hangcheck_action_to_str(ee->hangcheck_action));
	err_printf(m, "  hangcheck action timestamp: %lu, %u ms ago\n",
		   ee->hangcheck_timestamp,
		   jiffies_to_msecs(jiffies - ee->hangcheck_timestamp));

	error_print_request(m, "  ELSP[0]: ", &ee->execlist[0]);
	error_print_request(m, "  ELSP[1]: ", &ee->execlist[1]);
}
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
{
	va_list args;

	va_start(args, f);
	i915_error_vprintf(e, f, args);
	va_end(args);
}
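
/*
 * Object contents are emitted as ascii85 so that the dump stays printable
 * text: each 32-bit word becomes at most five characters starting from '!',
 * and (in print_error_obj() below) an all-zero word is abbreviated to "z".
 */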
static int
ascii85_encode_len(int len)
{
	return DIV_ROUND_UP(len, 4);
}

static bool
ascii85_encode(u32 in, char *out)
{
	int i;

	if (in == 0)
		return false;

	out[5] = '\0';
	for (i = 5; i--; ) {
		out[i] = '!' + in % 85;
		in /= 85;
	}

	return true;
}
static void print_error_obj(struct drm_i915_error_state_buf *m,
			    struct intel_engine_cs *engine,
			    const char *name,
			    struct drm_i915_error_object *obj)
{
	char out[6];
	int page;

	if (!obj)
		return;

	if (name) {
		err_printf(m, "%s --- %s = 0x%08x %08x\n",
			   engine ? engine->name : "global", name,
			   upper_32_bits(obj->gtt_offset),
			   lower_32_bits(obj->gtt_offset));
	}

	err_compression_marker(m);
	for (page = 0; page < obj->page_count; page++) {
		int i, len;

		len = PAGE_SIZE;
		if (page == obj->page_count - 1)
			len -= obj->unused;
		len = ascii85_encode_len(len);

		for (i = 0; i < len; i++) {
			if (ascii85_encode(obj->pages[page][i], out))
				err_puts(m, out);
			else
				err_puts(m, "z");
		}
	}
	err_puts(m, "\n");
}
static void err_print_capabilities(struct drm_i915_error_state_buf *m,
				   const struct intel_device_info *info)
{
#define PRINT_FLAG(x)  err_printf(m, #x ": %s\n", yesno(info->x))
	DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG);
#undef PRINT_FLAG
}
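
/*
 * Render the captured state in the text format exposed to userspace: a
 * header (message, kernel version, timestamps, device capabilities, PCI
 * ids), the global registers, per-engine state, the active/pinned buffer
 * lists, and finally the ascii85 dumps of the captured objects.
 */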
int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
			    const struct i915_error_state_file_priv *error_priv)
{
	struct drm_i915_private *dev_priv = error_priv->i915;
	struct pci_dev *pdev = dev_priv->drm.pdev;
	struct drm_i915_error_state *error = error_priv->error;
	struct drm_i915_error_object *obj;
	int i, j;

	if (!error) {
		err_printf(m, "no error state collected\n");
		goto out;
	}

	err_printf(m, "%s\n", error->error_msg);
	err_printf(m, "Kernel: " UTS_RELEASE "\n");
	err_printf(m, "Time: %ld s %ld us\n",
		   error->time.tv_sec, error->time.tv_usec);
	err_printf(m, "Boottime: %ld s %ld us\n",
		   error->boottime.tv_sec, error->boottime.tv_usec);
	err_printf(m, "Uptime: %ld s %ld us\n",
		   error->uptime.tv_sec, error->uptime.tv_usec);
	err_print_capabilities(m, &error->device_info);

	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		if (error->engine[i].hangcheck_stalled &&
		    error->engine[i].pid != -1) {
			err_printf(m, "Active process (on ring %s): %s [%d], context bans %d\n",
				   engine_str(i),
				   error->engine[i].comm,
				   error->engine[i].pid,
				   error->engine[i].context_bans);
		}
	}
	err_printf(m, "Reset count: %u\n", error->reset_count);
	err_printf(m, "Suspend count: %u\n", error->suspend_count);
	err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform));
	err_printf(m, "PCI ID: 0x%04x\n", pdev->device);
	err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision);
	err_printf(m, "PCI Subsystem: %04x:%04x\n",
		   pdev->subsystem_vendor,
		   pdev->subsystem_device);
	err_printf(m, "IOMMU enabled?: %d\n", error->iommu);

	if (HAS_CSR(dev_priv)) {
		struct intel_csr *csr = &dev_priv->csr;

		err_printf(m, "DMC loaded: %s\n",
			   yesno(csr->dmc_payload != NULL));
		err_printf(m, "DMC fw version: %d.%d\n",
			   CSR_VERSION_MAJOR(csr->version),
			   CSR_VERSION_MINOR(csr->version));
	}

	err_printf(m, "EIR: 0x%08x\n", error->eir);
	err_printf(m, "IER: 0x%08x\n", error->ier);
	if (INTEL_GEN(dev_priv) >= 8) {
		for (i = 0; i < 4; i++)
			err_printf(m, "GTIER gt %d: 0x%08x\n", i,
				   error->gtier[i]);
	} else if (HAS_PCH_SPLIT(dev_priv) || IS_VALLEYVIEW(dev_priv))
		err_printf(m, "GTIER: 0x%08x\n", error->gtier[0]);
	err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er);
	err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
	err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
	err_printf(m, "CCID: 0x%08x\n", error->ccid);
	err_printf(m, "Missed interrupts: 0x%08lx\n", dev_priv->gpu_error.missed_irq_rings);

	for (i = 0; i < dev_priv->num_fence_regs; i++)
		err_printf(m, "  fence[%d] = %08llx\n", i, error->fence[i]);

	if (INTEL_GEN(dev_priv) >= 6) {
		err_printf(m, "ERROR: 0x%08x\n", error->error);

		if (INTEL_GEN(dev_priv) >= 8)
			err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n",
				   error->fault_data1, error->fault_data0);

		err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg);
	}

	if (IS_GEN7(dev_priv))
		err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);

	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		if (error->engine[i].engine_id != -1)
			error_print_engine(m, &error->engine[i]);
	}

	for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) {
		char buf[128];
		int len, first = 1;

		if (!error->active_vm[i])
			break;

		len = scnprintf(buf, sizeof(buf), "Active (");
		for (j = 0; j < ARRAY_SIZE(error->engine); j++) {
			if (error->engine[j].vm != error->active_vm[i])
				continue;

			len += scnprintf(buf + len, sizeof(buf), "%s%s",
					 first ? "" : ", ",
					 dev_priv->engine[j]->name);
			first = 0;
		}
		scnprintf(buf + len, sizeof(buf), ")");
		print_error_buffers(m, buf,
				    error->active_bo[i],
				    error->active_bo_count[i]);
	}

	if (error->pinned_bo)
		print_error_buffers(m, "Pinned (global)",
				    error->pinned_bo,
				    error->pinned_bo_count);

	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		struct drm_i915_error_engine *ee = &error->engine[i];

		obj = ee->batchbuffer;
		if (obj) {
			err_puts(m, dev_priv->engine[i]->name);
			if (ee->pid != -1)
				err_printf(m, " (submitted by %s [%d], bans %d)",
					   ee->comm,
					   ee->pid,
					   ee->context_bans);
			err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
				   upper_32_bits(obj->gtt_offset),
				   lower_32_bits(obj->gtt_offset));
			print_error_obj(m, dev_priv->engine[i], NULL, obj);
		}

		if (ee->num_requests) {
			err_printf(m, "%s --- %d requests\n",
				   dev_priv->engine[i]->name,
				   ee->num_requests);
			for (j = 0; j < ee->num_requests; j++)
				error_print_request(m, " ", &ee->requests[j]);
		}

		if (IS_ERR(ee->waiters)) {
			err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n",
				   dev_priv->engine[i]->name);
		} else if (ee->num_waiters) {
			err_printf(m, "%s --- %d waiters\n",
				   dev_priv->engine[i]->name,
				   ee->num_waiters);
			for (j = 0; j < ee->num_waiters; j++) {
				err_printf(m, " seqno 0x%08x for %s [%d]\n",
					   ee->waiters[j].seqno,
					   ee->waiters[j].comm,
					   ee->waiters[j].pid);
			}
		}

		print_error_obj(m, dev_priv->engine[i],
				"ringbuffer", ee->ringbuffer);

		print_error_obj(m, dev_priv->engine[i],
				"HW Status", ee->hws_page);

		print_error_obj(m, dev_priv->engine[i],
				"HW context", ee->ctx);

		print_error_obj(m, dev_priv->engine[i],
				"WA context", ee->wa_ctx);

		print_error_obj(m, dev_priv->engine[i],
				"WA batchbuffer", ee->wa_batchbuffer);
	}

	print_error_obj(m, NULL, "Semaphores", error->semaphore);

	print_error_obj(m, NULL, "GuC log buffer", error->guc_log);

	if (error->overlay)
		intel_overlay_print_error_state(m, error->overlay);

	if (error->display)
		intel_display_print_error_state(m, dev_priv, error->display);

out:
	if (m->bytes == 0 && m->err)
		return m->err;

	return 0;
}
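
/*
 * The output buffer must be able to hold the largest single printf in one
 * piece (see __i915_error_seek()), so allocate at least a page up front
 * and retry with smaller sizes if the initial allocation fails.
 */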
int i915_error_state_buf_init(struct drm_i915_error_state_buf *ebuf,
			      struct drm_i915_private *i915,
			      size_t count, loff_t pos)
{
	memset(ebuf, 0, sizeof(*ebuf));
	ebuf->i915 = i915;

	/* We need to have enough room to store any i915_error_state printf
	 * so that we can move it to the start position.
	 */
	ebuf->size = count + 1 > PAGE_SIZE ? count + 1 : PAGE_SIZE;
	ebuf->buf = kmalloc(ebuf->size,
			    GFP_TEMPORARY | __GFP_NORETRY | __GFP_NOWARN);

	if (ebuf->buf == NULL) {
		ebuf->size = PAGE_SIZE;
		ebuf->buf = kmalloc(ebuf->size, GFP_TEMPORARY);
	}

	if (ebuf->buf == NULL) {
		ebuf->size = 128;
		ebuf->buf = kmalloc(ebuf->size, GFP_TEMPORARY);
	}

	if (ebuf->buf == NULL)
		return -ENOMEM;

	ebuf->start = pos;

	return 0;
}
static void i915_error_object_free(struct drm_i915_error_object *obj)
{
	int page;

	if (obj == NULL)
		return;

	for (page = 0; page < obj->page_count; page++)
		free_page((unsigned long)obj->pages[page]);

	kfree(obj);
}
static void i915_error_state_free(struct kref *error_ref)
{
	struct drm_i915_error_state *error = container_of(error_ref,
							  typeof(*error), ref);
	int i;

	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		struct drm_i915_error_engine *ee = &error->engine[i];

		i915_error_object_free(ee->batchbuffer);
		i915_error_object_free(ee->wa_batchbuffer);
		i915_error_object_free(ee->ringbuffer);
		i915_error_object_free(ee->hws_page);
		i915_error_object_free(ee->ctx);
		i915_error_object_free(ee->wa_ctx);

		kfree(ee->requests);
		if (!IS_ERR_OR_NULL(ee->waiters))
			kfree(ee->waiters);
	}

	i915_error_object_free(error->semaphore);
	i915_error_object_free(error->guc_log);

	for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
		kfree(error->active_bo[i]);
	kfree(error->pinned_bo);

	kfree(error->overlay);
	kfree(error->display);
	kfree(error);
}
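
/*
 * Copy an object's backing pages out of the GGTT: each dma page is
 * temporarily bound into the reserved error_capture PTE slot, mapped
 * atomically as write-combining and fed to compress_page(), so the
 * capture only needs CPU access through the aperture.
 */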
static struct drm_i915_error_object *
i915_error_object_create(struct drm_i915_private *i915,
			 struct i915_vma *vma)
{
	struct i915_ggtt *ggtt = &i915->ggtt;
	const u64 slot = ggtt->error_capture.start;
	struct drm_i915_error_object *dst;
	struct compress compress;
	unsigned long num_pages;
	struct sgt_iter iter;
	dma_addr_t dma;

	if (!vma)
		return NULL;

	num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
	num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */
	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *),
		      GFP_ATOMIC | __GFP_NOWARN);
	if (!dst)
		return NULL;

	dst->gtt_offset = vma->node.start;
	dst->gtt_size = vma->node.size;
	dst->page_count = 0;
	dst->unused = 0;

	if (!compress_init(&compress)) {
		kfree(dst);
		return NULL;
	}

	for_each_sgt_dma(dma, iter, vma->pages) {
		void __iomem *s;
		int ret;

		ggtt->base.insert_page(&ggtt->base, dma, slot,
				       I915_CACHE_NONE, 0);

		s = io_mapping_map_atomic_wc(&ggtt->mappable, slot);
		ret = compress_page(&compress, (void  __force *)s, dst);
		io_mapping_unmap_atomic(s);

		if (ret)
			goto unwind;
	}
	goto out;

unwind:
	while (dst->page_count--)
		free_page((unsigned long)dst->pages[dst->page_count]);
	kfree(dst);
	dst = NULL;

out:
	compress_fini(&compress, dst);
	ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE);
	return dst;
}
/* The error capture is special as it tries to run underneath the normal
 * locking rules - so we use the raw version of the i915_gem_active lookup.
 */
static inline uint32_t
__active_get_seqno(struct i915_gem_active *active)
{
	struct drm_i915_gem_request *request;

	request = __i915_gem_active_peek(active);
	return request ? request->global_seqno : 0;
}
static inline int
__active_get_engine_id(struct i915_gem_active *active)
{
	struct drm_i915_gem_request *request;

	request = __i915_gem_active_peek(active);
	return request ? request->engine->id : -1;
}
static void capture_bo(struct drm_i915_error_buffer *err,
		       struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj = vma->obj;
	int i;

	err->size = obj->base.size;
	err->name = obj->base.name;

	for (i = 0; i < I915_NUM_ENGINES; i++)
		err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);
	err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
	err->engine = __active_get_engine_id(&obj->frontbuffer_write);

	err->gtt_offset = vma->node.start;
	err->read_domains = obj->base.read_domains;
	err->write_domain = obj->base.write_domain;
	err->fence_reg = vma->fence ? vma->fence->id : -1;
	err->tiling = i915_gem_object_get_tiling(obj);
	err->dirty = obj->mm.dirty;
	err->purgeable = obj->mm.madv != I915_MADV_WILLNEED;
	err->userptr = obj->userptr.mm != NULL;
	err->cache_level = obj->cache_level;
}
static u32 capture_error_bo(struct drm_i915_error_buffer *err,
			    int count, struct list_head *head,
			    bool pinned_only)
{
	struct i915_vma *vma;
	int i = 0;

	list_for_each_entry(vma, head, vm_link) {
		if (pinned_only && !i915_vma_is_pinned(vma))
			continue;

		capture_bo(err++, vma);
		if (++i == count)
			break;
	}

	return i;
}
/* Generate a semi-unique error code. The code is not meant to have meaning;
 * its only purpose is to try to prevent false duplicated bug reports by
 * grossly estimating a GPU error state.
 *
 * TODO Ideally, hashing the batchbuffer would be a very nice way to determine
 * the hang if we could strip the GTT offset information from it.
 *
 * It's only a small step better than a random number in its current form.
 */
static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv,
					 struct drm_i915_error_state *error,
					 int *engine_id)
{
	uint32_t error_code = 0;
	int i;

	/* IPEHR would be an ideal way to detect errors, as it's the gross
	 * measure of "the command that hung." However, it contains some very
	 * common synchronization commands which almost always appear even
	 * when the hang is strictly a client bug. Use instdone to
	 * differentiate those cases somewhat.
	 */
	for (i = 0; i < I915_NUM_ENGINES; i++) {
		if (error->engine[i].hangcheck_stalled) {
			if (engine_id)
				*engine_id = i;

			return error->engine[i].ipehr ^
			       error->engine[i].instdone.instdone;
		}
	}

	return error_code;
}
static void i915_gem_record_fences(struct drm_i915_private *dev_priv,
				   struct drm_i915_error_state *error)
{
	int i;

	if (IS_GEN3(dev_priv) || IS_GEN2(dev_priv)) {
		for (i = 0; i < dev_priv->num_fence_regs; i++)
			error->fence[i] = I915_READ(FENCE_REG(i));
	} else if (IS_GEN5(dev_priv) || IS_GEN4(dev_priv)) {
		for (i = 0; i < dev_priv->num_fence_regs; i++)
			error->fence[i] = I915_READ64(FENCE_REG_965_LO(i));
	} else if (INTEL_GEN(dev_priv) >= 6) {
		for (i = 0; i < dev_priv->num_fence_regs; i++)
			error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i));
	}
}
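
/*
 * Worked example for gen8_engine_sync_index() below: the semaphore mbox
 * slots are indexed by ring distance, so for engine = vcs (1) and
 * other = rcs (0), idx = (0 - 1) - 1 = -2, plus I915_NUM_ENGINES (5)
 * gives 3, matching the "vcs -> 3 = rcs" row of the table.
 */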
static inline uint32_t
gen8_engine_sync_index(struct intel_engine_cs *engine,
		       struct intel_engine_cs *other)
{
	int idx;

	/*
	 * rcs  -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2;
	 * vcs  -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs;
	 * bcs  -> 0 = vecs, 1 = vcs2, 2 = rcs, 3 = vcs;
	 * vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs;
	 * vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs;
	 */

	idx = (other - engine) - 1;
	if (idx < 0)
		idx += I915_NUM_ENGINES;

	return idx;
}
static void gen8_record_semaphore_state(struct drm_i915_error_state *error,
					struct intel_engine_cs *engine,
					struct drm_i915_error_engine *ee)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct intel_engine_cs *to;
	enum intel_engine_id id;

	if (!error->semaphore)
		return;

	for_each_engine(to, dev_priv, id) {
		int idx;
		u16 signal_offset;
		u32 *tmp;

		if (engine == to)
			continue;

		signal_offset =
			(GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4;
		tmp = error->semaphore->pages[0];
		idx = gen8_engine_sync_index(engine, to);

		ee->semaphore_mboxes[idx] = tmp[signal_offset];
	}
}
static void gen6_record_semaphore_state(struct intel_engine_cs *engine,
					struct drm_i915_error_engine *ee)
{
	struct drm_i915_private *dev_priv = engine->i915;

	ee->semaphore_mboxes[0] = I915_READ(RING_SYNC_0(engine->mmio_base));
	ee->semaphore_mboxes[1] = I915_READ(RING_SYNC_1(engine->mmio_base));
	if (HAS_VEBOX(dev_priv))
		ee->semaphore_mboxes[2] =
			I915_READ(RING_SYNC_2(engine->mmio_base));
}
static void error_record_engine_waiters(struct intel_engine_cs *engine,
					struct drm_i915_error_engine *ee)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct drm_i915_error_waiter *waiter;
	struct rb_node *rb;
	int count;

	ee->num_waiters = 0;
	ee->waiters = NULL;

	if (RB_EMPTY_ROOT(&b->waiters))
		return;

	if (!spin_trylock_irq(&b->lock)) {
		ee->waiters = ERR_PTR(-EDEADLK);
		return;
	}

	count = 0;
	for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb))
		count++;
	spin_unlock_irq(&b->lock);

	waiter = NULL;
	if (count)
		waiter = kmalloc_array(count,
				       sizeof(struct drm_i915_error_waiter),
				       GFP_ATOMIC);
	if (!waiter)
		return;

	if (!spin_trylock_irq(&b->lock)) {
		kfree(waiter);
		ee->waiters = ERR_PTR(-EDEADLK);
		return;
	}

	ee->waiters = waiter;
	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
		struct intel_wait *w = container_of(rb, typeof(*w), node);

		strcpy(waiter->comm, w->tsk->comm);
		waiter->pid = w->tsk->pid;
		waiter->seqno = w->seqno;
		waiter++;

		if (++ee->num_waiters == count)
			break;
	}
	spin_unlock_irq(&b->lock);
}
static void error_record_engine_registers(struct drm_i915_error_state *error,
					  struct intel_engine_cs *engine,
					  struct drm_i915_error_engine *ee)
{
	struct drm_i915_private *dev_priv = engine->i915;

	if (INTEL_GEN(dev_priv) >= 6) {
		ee->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base));
		ee->fault_reg = I915_READ(RING_FAULT_REG(engine));
		if (INTEL_GEN(dev_priv) >= 8)
			gen8_record_semaphore_state(error, engine, ee);
		else
			gen6_record_semaphore_state(engine, ee);
	}

	if (INTEL_GEN(dev_priv) >= 4) {
		ee->faddr = I915_READ(RING_DMA_FADD(engine->mmio_base));
		ee->ipeir = I915_READ(RING_IPEIR(engine->mmio_base));
		ee->ipehr = I915_READ(RING_IPEHR(engine->mmio_base));
		ee->instps = I915_READ(RING_INSTPS(engine->mmio_base));
		ee->bbaddr = I915_READ(RING_BBADDR(engine->mmio_base));
		if (INTEL_GEN(dev_priv) >= 8) {
			ee->faddr |= (u64) I915_READ(RING_DMA_FADD_UDW(engine->mmio_base)) << 32;
			ee->bbaddr |= (u64) I915_READ(RING_BBADDR_UDW(engine->mmio_base)) << 32;
		}
		ee->bbstate = I915_READ(RING_BBSTATE(engine->mmio_base));
	} else {
		ee->faddr = I915_READ(DMA_FADD_I8XX);
		ee->ipeir = I915_READ(IPEIR);
		ee->ipehr = I915_READ(IPEHR);
	}

	intel_engine_get_instdone(engine, &ee->instdone);

	ee->waiting = intel_engine_has_waiter(engine);
	ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
	ee->acthd = intel_engine_get_active_head(engine);
	ee->seqno = intel_engine_get_seqno(engine);
	ee->last_seqno = intel_engine_last_submit(engine);
	ee->start = I915_READ_START(engine);
	ee->head = I915_READ_HEAD(engine);
	ee->tail = I915_READ_TAIL(engine);
	ee->ctl = I915_READ_CTL(engine);
	if (INTEL_GEN(dev_priv) > 2)
		ee->mode = I915_READ_MODE(engine);

	if (!HWS_NEEDS_PHYSICAL(dev_priv)) {
		i915_reg_t mmio;

		if (IS_GEN7(dev_priv)) {
			switch (engine->id) {
			default:
			case RCS:
				mmio = RENDER_HWS_PGA_GEN7;
				break;
			case BCS:
				mmio = BLT_HWS_PGA_GEN7;
				break;
			case VCS:
				mmio = BSD_HWS_PGA_GEN7;
				break;
			case VECS:
				mmio = VEBOX_HWS_PGA_GEN7;
				break;
			}
		} else if (IS_GEN6(engine->i915)) {
			mmio = RING_HWS_PGA_GEN6(engine->mmio_base);
		} else {
			/* XXX: gen8 returns to sanity */
			mmio = RING_HWS_PGA(engine->mmio_base);
		}

		ee->hws = I915_READ(mmio);
	}

	ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
	ee->hangcheck_action = engine->hangcheck.action;
	ee->hangcheck_stalled = engine->hangcheck.stalled;

	if (USES_PPGTT(dev_priv)) {
		int i;

		ee->vm_info.gfx_mode = I915_READ(RING_MODE_GEN7(engine));

		if (IS_GEN6(dev_priv))
			ee->vm_info.pp_dir_base =
				I915_READ(RING_PP_DIR_BASE_READ(engine));
		else if (IS_GEN7(dev_priv))
			ee->vm_info.pp_dir_base =
				I915_READ(RING_PP_DIR_BASE(engine));
		else if (INTEL_GEN(dev_priv) >= 8)
			for (i = 0; i < 4; i++) {
				ee->vm_info.pdp[i] =
					I915_READ(GEN8_RING_PDP_UDW(engine, i));
				ee->vm_info.pdp[i] <<= 32;
				ee->vm_info.pdp[i] |=
					I915_READ(GEN8_RING_PDP_LDW(engine, i));
			}
	}
}
static void record_request(struct drm_i915_gem_request *request,
			   struct drm_i915_error_request *erq)
{
	erq->context = request->ctx->hw_id;
	erq->ban_score = request->ctx->ban_score;
	erq->seqno = request->global_seqno;
	erq->jiffies = request->emitted_jiffies;
	erq->head = request->head;
	erq->tail = request->tail;

	rcu_read_lock();
	erq->pid = request->ctx->pid ? pid_nr(request->ctx->pid) : 0;
	rcu_read_unlock();
}
static void engine_record_requests(struct intel_engine_cs *engine,
				   struct drm_i915_gem_request *first,
				   struct drm_i915_error_engine *ee)
{
	struct drm_i915_gem_request *request;
	int count;

	count = 0;
	request = first;
	list_for_each_entry_from(request, &engine->timeline->requests, link)
		count++;
	if (!count)
		return;

	ee->requests = kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC);
	if (!ee->requests)
		return;

	ee->num_requests = count;

	count = 0;
	request = first;
	list_for_each_entry_from(request, &engine->timeline->requests, link) {
		if (count >= ee->num_requests) {
			/*
			 * If the ring request list was changed in
			 * between the point where the error request
			 * list was created and dimensioned and this
			 * point, then just exit early to avoid crashes.
			 *
			 * We don't need to communicate that the
			 * request list changed state during error
			 * state capture and that the error state is
			 * slightly incorrect as a consequence, since we
			 * are typically only interested in the request
			 * list state at the point of error state
			 * capture, not in any changes happening during
			 * the capture.
			 */
			break;
		}

		record_request(request, &ee->requests[count++]);
	}
	ee->num_requests = count;
}
static void error_record_engine_execlists(struct intel_engine_cs *engine,
					  struct drm_i915_error_engine *ee)
{
	unsigned int n;

	for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
		if (engine->execlist_port[n].request)
			record_request(engine->execlist_port[n].request,
				       &ee->execlist[n]);
}
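
/*
 * Per-engine capture: registers, waiters and execlist ports are always
 * recorded; if an active (hung) request is found, its batchbuffer, context
 * image, ringbuffer contents and the queue of outstanding requests are
 * copied as well.
 */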
static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
				  struct drm_i915_error_state *error)
{
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	int i;

	error->semaphore =
		i915_error_object_create(dev_priv, dev_priv->semaphore);

	for (i = 0; i < I915_NUM_ENGINES; i++) {
		struct intel_engine_cs *engine = dev_priv->engine[i];
		struct drm_i915_error_engine *ee = &error->engine[i];
		struct drm_i915_gem_request *request;

		ee->pid = -1;
		ee->engine_id = -1;

		if (!engine)
			continue;

		ee->engine_id = i;

		error_record_engine_registers(error, engine, ee);
		error_record_engine_waiters(engine, ee);
		error_record_engine_execlists(engine, ee);

		request = i915_gem_find_active_request(engine);
		if (request) {
			struct intel_ring *ring;
			struct pid *pid;

			ee->vm = request->ctx->ppgtt ?
				&request->ctx->ppgtt->base : &ggtt->base;

			/* We need to copy these to an anonymous buffer
			 * as the simplest method to avoid being overwritten
			 * by userspace.
			 */
			ee->batchbuffer =
				i915_error_object_create(dev_priv,
							 request->batch);

			if (HAS_BROKEN_CS_TLB(dev_priv))
				ee->wa_batchbuffer =
					i915_error_object_create(dev_priv,
								 engine->scratch);

			ee->ctx =
				i915_error_object_create(dev_priv,
							 request->ctx->engine[i].state);

			pid = request->ctx->pid;
			if (pid) {
				struct task_struct *task;

				rcu_read_lock();
				task = pid_task(pid, PIDTYPE_PID);
				if (task) {
					strcpy(ee->comm, task->comm);
					ee->pid = task->pid;
				}
				rcu_read_unlock();
			}

			error->simulated |=
				i915_gem_context_no_error_capture(request->ctx);

			ee->rq_head = request->head;
			ee->rq_post = request->postfix;
			ee->rq_tail = request->tail;

			ring = request->ring;
			ee->cpu_ring_head = ring->head;
			ee->cpu_ring_tail = ring->tail;
			ee->ringbuffer =
				i915_error_object_create(dev_priv, ring->vma);

			engine_record_requests(engine, request, ee);
		}

		ee->hws_page =
			i915_error_object_create(dev_priv,
						 engine->status_page.vma);

		ee->wa_ctx =
			i915_error_object_create(dev_priv, engine->wa_ctx.vma);
	}
}
static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
				struct drm_i915_error_state *error,
				struct i915_address_space *vm,
				int idx)
{
	struct drm_i915_error_buffer *active_bo;
	struct i915_vma *vma;
	int count;

	count = 0;
	list_for_each_entry(vma, &vm->active_list, vm_link)
		count++;

	active_bo = NULL;
	if (count)
		active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC);
	if (active_bo)
		count = capture_error_bo(active_bo, count, &vm->active_list, false);
	else
		count = 0;

	error->active_vm[idx] = vm;
	error->active_bo[idx] = active_bo;
	error->active_bo_count[idx] = count;
}
static void i915_capture_active_buffers(struct drm_i915_private *dev_priv,
					struct drm_i915_error_state *error)
{
	int cnt = 0, i, j;

	BUILD_BUG_ON(ARRAY_SIZE(error->engine) > ARRAY_SIZE(error->active_bo));
	BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_vm));
	BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_bo_count));

	/* Scan each engine looking for unique active contexts/vm */
	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		struct drm_i915_error_engine *ee = &error->engine[i];
		bool found;

		if (!ee->vm)
			continue;

		found = false;
		for (j = 0; j < i && !found; j++)
			found = error->engine[j].vm == ee->vm;
		if (!found)
			i915_gem_capture_vm(dev_priv, error, ee->vm, cnt++);
	}
}
static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv,
					struct drm_i915_error_state *error)
{
	struct i915_address_space *vm = &dev_priv->ggtt.base;
	struct drm_i915_error_buffer *bo;
	struct i915_vma *vma;
	int count_inactive, count_active;

	count_inactive = 0;
	list_for_each_entry(vma, &vm->active_list, vm_link)
		count_inactive++;

	count_active = 0;
	list_for_each_entry(vma, &vm->inactive_list, vm_link)
		count_active++;

	bo = NULL;
	if (count_inactive + count_active)
		bo = kcalloc(count_inactive + count_active,
			     sizeof(*bo), GFP_ATOMIC);
	if (!bo)
		return;

	count_inactive = capture_error_bo(bo, count_inactive,
					  &vm->active_list, true);
	count_active = capture_error_bo(bo + count_inactive, count_active,
					&vm->inactive_list, true);
	error->pinned_bo_count = count_inactive + count_active;
	error->pinned_bo = bo;
}
static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv,
					    struct drm_i915_error_state *error)
{
	/* Capturing log buf contents won't be useful if logging was disabled */
	if (!dev_priv->guc.log.vma || (i915.guc_log_level < 0))
		return;

	error->guc_log = i915_error_object_create(dev_priv,
						  dev_priv->guc.log.vma);
}
/* Capture all registers which don't fit into another category. */
static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
				   struct drm_i915_error_state *error)
{
	int i;

	/* General organization
	 * 1. Registers specific to a single generation
	 * 2. Registers which belong to multiple generations
	 * 3. Feature specific registers.
	 * 4. Everything else
	 * Please try to follow the order.
	 */

	/* 1: Registers specific to a single generation */
	if (IS_VALLEYVIEW(dev_priv)) {
		error->gtier[0] = I915_READ(GTIER);
		error->ier = I915_READ(VLV_IER);
		error->forcewake = I915_READ_FW(FORCEWAKE_VLV);
	}

	if (IS_GEN7(dev_priv))
		error->err_int = I915_READ(GEN7_ERR_INT);

	if (INTEL_GEN(dev_priv) >= 8) {
		error->fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0);
		error->fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1);
	}

	if (IS_GEN6(dev_priv)) {
		error->forcewake = I915_READ_FW(FORCEWAKE);
		error->gab_ctl = I915_READ(GAB_CTL);
		error->gfx_mode = I915_READ(GFX_MODE);
	}

	/* 2: Registers which belong to multiple generations */
	if (INTEL_GEN(dev_priv) >= 7)
		error->forcewake = I915_READ_FW(FORCEWAKE_MT);

	if (INTEL_GEN(dev_priv) >= 6) {
		error->derrmr = I915_READ(DERRMR);
		error->error = I915_READ(ERROR_GEN6);
		error->done_reg = I915_READ(DONE_REG);
	}

	/* 3: Feature specific registers */
	if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
		error->gam_ecochk = I915_READ(GAM_ECOCHK);
		error->gac_eco = I915_READ(GAC_ECO_BITS);
	}

	/* 4: Everything else */
	if (HAS_HW_CONTEXTS(dev_priv))
		error->ccid = I915_READ(CCID);

	if (INTEL_GEN(dev_priv) >= 8) {
		error->ier = I915_READ(GEN8_DE_MISC_IER);
		for (i = 0; i < 4; i++)
			error->gtier[i] = I915_READ(GEN8_GT_IER(i));
	} else if (HAS_PCH_SPLIT(dev_priv)) {
		error->ier = I915_READ(DEIER);
		error->gtier[0] = I915_READ(GTIER);
	} else if (IS_GEN2(dev_priv)) {
		error->ier = I915_READ16(IER);
	} else if (!IS_VALLEYVIEW(dev_priv)) {
		error->ier = I915_READ(IER);
	}
	error->eir = I915_READ(EIR);
	error->pgtbl_er = I915_READ(PGTBL_ER);
}
static void i915_error_capture_msg(struct drm_i915_private *dev_priv,
				   struct drm_i915_error_state *error,
				   u32 engine_mask,
				   const char *error_msg)
{
	u32 ecode;
	int engine_id = -1, len;

	ecode = i915_error_generate_code(dev_priv, error, &engine_id);

	len = scnprintf(error->error_msg, sizeof(error->error_msg),
			"GPU HANG: ecode %d:%d:0x%08x",
			INTEL_GEN(dev_priv), engine_id, ecode);

	if (engine_id != -1 && error->engine[engine_id].pid != -1)
		len += scnprintf(error->error_msg + len,
				 sizeof(error->error_msg) - len,
				 ", in %s [%d]",
				 error->engine[engine_id].comm,
				 error->engine[engine_id].pid);

	scnprintf(error->error_msg + len, sizeof(error->error_msg) - len,
		  ", reason: %s, action: %s",
		  error_msg,
		  engine_mask ? "reset" : "continue");
}
static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
				   struct drm_i915_error_state *error)
{
	error->iommu = -1;
#ifdef CONFIG_INTEL_IOMMU
	error->iommu = intel_iommu_gfx_mapped;
#endif
	error->reset_count = i915_reset_count(&dev_priv->gpu_error);
	error->suspend_count = dev_priv->suspend_count;

	memcpy(&error->device_info,
	       INTEL_INFO(dev_priv),
	       sizeof(error->device_info));
}
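
/*
 * capture() runs via stop_machine() (see i915_capture_error_state()
 * below), so the whole snapshot is taken with the rest of the machine
 * quiesced and is internally consistent.
 */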
static int capture(void *data)
{
	struct drm_i915_error_state *error = data;

	i915_capture_gen_state(error->i915, error);
	i915_capture_reg_state(error->i915, error);
	i915_gem_record_fences(error->i915, error);
	i915_gem_record_rings(error->i915, error);
	i915_capture_active_buffers(error->i915, error);
	i915_capture_pinned_buffers(error->i915, error);
	i915_gem_capture_guc_log_buffer(error->i915, error);

	do_gettimeofday(&error->time);
	error->boottime = ktime_to_timeval(ktime_get_boottime());
	error->uptime =
		ktime_to_timeval(ktime_sub(ktime_get(),
					   error->i915->gt.last_init_time));

	error->overlay = intel_overlay_capture_error_state(error->i915);
	error->display = intel_display_capture_error_state(error->i915);

	return 0;
}
#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))
/**
 * i915_capture_error_state - capture an error record for later analysis
 * @dev_priv: i915 device private
 * @engine_mask: mask of engines to be reset (zero means no reset)
 * @error_msg: reason string to record in the capture header
 *
 * Should be called when an error is detected (either a hang or an error
 * interrupt) to capture error state from the time of the error.  Fills
 * out a structure which becomes available in debugfs for user level tools
 * to pick up.
 */
void i915_capture_error_state(struct drm_i915_private *dev_priv,
			      u32 engine_mask,
			      const char *error_msg)
{
	static bool warned;
	struct drm_i915_error_state *error;
	unsigned long flags;

	if (!i915.error_capture)
		return;

	if (READ_ONCE(dev_priv->gpu_error.first_error))
		return;

	/* Account for pipe specific data like PIPE*STAT */
	error = kzalloc(sizeof(*error), GFP_ATOMIC);
	if (!error) {
		DRM_DEBUG_DRIVER("out of memory, not capturing error state\n");
		return;
	}

	kref_init(&error->ref);
	error->i915 = dev_priv;

	stop_machine(capture, error, NULL);

	i915_error_capture_msg(dev_priv, error, engine_mask, error_msg);
	DRM_INFO("%s\n", error->error_msg);

	if (!error->simulated) {
		spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
		if (!dev_priv->gpu_error.first_error) {
			dev_priv->gpu_error.first_error = error;
			error = NULL;
		}
		spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
	}

	if (error) {
		i915_error_state_free(&error->ref);
		return;
	}

	if (!warned &&
	    ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) {
		DRM_INFO("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n");
		DRM_INFO("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n");
		DRM_INFO("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n");
		DRM_INFO("The gpu crash dump is required to analyze gpu hangs, so please always attach it.\n");
		DRM_INFO("GPU crash dump saved to /sys/class/drm/card%d/error\n",
			 dev_priv->drm.primary->index);
		warned = true;
	}
}
void i915_error_state_get(struct drm_device *dev,
			  struct i915_error_state_file_priv *error_priv)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	spin_lock_irq(&dev_priv->gpu_error.lock);
	error_priv->error = dev_priv->gpu_error.first_error;
	if (error_priv->error)
		kref_get(&error_priv->error->ref);
	spin_unlock_irq(&dev_priv->gpu_error.lock);
}
void i915_error_state_put(struct i915_error_state_file_priv *error_priv)
{
	if (error_priv->error)
		kref_put(&error_priv->error->ref, i915_error_state_free);
}
void i915_destroy_error_state(struct drm_i915_private *dev_priv)
{
	struct drm_i915_error_state *error;

	spin_lock_irq(&dev_priv->gpu_error.lock);
	error = dev_priv->gpu_error.first_error;
	dev_priv->gpu_error.first_error = NULL;
	spin_unlock_irq(&dev_priv->gpu_error.lock);

	if (error)
		kref_put(&error->ref, i915_error_state_free);
}