// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */

#include <linux/ascii85.h>
#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.h"
#include "a6xx_gpu_state.h"
#include "a6xx_gmu.xml.h"

struct a6xx_gpu_state_obj {
        const void *handle;
        u32 *data;
};

struct a6xx_gpu_state {
        struct msm_gpu_state base;

        struct a6xx_gpu_state_obj *gmu_registers;
        int nr_gmu_registers;

        struct a6xx_gpu_state_obj *registers;
        int nr_registers;

        struct a6xx_gpu_state_obj *shaders;
        int nr_shaders;

        struct a6xx_gpu_state_obj *clusters;
        int nr_clusters;

        struct a6xx_gpu_state_obj *dbgahb_clusters;
        int nr_dbgahb_clusters;

        struct a6xx_gpu_state_obj *indexed_regs;
        int nr_indexed_regs;

        struct a6xx_gpu_state_obj *debugbus;
        int nr_debugbus;

        struct a6xx_gpu_state_obj *vbif_debugbus;

        struct a6xx_gpu_state_obj *cx_debugbus;
        int nr_cx_debugbus;

        struct list_head objs;
};

static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
{
        in[0] = val;
        in[1] = (((u64) reg) << 44 | (1 << 21) | 1);

        return 2;
}

static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
{
        in[0] = target;
        in[1] = (((u64) reg) << 44 | dwords);

        return 2;
}

static inline int CRASHDUMP_FINI(u64 *in)
{
        in[0] = 0;
        in[1] = 0;
        return 2;
}

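/*
 * A note on the script format, inferred from the helpers above (the exact
 * meaning of the bit fields is an assumption): every crashdumper opcode is a
 * pair of 64-bit words.  CRASHDUMP_WRITE() packs the value and the target
 * register (plus a write flag), CRASHDUMP_READ() packs the destination iova
 * and the register/dword count, and CRASHDUMP_FINI() terminates the script
 * with a zero pair.  Each helper returns 2 so callers can advance their u64
 * script pointer by one opcode.
 */
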
struct a6xx_crashdumper {
        void *ptr;
        struct drm_gem_object *bo;
        u64 iova;
};

struct a6xx_state_memobj {
        struct list_head node;
        unsigned long long data[];
};

static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr,
                size_t objsize)
{
        struct a6xx_state_memobj *obj =
                kzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);

        if (!obj)
                return NULL;

        list_add_tail(&obj->node, &a6xx_state->objs);
        return &obj->data;
}

static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
                size_t size)
{
        void *dst = state_kcalloc(a6xx_state, 1, size);

        if (dst)
                memcpy(dst, src, size);

        return dst;
}

/*
 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 * the rest for the data
 */
#define A6XX_CD_DATA_OFFSET 8192
#define A6XX_CD_DATA_SIZE (SZ_1M - 8192)

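/*
 * Layout implied by the defines above: the crashdump script is built at the
 * start of the 1MB scratch BO and every capture lands at A6XX_CD_DATA_OFFSET,
 * so a single dump may not exceed A6XX_CD_DATA_SIZE bytes - the WARN_ON()
 * checks in the capture functions below enforce that limit.
 */
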
static int a6xx_crashdumper_init(struct msm_gpu *gpu,
                struct a6xx_crashdumper *dumper)
{
        dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
                SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
                &dumper->bo, &dumper->iova);

        if (!IS_ERR(dumper->ptr))
                msm_gem_object_set_name(dumper->bo, "crashdump");

        return PTR_ERR_OR_ZERO(dumper->ptr);
}

static int a6xx_crashdumper_run(struct msm_gpu *gpu,
                struct a6xx_crashdumper *dumper)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
        u32 val;
        int ret;

        if (IS_ERR_OR_NULL(dumper->ptr))
                return -EINVAL;

        if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
                return -EINVAL;

        /* Make sure all pending memory writes are posted */
        wmb();

        gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
                REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);

        gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);

        ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
                val & 0x02, 100, 10000);

        gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);

        return ret;
}

/* read a value from the GX debug bus */
static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
                u32 *data)
{
        u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
                A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);

        /* Wait 1 us to make sure the data is flowing */
        udelay(1);

        data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
        data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);

        return 2;
}

#define cxdbg_write(ptr, offset, val) \
        msm_writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) \
        msm_readl((ptr) + ((offset) << 2))

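/*
 * The CX debug bus registers are addressed by dword offset in the register
 * database, so the helpers above shift the offset left by two to form a byte
 * offset into the ioremapped region.
 */
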
/* read a value from the CX debug bus */
static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
                u32 *data)
{
        u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
                A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);

        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);

        /* Wait 1 us to make sure the data is flowing */
        udelay(1);

        data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
        data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);

        return 2;
}

/* Read a chunk of data from the VBIF debug bus */
static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
                u32 reg, int count, u32 *data)
{
        int i;

        gpu_write(gpu, ctrl0, reg);

        for (i = 0; i < count; i++) {
                gpu_write(gpu, ctrl1, i);
                data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
        }

        return count;
}

#define AXI_ARB_BLOCKS 2
#define XIN_AXI_BLOCKS 5
#define XIN_CORE_BLOCKS 4

#define VBIF_DEBUGBUS_BLOCK_SIZE \
        ((16 * AXI_ARB_BLOCKS) + \
         (18 * XIN_AXI_BLOCKS) + \
         (12 * XIN_CORE_BLOCKS))

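/*
 * Worked out from the macro above: (16 * 2) + (18 * 5) + (12 * 4) = 170
 * dwords are captured for the VBIF debug bus, matching the per-block dword
 * counts passed to vbif_debugbus_read() below.
 */
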
static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_gpu_state_obj *obj)
{
        u32 clk, *ptr;
        int i;

        obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
                sizeof(u32));
        if (!obj->data)
                return;

        /* Get the current clock setting */
        clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);

        /* Force on the bus so we can read it */
        gpu_write(gpu, REG_A6XX_VBIF_CLKON,
                clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);

        /* We will read from BUS2 first, so disable BUS1 */
        gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);

        /* Enable the VBIF bus for reading */
        gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);

        ptr = obj->data;

        for (i = 0; i < AXI_ARB_BLOCKS; i++)
                ptr += vbif_debugbus_read(gpu,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL0,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL1,
                        1 << (i + 16), 16, ptr);

        for (i = 0; i < XIN_AXI_BLOCKS; i++)
                ptr += vbif_debugbus_read(gpu,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL0,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL1,
                        1 << i, 18, ptr);

        /* Stop BUS2 so we can turn on BUS1 */
        gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);

        for (i = 0; i < XIN_CORE_BLOCKS; i++)
                ptr += vbif_debugbus_read(gpu,
                        REG_A6XX_VBIF_TEST_BUS1_CTRL0,
                        REG_A6XX_VBIF_TEST_BUS1_CTRL1,
                        1 << i, 12, ptr);

        /* Restore the VBIF clock setting */
        gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
}

static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_debugbus_block *block,
                struct a6xx_gpu_state_obj *obj)
{
        u32 *ptr;
        int i;

        obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
        if (!obj->data)
                return;

        obj->handle = block;

        for (ptr = obj->data, i = 0; i < block->count; i++)
                ptr += debugbus_read(gpu, block->id, i, ptr);
}

static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_debugbus_block *block,
                struct a6xx_gpu_state_obj *obj)
{
        u32 *ptr;
        int i;

        obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
        if (!obj->data)
                return;

        obj->handle = block;

        for (ptr = obj->data, i = 0; i < block->count; i++)
                ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
}

static void a6xx_get_debugbus(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state)
{
        struct resource *res;
        void __iomem *cxdbg = NULL;
        int nr_debugbus_blocks;

        /* Set up the GX debug bus */

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
                A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
                A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);

        /* Set up the CX debug bus - it lives elsewhere in the system so do a
         * temporary ioremap for the registers
         */
        res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
                        "cx_dbgc");

        if (res)
                cxdbg = ioremap(res->start, resource_size(res));

        if (cxdbg) {
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
                        A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
                        A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);

                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
                        0x76543210);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
                        0xFEDCBA98);

                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
        }

        nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
                (a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);

        a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
                        sizeof(*a6xx_state->debugbus));

        if (a6xx_state->debugbus) {
                int i;

                for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
                        a6xx_get_debugbus_block(gpu,
                                a6xx_state,
                                &a6xx_debugbus_blocks[i],
                                &a6xx_state->debugbus[i]);

                a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);

                /*
                 * GBIF has same debugbus as of other GPU blocks, fall back to
                 * default path if GPU uses GBIF, also GBIF uses exactly same
                 * ID as of VBIF so use it as it is.
                 */
                if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
                        a6xx_get_debugbus_block(gpu, a6xx_state,
                                &a6xx_gbif_debugbus_block,
                                &a6xx_state->debugbus[i]);

                        a6xx_state->nr_debugbus += 1;
                }
        }

        /* Dump the VBIF debugbus on applicable targets */
        if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
                a6xx_state->vbif_debugbus =
                        state_kcalloc(a6xx_state, 1,
                                        sizeof(*a6xx_state->vbif_debugbus));

                if (a6xx_state->vbif_debugbus)
                        a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
                                        a6xx_state->vbif_debugbus);
        }

        if (cxdbg) {
                a6xx_state->cx_debugbus =
                        state_kcalloc(a6xx_state,
                        ARRAY_SIZE(a6xx_cx_debugbus_blocks),
                        sizeof(*a6xx_state->cx_debugbus));

                if (a6xx_state->cx_debugbus) {
                        int i;

                        for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
                                a6xx_get_cx_debugbus_block(cxdbg,
                                        a6xx_state,
                                        &a6xx_cx_debugbus_blocks[i],
                                        &a6xx_state->cx_debugbus[i]);

                        a6xx_state->nr_cx_debugbus =
                                ARRAY_SIZE(a6xx_cx_debugbus_blocks);
                }

                iounmap(cxdbg);
        }
}

#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)

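/*
 * Register lists are stored as { start, end } pairs and RANGE() yields the
 * inclusive register count of one pair; for a hypothetical pair
 * { 0x0800, 0x0803 }, RANGE(regs, 0) evaluates to 4.
 */
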
/* Read a data cluster from behind the AHB aperture */
static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_dbgahb_cluster *dbgahb,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        size_t datasize;
        int i, regcount = 0;

        for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
                int j;

                in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
                        (dbgahb->statetype + i * 2) << 8);

                for (j = 0; j < dbgahb->count; j += 2) {
                        int count = RANGE(dbgahb->registers, j);
                        u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
                                dbgahb->registers[j] - (dbgahb->base >> 2);

                        in += CRASHDUMP_READ(in, offset, count, out);

                        out += count * sizeof(u32);

                        if (i == 0)
                                regcount += count;
                }
        }

        CRASHDUMP_FINI(in);

        datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

        if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
                return;

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = dbgahb;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
}

static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        int i;

        a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
                ARRAY_SIZE(a6xx_dbgahb_clusters),
                sizeof(*a6xx_state->dbgahb_clusters));

        if (!a6xx_state->dbgahb_clusters)
                return;

        a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);

        for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
                a6xx_get_dbgahb_cluster(gpu, a6xx_state,
                        &a6xx_dbgahb_clusters[i],
                        &a6xx_state->dbgahb_clusters[i], dumper);
}

/* Read a data cluster from the CP aperture with the crashdumper */
static void a6xx_get_cluster(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_cluster *cluster,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        size_t datasize;
        int i, regcount = 0;

        /* Some clusters need a selector register to be programmed too */
        if (cluster->sel_reg)
                in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);

        for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
                int j;

                in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
                        (cluster->id << 8) | (i << 4) | i);

                for (j = 0; j < cluster->count; j += 2) {
                        int count = RANGE(cluster->registers, j);

                        in += CRASHDUMP_READ(in, cluster->registers[j],
                                count, out);

                        out += count * sizeof(u32);

                        if (i == 0)
                                regcount += count;
                }
        }

        CRASHDUMP_FINI(in);

        datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

        if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
                return;

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = cluster;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
}

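/*
 * A guess at the encoding used above, based purely on how the value is
 * constructed: the write to REG_A6XX_CP_APERTURE_CNTL_CD appears to select
 * the cluster in bits 8 and up and the context index in the two low nibbles,
 * so each pass of the outer loop captures one hardware context.
 */
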
static void a6xx_get_clusters(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        int i;

        a6xx_state->clusters = state_kcalloc(a6xx_state,
                ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));

        if (!a6xx_state->clusters)
                return;

        a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);

        for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
                a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
                        &a6xx_state->clusters[i], dumper);
}

/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
static void a6xx_get_shader_block(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_shader_block *block,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
        int i;

        if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
                return;

        for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
                in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
                        (block->type << 8) | i);

                in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
                        block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
        }

        CRASHDUMP_FINI(in);

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = block;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
}

static void a6xx_get_shaders(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        int i;

        a6xx_state->shaders = state_kcalloc(a6xx_state,
                ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));

        if (!a6xx_state->shaders)
                return;

        a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);

        for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
                a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
                        &a6xx_state->shaders[i], dumper);
}

/* Read registers from behind the HLSQ aperture with the crashdumper */
static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_registers *regs,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        int i, regcount = 0;

        in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);

        for (i = 0; i < regs->count; i += 2) {
                u32 count = RANGE(regs->registers, i);
                u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
                        regs->registers[i] - (regs->val0 >> 2);

                in += CRASHDUMP_READ(in, offset, count, out);

                out += count * sizeof(u32);
                regcount += count;
        }

        CRASHDUMP_FINI(in);

        if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
                return;

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = regs;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                regcount * sizeof(u32));
}

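/*
 * In both crashdumper register paths (above and below) the captured values
 * end up as a flat u32 array in obj->data, in the same order as the source
 * register list, and obj->handle points back at the reglist so
 * a6xx_show_registers() can pair offsets with values later.
 */
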
/* Read a block of registers using the crashdumper */
static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_registers *regs,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        int i, regcount = 0;

        /* Some blocks might need to program a selector register first */
        if (regs->val0)
                in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);

        for (i = 0; i < regs->count; i += 2) {
                u32 count = RANGE(regs->registers, i);

                in += CRASHDUMP_READ(in, regs->registers[i], count, out);

                out += count * sizeof(u32);
                regcount += count;
        }

        CRASHDUMP_FINI(in);

        if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
                return;

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = regs;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                regcount * sizeof(u32));
}

/* Read a block of registers via AHB */
static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_registers *regs,
                struct a6xx_gpu_state_obj *obj)
{
        int i, regcount = 0, index = 0;

        for (i = 0; i < regs->count; i += 2)
                regcount += RANGE(regs->registers, i);

        obj->handle = (const void *) regs;
        obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
        if (!obj->data)
                return;

        for (i = 0; i < regs->count; i += 2) {
                u32 count = RANGE(regs->registers, i);
                int j;

                for (j = 0; j < count; j++)
                        obj->data[index++] = gpu_read(gpu,
                                regs->registers[i] + j);
        }
}

/* Read a block of GMU registers */
static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_registers *regs,
                struct a6xx_gpu_state_obj *obj)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
        struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
        int i, regcount = 0, index = 0;

        for (i = 0; i < regs->count; i += 2)
                regcount += RANGE(regs->registers, i);

        obj->handle = (const void *) regs;
        obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
        if (!obj->data)
                return;

        for (i = 0; i < regs->count; i += 2) {
                u32 count = RANGE(regs->registers, i);
                int j;

                for (j = 0; j < count; j++)
                        obj->data[index++] = gmu_read(gmu,
                                regs->registers[i] + j);
        }
}

static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

        a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
                2, sizeof(*a6xx_state->gmu_registers));

        if (!a6xx_state->gmu_registers)
                return;

        a6xx_state->nr_gmu_registers = 2;

        /* Get the CX GMU registers from AHB */
        _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
                &a6xx_state->gmu_registers[0]);

        if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
                return;

        /* Set the fence to ALLOW mode so we can access the registers */
        gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);

        _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
                &a6xx_state->gmu_registers[1]);
}

#define A6XX_GBIF_REGLIST_SIZE 1
static void a6xx_get_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
                ARRAY_SIZE(a6xx_reglist) +
                ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
        int index = 0;
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

        a6xx_state->registers = state_kcalloc(a6xx_state,
                count, sizeof(*a6xx_state->registers));

        if (!a6xx_state->registers)
                return;

        a6xx_state->nr_registers = count;

        for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
                a6xx_get_ahb_gpu_registers(gpu,
                        a6xx_state, &a6xx_ahb_reglist[i],
                        &a6xx_state->registers[index++]);

        if (a6xx_has_gbif(adreno_gpu))
                a6xx_get_ahb_gpu_registers(gpu,
                        a6xx_state, &a6xx_gbif_reglist,
                        &a6xx_state->registers[index++]);
        else
                a6xx_get_ahb_gpu_registers(gpu,
                        a6xx_state, &a6xx_vbif_reglist,
                        &a6xx_state->registers[index++]);

        for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
                a6xx_get_crashdumper_registers(gpu,
                        a6xx_state, &a6xx_reglist[i],
                        &a6xx_state->registers[index++],
                        dumper);

        for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
                a6xx_get_crashdumper_hlsq_registers(gpu,
                        a6xx_state, &a6xx_hlsq_reglist[i],
                        &a6xx_state->registers[index++],
                        dumper);
}

/* Read a block of data from an indexed register pair */
static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_indexed_registers *indexed,
                struct a6xx_gpu_state_obj *obj)
{
        int i;

        obj->handle = (const void *) indexed;
        obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
        if (!obj->data)
                return;

        /* All the indexed banks start at address 0 */
        gpu_write(gpu, indexed->addr, 0);

        /* Read the data - each read increments the internal address by 1 */
        for (i = 0; i < indexed->count; i++)
                obj->data[i] = gpu_read(gpu, indexed->data);
}

static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state)
{
        u32 mempool_size;
        int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
        int i;

        a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
                sizeof(*a6xx_state->indexed_regs));
        if (!a6xx_state->indexed_regs)
                return;

        for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
                a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
                        &a6xx_state->indexed_regs[i]);

        /* Set the CP mempool size to 0 to stabilize it while dumping */
        mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
        gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);

        /* Get the contents of the CP mempool */
        a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
                &a6xx_state->indexed_regs[i]);

        /*
         * Offset 0x2000 in the mempool is the size - copy the saved size over
         * so the data is consistent
         */
        a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;

        /* Restore the size in the hardware */
        gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);

        a6xx_state->nr_indexed_regs = count;
}

struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
{
        struct a6xx_crashdumper dumper = { 0 };
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
        struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
                GFP_KERNEL);

        if (!a6xx_state)
                return ERR_PTR(-ENOMEM);

        INIT_LIST_HEAD(&a6xx_state->objs);

        /* Get the generic state from the adreno core */
        adreno_gpu_state_get(gpu, &a6xx_state->base);

        a6xx_get_gmu_registers(gpu, a6xx_state);

        /* If GX isn't on the rest of the data isn't going to be accessible */
        if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
                return &a6xx_state->base;

        /* Get the banks of indexed registers */
        a6xx_get_indexed_registers(gpu, a6xx_state);

        /* Try to initialize the crashdumper */
        if (!a6xx_crashdumper_init(gpu, &dumper)) {
                a6xx_get_registers(gpu, a6xx_state, &dumper);
                a6xx_get_shaders(gpu, a6xx_state, &dumper);
                a6xx_get_clusters(gpu, a6xx_state, &dumper);
                a6xx_get_dbgahb_clusters(gpu, a6xx_state, &dumper);

                msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
        }

        a6xx_get_debugbus(gpu, a6xx_state);

        return &a6xx_state->base;
}

static void a6xx_gpu_state_destroy(struct kref *kref)
{
        struct a6xx_state_memobj *obj, *tmp;
        struct msm_gpu_state *state = container_of(kref,
                        struct msm_gpu_state, ref);
        struct a6xx_gpu_state *a6xx_state = container_of(state,
                        struct a6xx_gpu_state, base);

        list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node)
                kfree(obj);

        adreno_gpu_state_destroy(state);
        kfree(a6xx_state);
}

int a6xx_gpu_state_put(struct msm_gpu_state *state)
{
        if (IS_ERR_OR_NULL(state))
                return 1;

        return kref_put(&state->ref, a6xx_gpu_state_destroy);
}

static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
                struct drm_printer *p)
{
        int i, index = 0;

        if (!data)
                return;

        for (i = 0; i < count; i += 2) {
                u32 count = RANGE(registers, i);
                u32 offset = registers[i];
                int j;

                for (j = 0; j < count; index++, offset++, j++) {
                        if (data[index] == 0xdeafbead)
                                continue;

                        drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
                                offset << 2, data[index]);
                }
        }
}

static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
{
        char out[ASCII85_BUFSZ];
        long i, l, datalen = 0;

        for (i = 0; i < len >> 2; i++) {
                if (data[i])
                        datalen = (i + 1) << 2;
        }

        if (datalen == 0)
                return;

        drm_puts(p, "    data: !!ascii85 |\n");
        drm_puts(p, "     ");

        l = ascii85_encode_len(datalen);

        for (i = 0; i < l; i++)
                drm_puts(p, ascii85_encode(data[i], out));

        drm_puts(p, "\n");
}

static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
        drm_puts(p, fmt);
        drm_puts(p, name);
        drm_puts(p, "\n");
}

static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
                struct drm_printer *p)
{
        const struct a6xx_shader_block *block = obj->handle;
        int i;

        if (!obj->handle)
                return;

        print_name(p, "  - type: ", block->name);

        for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
                drm_printf(p, "    - bank: %d\n", i);
                drm_printf(p, "      size: %d\n", block->size);

                if (!obj->data)
                        continue;

                print_ascii85(p, block->size << 2,
                        obj->data + (block->size * i));
        }
}

static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
                struct drm_printer *p)
{
        int ctx, index = 0;

        for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
                int j;

                drm_printf(p, "    - context: %d\n", ctx);

                for (j = 0; j < size; j += 2) {
                        u32 count = RANGE(registers, j);
                        u32 offset = registers[j];
                        int k;

                        for (k = 0; k < count; index++, offset++, k++) {
                                if (data[index] == 0xdeafbead)
                                        continue;

                                drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
                                        offset << 2, data[index]);
                        }
                }
        }
}

static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
                struct drm_printer *p)
{
        const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;

        if (dbgahb) {
                print_name(p, "  - cluster-name: ", dbgahb->name);
                a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
                        obj->data, p);
        }
}

static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
                struct drm_printer *p)
{
        const struct a6xx_cluster *cluster = obj->handle;

        if (cluster) {
                print_name(p, "  - cluster-name: ", cluster->name);
                a6xx_show_cluster_data(cluster->registers, cluster->count,
                        obj->data, p);
        }
}

static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
                struct drm_printer *p)
{
        const struct a6xx_indexed_registers *indexed = obj->handle;

        if (!indexed)
                return;

        print_name(p, "  - regs-name: ", indexed->name);
        drm_printf(p, "    dwords: %d\n", indexed->count);

        print_ascii85(p, indexed->count << 2, obj->data);
}

static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
                u32 *data, struct drm_printer *p)
{
        if (block) {
                print_name(p, "  - debugbus-block: ", block->name);

                /*
                 * count for regular debugbus data is in quadwords,
                 * but print the size in dwords for consistency
                 */
                drm_printf(p, "    count: %d\n", block->count << 1);

                print_ascii85(p, block->count << 3, data);
        }
}

static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
                struct drm_printer *p)
{
        int i;

        for (i = 0; i < a6xx_state->nr_debugbus; i++) {
                struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];

                a6xx_show_debugbus_block(obj->handle, obj->data, p);
        }

        if (a6xx_state->vbif_debugbus) {
                struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;

                drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
                drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);

                /* vbif debugbus data is in dwords. Confusing, huh? */
                print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
        }

        for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
                struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];

                a6xx_show_debugbus_block(obj->handle, obj->data, p);
        }
}

void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
                struct drm_printer *p)
{
        struct a6xx_gpu_state *a6xx_state = container_of(state,
                        struct a6xx_gpu_state, base);
        int i;

        if (IS_ERR_OR_NULL(state))
                return;

        adreno_show(gpu, state, p);

        drm_puts(p, "registers:\n");
        for (i = 0; i < a6xx_state->nr_registers; i++) {
                struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
                const struct a6xx_registers *regs = obj->handle;

                if (!obj->handle)
                        continue;

                a6xx_show_registers(regs->registers, obj->data, regs->count, p);
        }

        drm_puts(p, "registers-gmu:\n");
        for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
                struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
                const struct a6xx_registers *regs = obj->handle;

                if (!obj->handle)
                        continue;

                a6xx_show_registers(regs->registers, obj->data, regs->count, p);
        }

        drm_puts(p, "indexed-registers:\n");
        for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
                a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);

        drm_puts(p, "shader-blocks:\n");
        for (i = 0; i < a6xx_state->nr_shaders; i++)
                a6xx_show_shader(&a6xx_state->shaders[i], p);

        drm_puts(p, "clusters:\n");
        for (i = 0; i < a6xx_state->nr_clusters; i++)
                a6xx_show_cluster(&a6xx_state->clusters[i], p);

        for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
                a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);

        drm_puts(p, "debugbus:\n");
        a6xx_show_debugbus(a6xx_state, p);
}