/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
27 #include <linux/debugfs.h>
28 #include <linux/list.h>
30 #include "amdgpu_psp.h"
31 #include "ta_ras_if.h"
32 #include "amdgpu_ras_eeprom.h"
/*
 * Identifiers of the IP blocks known to the RAS code, numbered densely
 * from zero.
 *
 * AMDGPU_RAS_BLOCK__LAST is a sentinel equal to the number of real blocks;
 * it is not a block itself.
 */
enum amdgpu_ras_block {
	AMDGPU_RAS_BLOCK__UMC = 0,
	AMDGPU_RAS_BLOCK__SDMA,
	AMDGPU_RAS_BLOCK__GFX,
	AMDGPU_RAS_BLOCK__MMHUB,
	AMDGPU_RAS_BLOCK__ATHUB,
	AMDGPU_RAS_BLOCK__PCIE_BIF,
	AMDGPU_RAS_BLOCK__HDP,
	AMDGPU_RAS_BLOCK__XGMI_WAFL,
	/*
	 * amdgpu_ras_block_to_ta() has a case label for DF between
	 * XGMI_WAFL and SMN, so the enumerator is kept at that position.
	 */
	AMDGPU_RAS_BLOCK__DF,
	AMDGPU_RAS_BLOCK__SMN,
	AMDGPU_RAS_BLOCK__SEM,
	AMDGPU_RAS_BLOCK__MP0,
	AMDGPU_RAS_BLOCK__MP1,
	AMDGPU_RAS_BLOCK__FUSE,

	AMDGPU_RAS_BLOCK__LAST
};

/* Number of real blocks in enum amdgpu_ras_block. */
#define AMDGPU_RAS_BLOCK_COUNT	AMDGPU_RAS_BLOCK__LAST
/* Mask with one bit set for every block id below AMDGPU_RAS_BLOCK_COUNT. */
#define AMDGPU_RAS_BLOCK_MASK	((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1)
/*
 * Sub-block indices inside the GFX block.
 *
 * Enumerators are grouped by unit. Every group is bracketed by a pair of
 * *_INDEX_START / *_INDEX_END aliases that carry the value of the first and
 * last real member of the group, so the aliases never consume a value of
 * their own. AMDGPU_RAS_BLOCK__GFX_MAX is a sentinel one past the last
 * sub-block.
 */
enum amdgpu_ras_gfx_subblock {
	/* CPC */
	AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH =
		AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
	AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1,
	AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2,
	AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,

	/* CPF */
	AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 =
		AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
	AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG,

	/* CPG */
	AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ =
		AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG,
	AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
	AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG,

	/* GDS */
	AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,

	/* SPI */
	AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM,

	/* SQ */
	AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
	AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I,
	AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
	AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,

	/* SQC (3 sub-ranges) */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0 */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END =
		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1 */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END =
		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2 */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END =
		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END,

	/* TA */
	AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO =
		AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,

	/* TCA */
	AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO =
		AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,

	/* TCC (5 sub-ranges) */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END,

	/* TCI */
	AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM,

	/* TCP */
	AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM =
		AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,

	/* TD */
	AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO =
		AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,

	/* EA (3 sub-ranges) */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0 */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END =
		AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1 */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END =
		AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2 */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END =
		AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END,

	/* UTC */
	AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	AMDGPU_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache */
	AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache */
	AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,

	AMDGPU_RAS_BLOCK__GFX_MAX
};
/*
 * RAS error categories.
 *
 * Apart from NONE (zero), every value is a distinct power of two, so
 * several categories can be OR-ed into one word without colliding.
 */
enum amdgpu_ras_error_type {
	AMDGPU_RAS_ERROR__NONE = 0,
	AMDGPU_RAS_ERROR__PARITY = 1,
	AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE = 2,
	AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE = 4,
	AMDGPU_RAS_ERROR__POISON = 8,
};
294 enum amdgpu_ras_ret
{
295 AMDGPU_RAS_SUCCESS
= 0,
302 struct ras_common_if
{
303 enum amdgpu_ras_block block
;
304 enum amdgpu_ras_error_type type
;
305 uint32_t sub_block_index
;
311 /* ras infrastructure */
312 /* for ras itself. */
313 uint32_t hw_supported
;
314 /* for IP to check its ras ability. */
317 struct list_head head
;
321 struct device_attribute features_attr
;
322 struct bin_attribute badpages_attr
;
324 struct ras_manager
*objs
;
327 struct work_struct recovery_work
;
328 atomic_t in_recovery
;
329 struct amdgpu_device
*adev
;
330 /* error handler data */
331 struct ras_err_handler_data
*eh_data
;
332 struct mutex recovery_lock
;
336 struct amdgpu_ras_eeprom_control eeprom_control
;
341 char debugfs_name
[32];
/*
 * Error counters plus a pointer to error address records.
 *
 * NOTE(review): ue/ce presumably stand for uncorrectable/correctable and
 * err_addr_cnt presumably counts the entries behind err_addr; confirm
 * against the code that fills this structure.
 */
struct ras_err_data {
	unsigned long ue_count;
	unsigned long ce_count;
	unsigned long err_addr_cnt;
	struct eeprom_table_record *err_addr;
};
351 struct ras_err_handler_data
{
352 /* point to bad page records array */
353 struct eeprom_table_record
*bps
;
354 /* point to reserved bo array */
355 struct amdgpu_bo
**bps_bo
;
356 /* the count of entries */
358 /* the space can place new entries */
360 /* last reserved entry's index + 1 */
364 typedef int (*ras_ih_cb
)(struct amdgpu_device
*adev
,
366 struct amdgpu_iv_entry
*entry
);
369 /* interrupt bottom half */
370 struct work_struct ih_work
;
374 /* full of entries */
376 unsigned int ring_size
;
377 unsigned int element_size
;
378 unsigned int aligned_element_size
;
384 struct ras_common_if head
;
385 /* reference count */
388 struct list_head node
;
390 struct amdgpu_device
*adev
;
394 struct device_attribute sysfs_attr
;
398 struct ras_fs_data fs_data
;
401 struct ras_ih_data ih_data
;
403 struct ras_err_data err_data
;
412 /* interfaces for IP */
414 struct ras_common_if head
;
416 char debugfs_name
[32];
419 struct ras_query_if
{
420 struct ras_common_if head
;
421 unsigned long ue_count
;
422 unsigned long ce_count
;
425 struct ras_inject_if
{
426 struct ras_common_if head
;
432 struct ras_common_if head
;
437 struct ras_common_if head
;
441 struct ras_dispatch_if
{
442 struct ras_common_if head
;
443 struct amdgpu_iv_entry
*entry
;
446 struct ras_debug_if
{
448 struct ras_common_if head
;
449 struct ras_inject_if inject
;
455 * 1: ras feature enable (enabled by default)
457 * 2: ras framework init (in ip_init)
460 * 4: debugfs/sysfs create
462 * 6: debugfs/sysfs remove
/* Read the RAS context pointer stored at adev->psp.ras.ras. */
#define amdgpu_ras_get_context(adev)		((adev)->psp.ras.ras)
/* Store ras_con at adev->psp.ras.ras; evaluates to the assigned value. */
#define amdgpu_ras_set_context(adev, ras_con)	((adev)->psp.ras.ras = (ras_con))
470 /* check if ras is supported on block, say, sdma, gfx */
471 static inline int amdgpu_ras_is_supported(struct amdgpu_device
*adev
,
474 struct amdgpu_ras
*ras
= amdgpu_ras_get_context(adev
);
476 if (block
>= AMDGPU_RAS_BLOCK_COUNT
)
478 return ras
&& (ras
->supported
& (1 << block
));
int amdgpu_ras_recovery_init(struct amdgpu_device *adev);
482 int amdgpu_ras_request_reset_on_boot(struct amdgpu_device
*adev
,
void amdgpu_ras_resume(struct amdgpu_device *adev);
void amdgpu_ras_suspend(struct amdgpu_device *adev);
488 unsigned long amdgpu_ras_query_error_count(struct amdgpu_device
*adev
,
/* error handling functions */
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
			     struct eeprom_table_record *bps, int pages);

int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev);
497 static inline int amdgpu_ras_reset_gpu(struct amdgpu_device
*adev
)
499 struct amdgpu_ras
*ras
= amdgpu_ras_get_context(adev
);
501 /* save bad page to eeprom before gpu reset,
502 * i2c may be unstable in gpu reset
505 amdgpu_ras_reserve_bad_pages(adev
);
507 if (atomic_cmpxchg(&ras
->in_recovery
, 0, 1) == 0)
508 schedule_work(&ras
->recovery_work
);
512 static inline enum ta_ras_block
513 amdgpu_ras_block_to_ta(enum amdgpu_ras_block block
) {
515 case AMDGPU_RAS_BLOCK__UMC
:
516 return TA_RAS_BLOCK__UMC
;
517 case AMDGPU_RAS_BLOCK__SDMA
:
518 return TA_RAS_BLOCK__SDMA
;
519 case AMDGPU_RAS_BLOCK__GFX
:
520 return TA_RAS_BLOCK__GFX
;
521 case AMDGPU_RAS_BLOCK__MMHUB
:
522 return TA_RAS_BLOCK__MMHUB
;
523 case AMDGPU_RAS_BLOCK__ATHUB
:
524 return TA_RAS_BLOCK__ATHUB
;
525 case AMDGPU_RAS_BLOCK__PCIE_BIF
:
526 return TA_RAS_BLOCK__PCIE_BIF
;
527 case AMDGPU_RAS_BLOCK__HDP
:
528 return TA_RAS_BLOCK__HDP
;
529 case AMDGPU_RAS_BLOCK__XGMI_WAFL
:
530 return TA_RAS_BLOCK__XGMI_WAFL
;
531 case AMDGPU_RAS_BLOCK__DF
:
532 return TA_RAS_BLOCK__DF
;
533 case AMDGPU_RAS_BLOCK__SMN
:
534 return TA_RAS_BLOCK__SMN
;
535 case AMDGPU_RAS_BLOCK__SEM
:
536 return TA_RAS_BLOCK__SEM
;
537 case AMDGPU_RAS_BLOCK__MP0
:
538 return TA_RAS_BLOCK__MP0
;
539 case AMDGPU_RAS_BLOCK__MP1
:
540 return TA_RAS_BLOCK__MP1
;
541 case AMDGPU_RAS_BLOCK__FUSE
:
542 return TA_RAS_BLOCK__FUSE
;
544 WARN_ONCE(1, "RAS ERROR: unexpected block id %d\n", block
);
545 return TA_RAS_BLOCK__UMC
;
549 static inline enum ta_ras_error_type
550 amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error
) {
552 case AMDGPU_RAS_ERROR__NONE
:
553 return TA_RAS_ERROR__NONE
;
554 case AMDGPU_RAS_ERROR__PARITY
:
555 return TA_RAS_ERROR__PARITY
;
556 case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE
:
557 return TA_RAS_ERROR__SINGLE_CORRECTABLE
;
558 case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE
:
559 return TA_RAS_ERROR__MULTI_UNCORRECTABLE
;
560 case AMDGPU_RAS_ERROR__POISON
:
561 return TA_RAS_ERROR__POISON
;
563 WARN_ONCE(1, "RAS ERROR: unexpected error type %d\n", error
);
564 return TA_RAS_ERROR__NONE
;
/* called in ip_init and ip_fini */
int amdgpu_ras_init(struct amdgpu_device *adev);
int amdgpu_ras_fini(struct amdgpu_device *adev);
int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
int amdgpu_ras_late_init(struct amdgpu_device *adev,
			 struct ras_common_if *ras_block,
			 struct ras_fs_if *fs_info,
			 struct ras_ih_if *ih_info);
void amdgpu_ras_late_fini(struct amdgpu_device *adev,
			  struct ras_common_if *ras_block,
			  struct ras_ih_if *ih_info);

/* per-block feature switch */
int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
			      struct ras_common_if *head, bool enable);

int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
				      struct ras_common_if *head, bool enable);

/* sysfs and debugfs nodes */
int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
			    struct ras_fs_if *head);

int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
			    struct ras_common_if *head);

void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
			       struct ras_fs_if *head);

void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
			       struct ras_common_if *head);

/* error query and injection */
int amdgpu_ras_error_query(struct amdgpu_device *adev,
			   struct ras_query_if *info);

int amdgpu_ras_error_inject(struct amdgpu_device *adev,
			    struct ras_inject_if *info);

/* interrupt handler registration and dispatch */
int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev,
				     struct ras_ih_if *info);

int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
					struct ras_ih_if *info);

int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
				  struct ras_dispatch_if *info);

struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
					struct ras_common_if *head);
616 extern atomic_t amdgpu_ras_in_intr
;
618 static inline bool amdgpu_ras_intr_triggered(void)
620 return !!atomic_read(&amdgpu_ras_in_intr
);
623 static inline void amdgpu_ras_intr_cleared(void)
625 atomic_set(&amdgpu_ras_in_intr
, 0);
628 void amdgpu_ras_global_ras_isr(struct amdgpu_device
*adev
);