/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
24 #include "amdgpu_ras.h"
26 int amdgpu_umc_ras_late_init(struct amdgpu_device
*adev
)
29 struct ras_fs_if fs_info
= {
30 .sysfs_name
= "umc_err_count",
31 .debugfs_name
= "umc_err_inject",
33 struct ras_ih_if ih_info
= {
34 .cb
= amdgpu_umc_process_ras_data_cb
,
37 if (!adev
->umc
.ras_if
) {
39 kmalloc(sizeof(struct ras_common_if
), GFP_KERNEL
);
40 if (!adev
->umc
.ras_if
)
42 adev
->umc
.ras_if
->block
= AMDGPU_RAS_BLOCK__UMC
;
43 adev
->umc
.ras_if
->type
= AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE
;
44 adev
->umc
.ras_if
->sub_block_index
= 0;
45 strcpy(adev
->umc
.ras_if
->name
, "umc");
47 ih_info
.head
= fs_info
.head
= *adev
->umc
.ras_if
;
49 r
= amdgpu_ras_late_init(adev
, adev
->umc
.ras_if
,
54 if (amdgpu_ras_is_supported(adev
, adev
->umc
.ras_if
->block
)) {
55 r
= amdgpu_irq_get(adev
, &adev
->gmc
.ecc_irq
, 0);
63 /* ras init of specific umc version */
64 if (adev
->umc
.funcs
&& adev
->umc
.funcs
->err_cnt_init
)
65 adev
->umc
.funcs
->err_cnt_init(adev
);
70 amdgpu_ras_late_fini(adev
, adev
->umc
.ras_if
, &ih_info
);
72 kfree(adev
->umc
.ras_if
);
73 adev
->umc
.ras_if
= NULL
;
77 void amdgpu_umc_ras_fini(struct amdgpu_device
*adev
)
79 if (amdgpu_ras_is_supported(adev
, AMDGPU_RAS_BLOCK__UMC
) &&
81 struct ras_common_if
*ras_if
= adev
->umc
.ras_if
;
82 struct ras_ih_if ih_info
= {
84 .cb
= amdgpu_umc_process_ras_data_cb
,
87 amdgpu_ras_late_fini(adev
, ras_if
, &ih_info
);
92 int amdgpu_umc_process_ras_data_cb(struct amdgpu_device
*adev
,
93 void *ras_error_status
,
94 struct amdgpu_iv_entry
*entry
)
96 struct ras_err_data
*err_data
= (struct ras_err_data
*)ras_error_status
;
98 kgd2kfd_set_sram_ecc_flag(adev
->kfd
.dev
);
99 if (adev
->umc
.funcs
&&
100 adev
->umc
.funcs
->query_ras_error_count
)
101 adev
->umc
.funcs
->query_ras_error_count(adev
, ras_error_status
);
103 if (adev
->umc
.funcs
&&
104 adev
->umc
.funcs
->query_ras_error_address
&&
105 adev
->umc
.max_ras_err_cnt_per_query
) {
107 kcalloc(adev
->umc
.max_ras_err_cnt_per_query
,
108 sizeof(struct eeprom_table_record
), GFP_KERNEL
);
110 /* still call query_ras_error_address to clear error status
111 * even NOMEM error is encountered
113 if(!err_data
->err_addr
)
114 DRM_WARN("Failed to alloc memory for umc error address record!\n");
116 /* umc query_ras_error_address is also responsible for clearing
119 adev
->umc
.funcs
->query_ras_error_address(adev
, ras_error_status
);
122 /* only uncorrectable error needs gpu reset */
123 if (err_data
->ue_count
) {
124 if (err_data
->err_addr_cnt
&&
125 amdgpu_ras_add_bad_pages(adev
, err_data
->err_addr
,
126 err_data
->err_addr_cnt
))
127 DRM_WARN("Failed to add ras bad page!\n");
129 amdgpu_ras_reset_gpu(adev
);
132 kfree(err_data
->err_addr
);
133 return AMDGPU_RAS_SUCCESS
;
136 int amdgpu_umc_process_ecc_irq(struct amdgpu_device
*adev
,
137 struct amdgpu_irq_src
*source
,
138 struct amdgpu_iv_entry
*entry
)
140 struct ras_common_if
*ras_if
= adev
->umc
.ras_if
;
141 struct ras_dispatch_if ih_data
= {
148 ih_data
.head
= *ras_if
;
150 amdgpu_ras_interrupt_dispatch(adev
, &ih_data
);