drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
40 #include "vega10_enum.h"
41 #include "hdp/hdp_4_0_offset.h"
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
49 #include "amdgpu_ras.h"
51 #define GFX9_NUM_GFX_RINGS 1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
56 #define mmPWR_MISC_CNTL_STATUS 0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L
63 #define mmGCEA_PROBE_MAP 0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX 0
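/*
 * The MODULE_FIRMWARE() entries below record every firmware image this IP
 * block may request, so initramfs tooling can discover them via module info.
 */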
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
114 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
121 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03
122 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0
123 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04
124 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0
125 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09
126 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0
127 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a
128 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0
129 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b
130 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0
131 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c
132 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0
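/*
 * RAS (reliability/availability/serviceability) sub-block indices for the GFX
 * block, as exposed to the RAS TA firmware.  The *_INDEX_START/_INDEX_END
 * markers bracket the sub-ranges belonging to each hardware unit.
 */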
134 enum ta_ras_gfx_subblock {
135 /*CPC*/
136 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
137 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
138 TA_RAS_BLOCK__GFX_CPC_UCODE,
139 TA_RAS_BLOCK__GFX_DC_STATE_ME1,
140 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
141 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
142 TA_RAS_BLOCK__GFX_DC_STATE_ME2,
143 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
144 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
145 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
146 /* CPF*/
147 TA_RAS_BLOCK__GFX_CPF_INDEX_START,
148 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
149 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
150 TA_RAS_BLOCK__GFX_CPF_TAG,
151 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
152 /* CPG*/
153 TA_RAS_BLOCK__GFX_CPG_INDEX_START,
154 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
155 TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
156 TA_RAS_BLOCK__GFX_CPG_TAG,
157 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
158 /* GDS*/
159 TA_RAS_BLOCK__GFX_GDS_INDEX_START,
160 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
161 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
162 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
163 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
164 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
165 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
166 /* SPI*/
167 TA_RAS_BLOCK__GFX_SPI_SR_MEM,
168 /* SQ*/
169 TA_RAS_BLOCK__GFX_SQ_INDEX_START,
170 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
171 TA_RAS_BLOCK__GFX_SQ_LDS_D,
172 TA_RAS_BLOCK__GFX_SQ_LDS_I,
173 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
174 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
175 /* SQC (3 ranges)*/
176 TA_RAS_BLOCK__GFX_SQC_INDEX_START,
177 /* SQC range 0*/
178 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
179 TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
180 TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
181 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
182 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
183 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
184 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
185 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
186 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
187 TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
188 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
189 /* SQC range 1*/
190 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
191 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
192 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
193 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
194 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
195 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
196 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
197 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
198 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
199 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
200 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
201 TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
202 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
203 /* SQC range 2*/
204 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
205 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
206 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
207 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
208 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
209 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
210 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
211 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
212 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
213 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
214 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
215 TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
216 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
217 TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
218 /* TA*/
219 TA_RAS_BLOCK__GFX_TA_INDEX_START,
220 TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
221 TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
222 TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
223 TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
224 TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
225 TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
226 /* TCA*/
227 TA_RAS_BLOCK__GFX_TCA_INDEX_START,
228 TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
229 TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
230 TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
231 /* TCC (5 sub-ranges)*/
232 TA_RAS_BLOCK__GFX_TCC_INDEX_START,
233 /* TCC range 0*/
234 TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
235 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
236 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
237 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
238 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
239 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
240 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
241 TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
242 TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
243 TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
244 /* TCC range 1*/
245 TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
246 TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
247 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
248 TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
249 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
250 /* TCC range 2*/
251 TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
252 TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
253 TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
254 TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
255 TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
256 TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
257 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
258 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
259 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
260 TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
261 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
262 /* TCC range 3*/
263 TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
264 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
265 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
266 TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
267 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
268 /* TCC range 4*/
269 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
270 TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
271 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
272 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
273 TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
274 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
275 TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
276 /* TCI*/
277 TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
278 /* TCP*/
279 TA_RAS_BLOCK__GFX_TCP_INDEX_START,
280 TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
281 TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
282 TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
283 TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
284 TA_RAS_BLOCK__GFX_TCP_DB_RAM,
285 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
286 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
287 TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
288 /* TD*/
289 TA_RAS_BLOCK__GFX_TD_INDEX_START,
290 TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
291 TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
292 TA_RAS_BLOCK__GFX_TD_CS_FIFO,
293 TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
294 /* EA (3 sub-ranges)*/
295 TA_RAS_BLOCK__GFX_EA_INDEX_START,
296 /* EA range 0*/
297 TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
298 TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
299 TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
300 TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
301 TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
302 TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
303 TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
304 TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
305 TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
306 TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
307 /* EA range 1*/
308 TA_RAS_BLOCK__GFX_EA_INDEX1_START,
309 TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
310 TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
311 TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
312 TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
313 TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
314 TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
315 TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
316 TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
317 /* EA range 2*/
318 TA_RAS_BLOCK__GFX_EA_INDEX2_START,
319 TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
320 TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
321 TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
322 TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
323 TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
324 TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
325 /* UTC VM L2 bank*/
326 TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
327 /* UTC VM walker*/
328 TA_RAS_BLOCK__UTC_VML2_WALKER,
329 /* UTC ATC L2 2MB cache*/
330 TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
331 /* UTC ATC L2 4KB cache*/
332 TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
333 TA_RAS_BLOCK__GFX_MAX
334 };
336 struct ras_gfx_subblock {
337 unsigned char *name;
338 int ta_subblock;
339 int hw_supported_error_type;
340 int sw_supported_error_type;
341 };
343 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
344 [AMDGPU_RAS_BLOCK__##subblock] = { \
345 #subblock, \
346 TA_RAS_BLOCK__##subblock, \
347 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
348 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \
349 }
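/*
 * ras_gfx_subblocks[] maps each AMDGPU GFX RAS sub-block to its TA enum value
 * and to two bitmasks built from the macro arguments: a-d form the
 * hw_supported_error_type mask and e-h the sw_supported_error_type mask.
 */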
351 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
352 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
353 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
354 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
355 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
356 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
357 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
358 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
359 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
360 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
361 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
362 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
363 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
364 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
365 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
366 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
367 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
368 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
370 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
372 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
373 AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
374 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
375 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
376 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
377 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
378 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
379 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
380 0, 0),
381 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
383 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
384 0, 0),
385 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
387 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
388 0, 0),
389 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
391 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
393 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
394 0, 0, 0),
395 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
397 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
399 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
401 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
403 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
405 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
406 0, 0),
407 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
409 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
411 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
412 0, 0, 0),
413 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
415 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
417 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
419 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
421 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
423 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
424 0, 0),
425 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
427 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
428 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
429 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
430 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
431 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
432 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
433 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
434 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
435 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
437 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
439 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
441 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
443 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
445 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
446 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
447 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
448 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
449 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
450 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
451 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
452 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
453 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
454 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
455 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
456 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
458 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
459 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
461 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
462 0, 0),
463 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
465 AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
466 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
467 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
468 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
469 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
470 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
471 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
472 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
473 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
474 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
475 AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
476 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
477 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
478 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
479 AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
480 AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
481 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
482 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
483 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
484 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
485 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
486 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
487 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
488 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
489 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
490 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
491 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
492 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
493 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
494 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
495 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
496 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
497 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
498 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
499 };
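/*
 * "Golden" register settings: (register, and-mask, or-value) tuples applied by
 * soc15_program_register_sequence() in gfx_v9_0_init_golden_registers() below
 * to bring each ASIC revision to its validated defaults.
 */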
501 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
503 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
504 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
505 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
506 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
507 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
508 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
509 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
510 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
511 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
512 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
513 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
514 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
515 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
516 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
517 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
518 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
519 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
520 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
521 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
522 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
525 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
527 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
528 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
529 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
530 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
531 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
532 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
533 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
534 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
535 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
536 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
537 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
538 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
539 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
540 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
541 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
542 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
543 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
544 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
547 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
549 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
550 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
551 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
552 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
553 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
554 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
555 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
556 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
557 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
558 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
559 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
562 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
564 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
565 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
566 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
567 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
568 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
569 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
570 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
571 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
572 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
573 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
574 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
575 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
576 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
577 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
578 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
579 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
580 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
581 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
582 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
583 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
584 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
585 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
586 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
587 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
590 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
592 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
593 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
594 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
595 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
596 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
597 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
598 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
601 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
603 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
604 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
605 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
606 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
607 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
608 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
609 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
610 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
611 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
612 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
613 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
614 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
615 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
616 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
617 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
618 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
619 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
620 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
621 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
624 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
626 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
627 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
628 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
629 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
630 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
631 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
632 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
633 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
634 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
635 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
636 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
637 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
640 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
642 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
643 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
644 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
647 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
649 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
650 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
651 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
652 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
653 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
654 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
655 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
656 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
657 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
658 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
659 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
660 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
661 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
662 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
663 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
664 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
667 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
669 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
670 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
671 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
672 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
673 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
674 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
675 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
676 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
677 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
678 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
679 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
680 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
681 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
684 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
686 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
687 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
688 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
689 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
690 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
691 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
692 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
693 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
694 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
695 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
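/*
 * Offsets of the RLC_SRM_INDEX_CNTL address/data register pairs, instances
 * 0-7, expressed relative to instance 0.
 */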
698 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
700 mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
701 mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
702 mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
703 mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
704 mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
705 mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
706 mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
707 mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
710 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
712 mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
713 mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
714 mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
715 mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
716 mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
717 mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
718 mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
719 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
722 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
723 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
724 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
725 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
727 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
728 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
729 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
730 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
731 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
732 struct amdgpu_cu_info *cu_info);
733 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
734 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
735 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
736 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
737 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
738 void *ras_error_status);
739 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
740 void *inject_if);
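/*
 * KIQ (Kernel Interface Queue) PM4 packet builders.  Each helper emits one
 * packet on the KIQ ring (resource setup, compute queue map/unmap, status
 * query, TLB invalidation); the *_size fields of gfx_v9_0_kiq_pm4_funcs tell
 * callers how many ring dwords to reserve for each of them.
 */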
742 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
743 uint64_t queue_mask)
745 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
746 amdgpu_ring_write(kiq_ring,
747 PACKET3_SET_RESOURCES_VMID_MASK(0) |
748 /* vmid_mask:0, queue_type:0 (KIQ) */
749 PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
750 amdgpu_ring_write(kiq_ring,
751 lower_32_bits(queue_mask)); /* queue mask lo */
752 amdgpu_ring_write(kiq_ring,
753 upper_32_bits(queue_mask)); /* queue mask hi */
754 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
755 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
756 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
757 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
760 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
761 struct amdgpu_ring *ring)
763 struct amdgpu_device *adev = kiq_ring->adev;
764 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
765 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
766 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
768 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
769 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
770 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
771 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
772 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
773 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
774 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
775 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
776 /*queue_type: normal compute queue */
777 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
778 /* alloc format: all_on_one_pipe */
779 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
780 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
781 /* num_queues: must be 1 */
782 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
783 amdgpu_ring_write(kiq_ring,
784 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
785 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
786 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
787 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
788 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
791 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
792 struct amdgpu_ring *ring,
793 enum amdgpu_unmap_queues_action action,
794 u64 gpu_addr, u64 seq)
796 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
798 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
799 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
800 PACKET3_UNMAP_QUEUES_ACTION(action) |
801 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
802 PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
803 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
804 amdgpu_ring_write(kiq_ring,
805 PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
807 if (action == PREEMPT_QUEUES_NO_UNMAP) {
808 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
809 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
810 amdgpu_ring_write(kiq_ring, seq);
811 } else {
812 amdgpu_ring_write(kiq_ring, 0);
813 amdgpu_ring_write(kiq_ring, 0);
814 amdgpu_ring_write(kiq_ring, 0);
818 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
819 struct amdgpu_ring *ring,
820 u64 addr,
821 u64 seq)
823 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
825 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
826 amdgpu_ring_write(kiq_ring,
827 PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
828 PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
829 PACKET3_QUERY_STATUS_COMMAND(2));
830 /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
831 amdgpu_ring_write(kiq_ring,
832 PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
833 PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
834 amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
835 amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
836 amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
837 amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
840 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
841 uint16_t pasid, uint32_t flush_type,
842 bool all_hub)
844 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
845 amdgpu_ring_write(kiq_ring,
846 PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
847 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
848 PACKET3_INVALIDATE_TLBS_PASID(pasid) |
849 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
852 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
853 .kiq_set_resources = gfx_v9_0_kiq_set_resources,
854 .kiq_map_queues = gfx_v9_0_kiq_map_queues,
855 .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
856 .kiq_query_status = gfx_v9_0_kiq_query_status,
857 .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
858 .set_resources_size = 8,
859 .map_queues_size = 7,
860 .unmap_queues_size = 6,
861 .query_status_size = 7,
862 .invalidate_tlbs_size = 12,
865 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
867 adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
870 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
872 switch (adev->asic_type) {
873 case CHIP_VEGA10:
874 soc15_program_register_sequence(adev,
875 golden_settings_gc_9_0,
876 ARRAY_SIZE(golden_settings_gc_9_0));
877 soc15_program_register_sequence(adev,
878 golden_settings_gc_9_0_vg10,
879 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
880 break;
881 case CHIP_VEGA12:
882 soc15_program_register_sequence(adev,
883 golden_settings_gc_9_2_1,
884 ARRAY_SIZE(golden_settings_gc_9_2_1));
885 soc15_program_register_sequence(adev,
886 golden_settings_gc_9_2_1_vg12,
887 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
888 break;
889 case CHIP_VEGA20:
890 soc15_program_register_sequence(adev,
891 golden_settings_gc_9_0,
892 ARRAY_SIZE(golden_settings_gc_9_0));
893 soc15_program_register_sequence(adev,
894 golden_settings_gc_9_0_vg20,
895 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
896 break;
897 case CHIP_ARCTURUS:
898 soc15_program_register_sequence(adev,
899 golden_settings_gc_9_4_1_arct,
900 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
901 break;
902 case CHIP_RAVEN:
903 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
904 ARRAY_SIZE(golden_settings_gc_9_1));
905 if (adev->rev_id >= 8)
906 soc15_program_register_sequence(adev,
907 golden_settings_gc_9_1_rv2,
908 ARRAY_SIZE(golden_settings_gc_9_1_rv2));
909 else
910 soc15_program_register_sequence(adev,
911 golden_settings_gc_9_1_rv1,
912 ARRAY_SIZE(golden_settings_gc_9_1_rv1));
913 break;
914 case CHIP_RENOIR:
915 soc15_program_register_sequence(adev,
916 golden_settings_gc_9_1_rn,
917 ARRAY_SIZE(golden_settings_gc_9_1_rn));
918 return; /* for renoir, don't need common golden settings */
919 default:
920 break;
923 if (adev->asic_type != CHIP_ARCTURUS)
924 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
925 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
928 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
930 adev->gfx.scratch.num_reg = 8;
931 adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
932 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
935 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
936 bool wc, uint32_t reg, uint32_t val)
938 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
939 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
940 WRITE_DATA_DST_SEL(0) |
941 (wc ? WR_CONFIRM : 0));
942 amdgpu_ring_write(ring, reg);
943 amdgpu_ring_write(ring, 0);
944 amdgpu_ring_write(ring, val);
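/*
 * Emit a WAIT_REG_MEM packet that polls a register (mem_space = 0) or a memory
 * location (mem_space = 1) until (value & mask) == ref, using the "equal"
 * compare function and re-polling at the given interval.
 */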
947 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
948 int mem_space, int opt, uint32_t addr0,
949 uint32_t addr1, uint32_t ref, uint32_t mask,
950 uint32_t inv)
952 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
953 amdgpu_ring_write(ring,
954 /* memory (1) or register (0) */
955 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
956 WAIT_REG_MEM_OPERATION(opt) | /* wait */
957 WAIT_REG_MEM_FUNCTION(3) | /* equal */
958 WAIT_REG_MEM_ENGINE(eng_sel)));
960 if (mem_space)
961 BUG_ON(addr0 & 0x3); /* Dword align */
962 amdgpu_ring_write(ring, addr0);
963 amdgpu_ring_write(ring, addr1);
964 amdgpu_ring_write(ring, ref);
965 amdgpu_ring_write(ring, mask);
966 amdgpu_ring_write(ring, inv); /* poll interval */
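/*
 * Basic ring test: seed a scratch register with 0xCAFEDEAD, ask the ring to
 * rewrite it to 0xDEADBEEF via SET_UCONFIG_REG, then poll until the value
 * lands or the timeout expires.
 */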
969 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
971 struct amdgpu_device *adev = ring->adev;
972 uint32_t scratch;
973 uint32_t tmp = 0;
974 unsigned i;
975 int r;
977 r = amdgpu_gfx_scratch_get(adev, &scratch);
978 if (r)
979 return r;
981 WREG32(scratch, 0xCAFEDEAD);
982 r = amdgpu_ring_alloc(ring, 3);
983 if (r)
984 goto error_free_scratch;
986 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
987 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
988 amdgpu_ring_write(ring, 0xDEADBEEF);
989 amdgpu_ring_commit(ring);
991 for (i = 0; i < adev->usec_timeout; i++) {
992 tmp = RREG32(scratch);
993 if (tmp == 0xDEADBEEF)
994 break;
995 udelay(1);
998 if (i >= adev->usec_timeout)
999 r = -ETIMEDOUT;
1001 error_free_scratch:
1002 amdgpu_gfx_scratch_free(adev, scratch);
1003 return r;
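/*
 * IB test: like the ring test, but the magic value is written through an
 * indirect buffer into a writeback slot in GPU-visible memory, and completion
 * is tracked with a fence instead of register polling.
 */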
1006 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1008 struct amdgpu_device *adev = ring->adev;
1009 struct amdgpu_ib ib;
1010 struct dma_fence *f = NULL;
1012 unsigned index;
1013 uint64_t gpu_addr;
1014 uint32_t tmp;
1015 long r;
1017 r = amdgpu_device_wb_get(adev, &index);
1018 if (r)
1019 return r;
1021 gpu_addr = adev->wb.gpu_addr + (index * 4);
1022 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1023 memset(&ib, 0, sizeof(ib));
1024 r = amdgpu_ib_get(adev, NULL, 16, &ib);
1025 if (r)
1026 goto err1;
1028 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1029 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1030 ib.ptr[2] = lower_32_bits(gpu_addr);
1031 ib.ptr[3] = upper_32_bits(gpu_addr);
1032 ib.ptr[4] = 0xDEADBEEF;
1033 ib.length_dw = 5;
1035 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1036 if (r)
1037 goto err2;
1039 r = dma_fence_wait_timeout(f, false, timeout);
1040 if (r == 0) {
1041 r = -ETIMEDOUT;
1042 goto err2;
1043 } else if (r < 0) {
1044 goto err2;
1047 tmp = adev->wb.wb[index];
1048 if (tmp == 0xDEADBEEF)
1049 r = 0;
1050 else
1051 r = -EINVAL;
1053 err2:
1054 amdgpu_ib_free(adev, &ib, NULL);
1055 dma_fence_put(f);
1056 err1:
1057 amdgpu_device_wb_free(adev, index);
1058 return r;
1062 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1064 release_firmware(adev->gfx.pfp_fw);
1065 adev->gfx.pfp_fw = NULL;
1066 release_firmware(adev->gfx.me_fw);
1067 adev->gfx.me_fw = NULL;
1068 release_firmware(adev->gfx.ce_fw);
1069 adev->gfx.ce_fw = NULL;
1070 release_firmware(adev->gfx.rlc_fw);
1071 adev->gfx.rlc_fw = NULL;
1072 release_firmware(adev->gfx.mec_fw);
1073 adev->gfx.mec_fw = NULL;
1074 release_firmware(adev->gfx.mec2_fw);
1075 adev->gfx.mec2_fw = NULL;
1077 kfree(adev->gfx.rlc.register_list_format);
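/*
 * RLC v2.1 firmware carries three extra save/restore lists (CNTL, GPM, SRM);
 * cache their versions, sizes and data pointers from the extended header.
 */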
1080 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1082 const struct rlc_firmware_header_v2_1 *rlc_hdr;
1084 rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1085 adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1086 adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1087 adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1088 adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1089 adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1090 adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1091 adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1092 adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1093 adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1094 adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1095 adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1096 adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1097 adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1098 le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
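/*
 * Newer CP firmware can perform a register write followed by a wait in a
 * single packet; record per-engine flags so the ring code can choose between
 * that path and the generic write-then-wait helper.
 */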
1101 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1103 adev->gfx.me_fw_write_wait = false;
1104 adev->gfx.mec_fw_write_wait = false;
1106 if ((adev->gfx.mec_fw_version < 0x000001a5) ||
1107 (adev->gfx.mec_feature_version < 46) ||
1108 (adev->gfx.pfp_fw_version < 0x000000b7) ||
1109 (adev->gfx.pfp_feature_version < 46))
1110 DRM_WARN_ONCE("CP firmware version too old, please update!");
1112 switch (adev->asic_type) {
1113 case CHIP_VEGA10:
1114 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1115 (adev->gfx.me_feature_version >= 42) &&
1116 (adev->gfx.pfp_fw_version >= 0x000000b1) &&
1117 (adev->gfx.pfp_feature_version >= 42))
1118 adev->gfx.me_fw_write_wait = true;
1120 if ((adev->gfx.mec_fw_version >= 0x00000193) &&
1121 (adev->gfx.mec_feature_version >= 42))
1122 adev->gfx.mec_fw_write_wait = true;
1123 break;
1124 case CHIP_VEGA12:
1125 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1126 (adev->gfx.me_feature_version >= 44) &&
1127 (adev->gfx.pfp_fw_version >= 0x000000b2) &&
1128 (adev->gfx.pfp_feature_version >= 44))
1129 adev->gfx.me_fw_write_wait = true;
1131 if ((adev->gfx.mec_fw_version >= 0x00000196) &&
1132 (adev->gfx.mec_feature_version >= 44))
1133 adev->gfx.mec_fw_write_wait = true;
1134 break;
1135 case CHIP_VEGA20:
1136 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1137 (adev->gfx.me_feature_version >= 44) &&
1138 (adev->gfx.pfp_fw_version >= 0x000000b2) &&
1139 (adev->gfx.pfp_feature_version >= 44))
1140 adev->gfx.me_fw_write_wait = true;
1142 if ((adev->gfx.mec_fw_version >= 0x00000197) &&
1143 (adev->gfx.mec_feature_version >= 44))
1144 adev->gfx.mec_fw_write_wait = true;
1145 break;
1146 case CHIP_RAVEN:
1147 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1148 (adev->gfx.me_feature_version >= 42) &&
1149 (adev->gfx.pfp_fw_version >= 0x000000b1) &&
1150 (adev->gfx.pfp_feature_version >= 42))
1151 adev->gfx.me_fw_write_wait = true;
1153 if ((adev->gfx.mec_fw_version >= 0x00000192) &&
1154 (adev->gfx.mec_feature_version >= 42))
1155 adev->gfx.mec_fw_write_wait = true;
1156 break;
1157 default:
1158 break;
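/*
 * On Raven, GFXOFF is only left enabled when the SMU firmware and the RLC
 * save/restore lists are known good; APUs that still qualify then gain the
 * GFX powergating flags.
 */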
1162 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1164 switch (adev->asic_type) {
1165 case CHIP_VEGA10:
1166 case CHIP_VEGA12:
1167 case CHIP_VEGA20:
1168 break;
1169 case CHIP_RAVEN:
1170 if (!(adev->rev_id >= 0x8 ||
1171 adev->pdev->device == 0x15d8) &&
1172 (adev->pm.fw_version < 0x41e2b || /* not raven1 fresh */
1173 !adev->gfx.rlc.is_rlc_v2_1)) /* without rlc save restore ucodes */
1174 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1176 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1177 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1178 AMD_PG_SUPPORT_CP |
1179 AMD_PG_SUPPORT_RLC_SMU_HS;
1180 break;
1181 case CHIP_RENOIR:
1182 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1183 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1184 AMD_PG_SUPPORT_CP |
1185 AMD_PG_SUPPORT_RLC_SMU_HS;
1186 break;
1187 default:
1188 break;
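/*
 * Fetch and validate the PFP, ME and CE microcode images for the given chip
 * and, when the PSP front-door load path is used, register them in the
 * firmware ucode table so their sizes are accounted for.
 */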
1192 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1193 const char *chip_name)
1195 char fw_name[30];
1196 int err;
1197 struct amdgpu_firmware_info *info = NULL;
1198 const struct common_firmware_header *header = NULL;
1199 const struct gfx_firmware_header_v1_0 *cp_hdr;
1201 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1202 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1203 if (err)
1204 goto out;
1205 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1206 if (err)
1207 goto out;
1208 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1209 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1210 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1212 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1213 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1214 if (err)
1215 goto out;
1216 err = amdgpu_ucode_validate(adev->gfx.me_fw);
1217 if (err)
1218 goto out;
1219 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1220 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1221 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1223 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1224 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1225 if (err)
1226 goto out;
1227 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1228 if (err)
1229 goto out;
1230 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1231 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1232 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1234 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1235 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1236 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1237 info->fw = adev->gfx.pfp_fw;
1238 header = (const struct common_firmware_header *)info->fw->data;
1239 adev->firmware.fw_size +=
1240 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1242 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1243 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1244 info->fw = adev->gfx.me_fw;
1245 header = (const struct common_firmware_header *)info->fw->data;
1246 adev->firmware.fw_size +=
1247 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1249 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1250 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1251 info->fw = adev->gfx.ce_fw;
1252 header = (const struct common_firmware_header *)info->fw->data;
1253 adev->firmware.fw_size +=
1254 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1257 out:
1258 if (err) {
1259 dev_err(adev->dev,
1260 "gfx9: Failed to load firmware \"%s\"\n",
1261 fw_name);
1262 release_firmware(adev->gfx.pfp_fw);
1263 adev->gfx.pfp_fw = NULL;
1264 release_firmware(adev->gfx.me_fw);
1265 adev->gfx.me_fw = NULL;
1266 release_firmware(adev->gfx.ce_fw);
1267 adev->gfx.ce_fw = NULL;
1269 return err;
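/*
 * RLC microcode: pick the right image (AM4 Picasso boards and "kicker" Raven
 * SKUs ship dedicated binaries), parse the v2.0 header, and copy out the
 * register list used for RLC save/restore programming.
 */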
1272 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1273 const char *chip_name)
1275 char fw_name[30];
1276 int err;
1277 struct amdgpu_firmware_info *info = NULL;
1278 const struct common_firmware_header *header = NULL;
1279 const struct rlc_firmware_header_v2_0 *rlc_hdr;
1280 unsigned int *tmp = NULL;
1281 unsigned int i = 0;
1282 uint16_t version_major;
1283 uint16_t version_minor;
1284 uint32_t smu_version;
1286 /*
1287 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1288 * instead of picasso_rlc.bin.
1289 * Judgment method:
1290 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1291 * or revision >= 0xD8 && revision <= 0xDF
1292 * otherwise it is PCO FP5
1293 */
1294 if (!strcmp(chip_name, "picasso") &&
1295 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1296 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1297 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1298 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1299 (smu_version >= 0x41e2b))
1301 /* SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. */
1303 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1304 else
1305 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1306 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1307 if (err)
1308 goto out;
1309 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
if (err)
goto out;
1310 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1312 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1313 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1314 if (version_major == 2 && version_minor == 1)
1315 adev->gfx.rlc.is_rlc_v2_1 = true;
1317 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1318 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1319 adev->gfx.rlc.save_and_restore_offset =
1320 le32_to_cpu(rlc_hdr->save_and_restore_offset);
1321 adev->gfx.rlc.clear_state_descriptor_offset =
1322 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1323 adev->gfx.rlc.avail_scratch_ram_locations =
1324 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1325 adev->gfx.rlc.reg_restore_list_size =
1326 le32_to_cpu(rlc_hdr->reg_restore_list_size);
1327 adev->gfx.rlc.reg_list_format_start =
1328 le32_to_cpu(rlc_hdr->reg_list_format_start);
1329 adev->gfx.rlc.reg_list_format_separate_start =
1330 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1331 adev->gfx.rlc.starting_offsets_start =
1332 le32_to_cpu(rlc_hdr->starting_offsets_start);
1333 adev->gfx.rlc.reg_list_format_size_bytes =
1334 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1335 adev->gfx.rlc.reg_list_size_bytes =
1336 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1337 adev->gfx.rlc.register_list_format =
1338 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1339 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1340 if (!adev->gfx.rlc.register_list_format) {
1341 err = -ENOMEM;
1342 goto out;
1345 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1346 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1347 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1348 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1350 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1352 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1353 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1354 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1355 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1357 if (adev->gfx.rlc.is_rlc_v2_1)
1358 gfx_v9_0_init_rlc_ext_microcode(adev);
1360 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1361 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1362 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1363 info->fw = adev->gfx.rlc_fw;
1364 header = (const struct common_firmware_header *)info->fw->data;
1365 adev->firmware.fw_size +=
1366 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1368 if (adev->gfx.rlc.is_rlc_v2_1 &&
1369 adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1370 adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1371 adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1372 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1373 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1374 info->fw = adev->gfx.rlc_fw;
1375 adev->firmware.fw_size +=
1376 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1378 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1379 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1380 info->fw = adev->gfx.rlc_fw;
1381 adev->firmware.fw_size +=
1382 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1384 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1385 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1386 info->fw = adev->gfx.rlc_fw;
1387 adev->firmware.fw_size +=
1388 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1392 out:
1393 if (err) {
1394 dev_err(adev->dev,
1395 "gfx9: Failed to load firmware \"%s\"\n",
1396 fw_name);
1397 release_firmware(adev->gfx.rlc_fw);
1398 adev->gfx.rlc_fw = NULL;
1400 return err;
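/*
 * gfx_v9_0_init_cp_compute_microcode() - request the MEC/MEC2 firmware.
 * MEC2 is optional; if its image is missing the error is cleared and only
 * MEC1 is used. For PSP loading the jump table is registered as a separate
 * ucode entry, split off from the main MEC image.
 */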
1403 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1404 const char *chip_name)
1406 char fw_name[30];
1407 int err;
1408 struct amdgpu_firmware_info *info = NULL;
1409 const struct common_firmware_header *header = NULL;
1410 const struct gfx_firmware_header_v1_0 *cp_hdr;
1412 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1413 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1414 if (err)
1415 goto out;
1416 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1417 if (err)
1418 goto out;
1419 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1420 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1421 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1424 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1425 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1426 if (!err) {
1427 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1428 if (err)
1429 goto out;
1430 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1431 adev->gfx.mec2_fw->data;
1432 adev->gfx.mec2_fw_version =
1433 le32_to_cpu(cp_hdr->header.ucode_version);
1434 adev->gfx.mec2_feature_version =
1435 le32_to_cpu(cp_hdr->ucode_feature_version);
1436 } else {
1437 err = 0;
1438 adev->gfx.mec2_fw = NULL;
1441 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1442 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1443 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1444 info->fw = adev->gfx.mec_fw;
1445 header = (const struct common_firmware_header *)info->fw->data;
1446 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1447 adev->firmware.fw_size +=
1448 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1450 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1451 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1452 info->fw = adev->gfx.mec_fw;
1453 adev->firmware.fw_size +=
1454 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1456 if (adev->gfx.mec2_fw) {
1457 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1458 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1459 info->fw = adev->gfx.mec2_fw;
1460 header = (const struct common_firmware_header *)info->fw->data;
1461 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1462 adev->firmware.fw_size +=
1463 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1465 /* TODO: Determine if MEC2 JT FW loading can be removed
1466 * for all GFX v9 ASICs and above */
1467 if (adev->asic_type != CHIP_ARCTURUS &&
1468 adev->asic_type != CHIP_RENOIR) {
1469 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1470 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1471 info->fw = adev->gfx.mec2_fw;
1472 adev->firmware.fw_size +=
1473 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1474 PAGE_SIZE);
1479 out:
1480 gfx_v9_0_check_if_need_gfxoff(adev);
1481 gfx_v9_0_check_fw_write_wait(adev);
1482 if (err) {
1483 dev_err(adev->dev,
1484 "gfx9: Failed to load firmware \"%s\"\n",
1485 fw_name);
1486 release_firmware(adev->gfx.mec_fw);
1487 adev->gfx.mec_fw = NULL;
1488 release_firmware(adev->gfx.mec2_fw);
1489 adev->gfx.mec2_fw = NULL;
1491 return err;
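/*
 * gfx_v9_0_init_microcode() - top-level firmware loader. Maps the ASIC type
 * to a chip name and fetches the CP gfx images (skipped on Arcturus, which
 * has no CPG), the RLC image and the CP compute images.
 */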
1494 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1496 const char *chip_name;
1497 int r;
1499 DRM_DEBUG("\n");
1501 switch (adev->asic_type) {
1502 case CHIP_VEGA10:
1503 chip_name = "vega10";
1504 break;
1505 case CHIP_VEGA12:
1506 chip_name = "vega12";
1507 break;
1508 case CHIP_VEGA20:
1509 chip_name = "vega20";
1510 break;
1511 case CHIP_RAVEN:
1512 if (adev->rev_id >= 8)
1513 chip_name = "raven2";
1514 else if (adev->pdev->device == 0x15d8)
1515 chip_name = "picasso";
1516 else
1517 chip_name = "raven";
1518 break;
1519 case CHIP_ARCTURUS:
1520 chip_name = "arcturus";
1521 break;
1522 case CHIP_RENOIR:
1523 chip_name = "renoir";
1524 break;
1525 default:
1526 BUG();
1529 /* No CPG in Arcturus */
1530 if (adev->asic_type != CHIP_ARCTURUS) {
1531 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1532 if (r)
1533 return r;
1536 r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1537 if (r)
1538 return r;
1540 r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1541 if (r)
1542 return r;
1544 return r;
1547 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1549 u32 count = 0;
1550 const struct cs_section_def *sect = NULL;
1551 const struct cs_extent_def *ext = NULL;
1553 /* begin clear state */
1554 count += 2;
1555 /* context control state */
1556 count += 3;
1558 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1559 for (ext = sect->section; ext->extent != NULL; ++ext) {
1560 if (sect->id == SECT_CONTEXT)
1561 count += 2 + ext->reg_count;
1562 else
1563 return 0;
1567 /* end clear state */
1568 count += 2;
1569 /* clear state */
1570 count += 2;
1572 return count;
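/*
 * gfx_v9_0_get_csb_buffer() - emit the clear-state indirect buffer: preamble
 * begin/end markers, a CONTEXT_CONTROL packet and SET_CONTEXT_REG packets for
 * every SECT_CONTEXT extent, sized by gfx_v9_0_get_csb_size() above.
 */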
1575 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1576 volatile u32 *buffer)
1578 u32 count = 0, i;
1579 const struct cs_section_def *sect = NULL;
1580 const struct cs_extent_def *ext = NULL;
1582 if (adev->gfx.rlc.cs_data == NULL)
1583 return;
1584 if (buffer == NULL)
1585 return;
1587 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1588 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1590 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1591 buffer[count++] = cpu_to_le32(0x80000000);
1592 buffer[count++] = cpu_to_le32(0x80000000);
1594 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1595 for (ext = sect->section; ext->extent != NULL; ++ext) {
1596 if (sect->id == SECT_CONTEXT) {
1597 buffer[count++] =
1598 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1599 buffer[count++] = cpu_to_le32(ext->reg_index -
1600 PACKET3_SET_CONTEXT_REG_START);
1601 for (i = 0; i < ext->reg_count; i++)
1602 buffer[count++] = cpu_to_le32(ext->extent[i]);
1603 } else {
1604 return;
1609 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1610 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1612 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1613 buffer[count++] = cpu_to_le32(0);
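/*
 * gfx_v9_0_init_always_on_cu_mask() - build a per-SE/SH bitmap of always-on
 * CUs (4 on APUs, 8 on Vega12, 12 otherwise) for RLC_LB_ALWAYS_ACTIVE_CU_MASK;
 * the first two CUs of each bitmap are also written to RLC_PG_ALWAYS_ON_CU_MASK.
 */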
1616 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1618 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1619 uint32_t pg_always_on_cu_num = 2;
1620 uint32_t always_on_cu_num;
1621 uint32_t i, j, k;
1622 uint32_t mask, cu_bitmap, counter;
1624 if (adev->flags & AMD_IS_APU)
1625 always_on_cu_num = 4;
1626 else if (adev->asic_type == CHIP_VEGA12)
1627 always_on_cu_num = 8;
1628 else
1629 always_on_cu_num = 12;
1631 mutex_lock(&adev->grbm_idx_mutex);
1632 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1633 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1634 mask = 1;
1635 cu_bitmap = 0;
1636 counter = 0;
1637 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1639 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1640 if (cu_info->bitmap[i][j] & mask) {
1641 if (counter == pg_always_on_cu_num)
1642 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1643 if (counter < always_on_cu_num)
1644 cu_bitmap |= mask;
1645 else
1646 break;
1647 counter++;
1649 mask <<= 1;
1652 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1653 cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1656 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1657 mutex_unlock(&adev->grbm_idx_mutex);
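/*
 * gfx_v9_0_init_lbpw() / gfx_v9_4_init_lbpw() - program the RLC load
 * balancing per watt (LBPW) thresholds, counters and CU masks (Raven and
 * Vega20 values, respectively) before LBPW is enabled via RLC_LB_CNTL.
 */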
1660 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1662 uint32_t data;
1664 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1665 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1666 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1667 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1668 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1670 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1671 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1673 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1674 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1676 mutex_lock(&adev->grbm_idx_mutex);
1677 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1678 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1679 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1681 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1682 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1683 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1684 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1685 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1687 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1688 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1689 data &= 0x0000FFFF;
1690 data |= 0x00C00000;
1691 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1694 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1695 * programmed in gfx_v9_0_init_always_on_cu_mask()
1698 /* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
1699 * but is used for RLC_LB_CNTL configuration */
1700 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1701 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1702 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1703 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1704 mutex_unlock(&adev->grbm_idx_mutex);
1706 gfx_v9_0_init_always_on_cu_mask(adev);
1709 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1711 uint32_t data;
1713 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1714 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1715 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1716 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1717 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1719 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1720 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1722 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1723 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1725 mutex_lock(&adev->grbm_idx_mutex);
1726 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1727 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1728 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1730 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1731 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1732 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1733 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1734 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1736 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1737 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1738 data &= 0x0000FFFF;
1739 data |= 0x00C00000;
1740 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1743 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1744 * programmed in gfx_v9_0_init_always_on_cu_mask()
1747 /* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
1748 * but is used for RLC_LB_CNTL configuration */
1749 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1750 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1751 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1752 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1753 mutex_unlock(&adev->grbm_idx_mutex);
1755 gfx_v9_0_init_always_on_cu_mask(adev);
1758 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1760 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1763 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1765 return 5;
1768 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1770 const struct cs_section_def *cs_data;
1771 int r;
1773 adev->gfx.rlc.cs_data = gfx9_cs_data;
1775 cs_data = adev->gfx.rlc.cs_data;
1777 if (cs_data) {
1778 /* init clear state block */
1779 r = amdgpu_gfx_rlc_init_csb(adev);
1780 if (r)
1781 return r;
1784 if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1785 /* TODO: double check the cp_table_size for RV */
1786 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1787 r = amdgpu_gfx_rlc_init_cpt(adev);
1788 if (r)
1789 return r;
1792 switch (adev->asic_type) {
1793 case CHIP_RAVEN:
1794 gfx_v9_0_init_lbpw(adev);
1795 break;
1796 case CHIP_VEGA20:
1797 gfx_v9_4_init_lbpw(adev);
1798 break;
1799 default:
1800 break;
1803 return 0;
1806 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1808 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1809 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
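/*
 * gfx_v9_0_mec_init() - allocate the HPD EOP buffer in VRAM (one
 * GFX9_MEC_HPD_SIZE slot per compute ring) and a GTT bo holding a copy of
 * the MEC firmware image.
 */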
1812 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1814 int r;
1815 u32 *hpd;
1816 const __le32 *fw_data;
1817 unsigned fw_size;
1818 u32 *fw;
1819 size_t mec_hpd_size;
1821 const struct gfx_firmware_header_v1_0 *mec_hdr;
1823 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1825 /* take ownership of the relevant compute queues */
1826 amdgpu_gfx_compute_queue_acquire(adev);
1827 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1829 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1830 AMDGPU_GEM_DOMAIN_VRAM,
1831 &adev->gfx.mec.hpd_eop_obj,
1832 &adev->gfx.mec.hpd_eop_gpu_addr,
1833 (void **)&hpd);
1834 if (r) {
1835 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1836 gfx_v9_0_mec_fini(adev);
1837 return r;
1840 memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1842 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1843 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1845 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1847 fw_data = (const __le32 *)
1848 (adev->gfx.mec_fw->data +
1849 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1850 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1852 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1853 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1854 &adev->gfx.mec.mec_fw_obj,
1855 &adev->gfx.mec.mec_fw_gpu_addr,
1856 (void **)&fw);
1857 if (r) {
1858 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1859 gfx_v9_0_mec_fini(adev);
1860 return r;
1863 memcpy(fw, fw_data, fw_size);
1865 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1866 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1868 return 0;
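/*
 * Wave debug helpers: wave_read_ind()/wave_read_regs() go through the
 * SQ_IND_INDEX/SQ_IND_DATA indirect interface to read wave status, SGPRs
 * and VGPRs for the read_wave_* callbacks below.
 */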
1871 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1873 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1874 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1875 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1876 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1877 (SQ_IND_INDEX__FORCE_READ_MASK));
1878 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1881 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1882 uint32_t wave, uint32_t thread,
1883 uint32_t regno, uint32_t num, uint32_t *out)
1885 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1886 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1887 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1888 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1889 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1890 (SQ_IND_INDEX__FORCE_READ_MASK) |
1891 (SQ_IND_INDEX__AUTO_INCR_MASK));
1892 while (num--)
1893 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1896 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1898 /* type 1 wave data */
1899 dst[(*no_fields)++] = 1;
1900 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1901 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1902 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1903 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1904 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1905 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1906 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1907 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1908 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1909 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1910 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1911 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1912 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1913 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1916 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1917 uint32_t wave, uint32_t start,
1918 uint32_t size, uint32_t *dst)
1920 wave_read_regs(
1921 adev, simd, wave, 0,
1922 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1925 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1926 uint32_t wave, uint32_t thread,
1927 uint32_t start, uint32_t size,
1928 uint32_t *dst)
1930 wave_read_regs(
1931 adev, simd, wave, thread,
1932 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1935 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1936 u32 me, u32 pipe, u32 q, u32 vm)
1938 soc15_grbm_select(adev, me, pipe, q, vm);
1941 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1942 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1943 .select_se_sh = &gfx_v9_0_select_se_sh,
1944 .read_wave_data = &gfx_v9_0_read_wave_data,
1945 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1946 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1947 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1948 .ras_error_inject = &gfx_v9_0_ras_error_inject,
1949 .query_ras_error_count = &gfx_v9_0_query_ras_error_count
1952 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1954 u32 gb_addr_config;
1955 int err;
1957 adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1959 switch (adev->asic_type) {
1960 case CHIP_VEGA10:
1961 adev->gfx.config.max_hw_contexts = 8;
1962 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1963 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1964 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1965 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1966 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1967 break;
1968 case CHIP_VEGA12:
1969 adev->gfx.config.max_hw_contexts = 8;
1970 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1971 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1972 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1973 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1974 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1975 DRM_INFO("fix gfx.config for vega12\n");
1976 break;
1977 case CHIP_VEGA20:
1978 adev->gfx.config.max_hw_contexts = 8;
1979 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1980 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1981 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1982 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1983 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1984 gb_addr_config &= ~0xf3e777ff;
1985 gb_addr_config |= 0x22014042;
1986 /* check vbios table if gpu info is not available */
1987 err = amdgpu_atomfirmware_get_gfx_info(adev);
1988 if (err)
1989 return err;
1990 break;
1991 case CHIP_RAVEN:
1992 adev->gfx.config.max_hw_contexts = 8;
1993 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1994 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1995 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1996 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1997 if (adev->rev_id >= 8)
1998 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1999 else
2000 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2001 break;
2002 case CHIP_ARCTURUS:
2003 adev->gfx.config.max_hw_contexts = 8;
2004 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2005 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2006 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2007 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2008 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2009 gb_addr_config &= ~0xf3e777ff;
2010 gb_addr_config |= 0x22014042;
2011 break;
2012 case CHIP_RENOIR:
2013 adev->gfx.config.max_hw_contexts = 8;
2014 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2015 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2016 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2017 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2018 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2019 gb_addr_config &= ~0xf3e777ff;
2020 gb_addr_config |= 0x22010042;
2021 break;
2022 default:
2023 BUG();
2024 break;
2027 adev->gfx.config.gb_addr_config = gb_addr_config;
2029 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2030 REG_GET_FIELD(
2031 adev->gfx.config.gb_addr_config,
2032 GB_ADDR_CONFIG,
2033 NUM_PIPES);
2035 adev->gfx.config.max_tile_pipes =
2036 adev->gfx.config.gb_addr_config_fields.num_pipes;
2038 adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2039 REG_GET_FIELD(
2040 adev->gfx.config.gb_addr_config,
2041 GB_ADDR_CONFIG,
2042 NUM_BANKS);
2043 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2044 REG_GET_FIELD(
2045 adev->gfx.config.gb_addr_config,
2046 GB_ADDR_CONFIG,
2047 MAX_COMPRESSED_FRAGS);
2048 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2049 REG_GET_FIELD(
2050 adev->gfx.config.gb_addr_config,
2051 GB_ADDR_CONFIG,
2052 NUM_RB_PER_SE);
2053 adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2054 REG_GET_FIELD(
2055 adev->gfx.config.gb_addr_config,
2056 GB_ADDR_CONFIG,
2057 NUM_SHADER_ENGINES);
2058 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2059 REG_GET_FIELD(
2060 adev->gfx.config.gb_addr_config,
2061 GB_ADDR_CONFIG,
2062 PIPE_INTERLEAVE_SIZE));
2064 return 0;
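/*
 * gfx_v9_0_compute_ring_init() - set up one compute ring: MEC/pipe/queue
 * addressing, its doorbell index, its slot in the HPD EOP buffer and the
 * matching EOP interrupt source.
 */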
2067 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2068 int mec, int pipe, int queue)
2070 int r;
2071 unsigned irq_type;
2072 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2076 /* mec0 is me1 */
2077 ring->me = mec + 1;
2078 ring->pipe = pipe;
2079 ring->queue = queue;
2081 ring->ring_obj = NULL;
2082 ring->use_doorbell = true;
2083 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2084 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2085 + (ring_id * GFX9_MEC_HPD_SIZE);
2086 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2088 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2089 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2090 + ring->pipe;
2092 /* type-2 packets are deprecated on MEC, use type-3 instead */
2093 r = amdgpu_ring_init(adev, ring, 1024,
2094 &adev->gfx.eop_irq, irq_type);
2095 if (r)
2096 return r;
2099 return 0;
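/*
 * gfx_v9_0_sw_init() - software-side init: register the CP interrupt
 * sources, load microcode, create the RLC and MEC BOs, initialize the gfx
 * and compute rings, the KIQ and the per-queue MQDs.
 */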
2102 static int gfx_v9_0_sw_init(void *handle)
2104 int i, j, k, r, ring_id;
2105 struct amdgpu_ring *ring;
2106 struct amdgpu_kiq *kiq;
2107 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2109 switch (adev->asic_type) {
2110 case CHIP_VEGA10:
2111 case CHIP_VEGA12:
2112 case CHIP_VEGA20:
2113 case CHIP_RAVEN:
2114 case CHIP_ARCTURUS:
2115 case CHIP_RENOIR:
2116 adev->gfx.mec.num_mec = 2;
2117 break;
2118 default:
2119 adev->gfx.mec.num_mec = 1;
2120 break;
2123 adev->gfx.mec.num_pipe_per_mec = 4;
2124 adev->gfx.mec.num_queue_per_pipe = 8;
2126 /* EOP Event */
2127 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2128 if (r)
2129 return r;
2131 /* Privileged reg */
2132 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2133 &adev->gfx.priv_reg_irq);
2134 if (r)
2135 return r;
2137 /* Privileged inst */
2138 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2139 &adev->gfx.priv_inst_irq);
2140 if (r)
2141 return r;
2143 /* ECC error */
2144 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2145 &adev->gfx.cp_ecc_error_irq);
2146 if (r)
2147 return r;
2149 /* FUE error */
2150 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2151 &adev->gfx.cp_ecc_error_irq);
2152 if (r)
2153 return r;
2155 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2157 gfx_v9_0_scratch_init(adev);
2159 r = gfx_v9_0_init_microcode(adev);
2160 if (r) {
2161 DRM_ERROR("Failed to load gfx firmware!\n");
2162 return r;
2165 r = adev->gfx.rlc.funcs->init(adev);
2166 if (r) {
2167 DRM_ERROR("Failed to init rlc BOs!\n");
2168 return r;
2171 r = gfx_v9_0_mec_init(adev);
2172 if (r) {
2173 DRM_ERROR("Failed to init MEC BOs!\n");
2174 return r;
2177 /* set up the gfx ring */
2178 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2179 ring = &adev->gfx.gfx_ring[i];
2180 ring->ring_obj = NULL;
2181 if (!i)
2182 sprintf(ring->name, "gfx");
2183 else
2184 sprintf(ring->name, "gfx_%d", i);
2185 ring->use_doorbell = true;
2186 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2187 r = amdgpu_ring_init(adev, ring, 1024,
2188 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2189 if (r)
2190 return r;
2193 /* set up the compute queues - allocate horizontally across pipes */
2194 ring_id = 0;
2195 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2196 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2197 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2198 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2199 continue;
2201 r = gfx_v9_0_compute_ring_init(adev,
2202 ring_id,
2203 i, k, j);
2204 if (r)
2205 return r;
2207 ring_id++;
2212 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2213 if (r) {
2214 DRM_ERROR("Failed to init KIQ BOs!\n");
2215 return r;
2218 kiq = &adev->gfx.kiq;
2219 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2220 if (r)
2221 return r;
2223 /* create MQD for all compute queues as well as KIQ for the SRIOV case */
2224 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2225 if (r)
2226 return r;
2228 adev->gfx.ce_ram_size = 0x8000;
2230 r = gfx_v9_0_gpu_early_init(adev);
2231 if (r)
2232 return r;
2234 return 0;
2238 static int gfx_v9_0_sw_fini(void *handle)
2240 int i;
2241 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2243 amdgpu_gfx_ras_fini(adev);
2245 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2246 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2247 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2248 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2250 amdgpu_gfx_mqd_sw_fini(adev);
2251 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2252 amdgpu_gfx_kiq_fini(adev);
2254 gfx_v9_0_mec_fini(adev);
2255 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2256 if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2257 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2258 &adev->gfx.rlc.cp_table_gpu_addr,
2259 (void **)&adev->gfx.rlc.cp_table_ptr);
2261 gfx_v9_0_free_microcode(adev);
2263 return 0;
2267 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2269 /* TODO */
2272 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2274 u32 data;
2276 if (instance == 0xffffffff)
2277 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2278 else
2279 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2281 if (se_num == 0xffffffff)
2282 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2283 else
2284 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2286 if (sh_num == 0xffffffff)
2287 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2288 else
2289 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2291 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2294 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2296 u32 data, mask;
2298 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2299 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2301 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2302 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2304 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2305 adev->gfx.config.max_sh_per_se);
2307 return (~data) & mask;
2310 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2312 int i, j;
2313 u32 data;
2314 u32 active_rbs = 0;
2315 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2316 adev->gfx.config.max_sh_per_se;
2318 mutex_lock(&adev->grbm_idx_mutex);
2319 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2320 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2321 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2322 data = gfx_v9_0_get_rb_active_bitmap(adev);
2323 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2324 rb_bitmap_width_per_sh);
2327 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2328 mutex_unlock(&adev->grbm_idx_mutex);
2330 adev->gfx.config.backend_enable_mask = active_rbs;
2331 adev->gfx.config.num_rbs = hweight32(active_rbs);
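/*
 * gfx_v9_0_init_compute_vmid() - give compute VMIDs 8..15 the 64-bit
 * aperture layout described below via SH_MEM_CONFIG/SH_MEM_BASES and clear
 * their GDS/GWS/OA allocations (firmware enables those for target VMIDs).
 */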
2334 #define DEFAULT_SH_MEM_BASES (0x6000)
2335 #define FIRST_COMPUTE_VMID (8)
2336 #define LAST_COMPUTE_VMID (16)
2337 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2339 int i;
2340 uint32_t sh_mem_config;
2341 uint32_t sh_mem_bases;
2344 * Configure apertures:
2345 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
2346 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
2347 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
2349 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2351 sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2352 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2353 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2355 mutex_lock(&adev->srbm_mutex);
2356 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2357 soc15_grbm_select(adev, 0, 0, 0, i);
2358 /* CP and shaders */
2359 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2360 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2362 soc15_grbm_select(adev, 0, 0, 0, 0);
2363 mutex_unlock(&adev->srbm_mutex);
2365 /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2366 access. These should be enabled by FW for target VMIDs. */
2367 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2368 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2369 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2370 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2371 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2375 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2377 int vmid;
2380 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2381 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2382 * the driver can enable them for graphics. VMID0 should maintain
2383 * access so that HWS firmware can save/restore entries.
2385 for (vmid = 1; vmid < 16; vmid++) {
2386 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2387 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2388 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2389 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2393 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2395 u32 tmp;
2396 int i;
2398 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2400 gfx_v9_0_tiling_mode_table_init(adev);
2402 gfx_v9_0_setup_rb(adev);
2403 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2404 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2406 /* XXX SH_MEM regs */
2407 /* where to put LDS, scratch, GPUVM in FSA64 space */
2408 mutex_lock(&adev->srbm_mutex);
2409 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2410 soc15_grbm_select(adev, 0, 0, 0, i);
2411 /* CP and shaders */
2412 if (i == 0) {
2413 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2414 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2415 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2416 !!amdgpu_noretry);
2417 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2418 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2419 } else {
2420 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2421 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2422 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2423 !!amdgpu_noretry);
2424 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2425 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2426 (adev->gmc.private_aperture_start >> 48));
2427 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2428 (adev->gmc.shared_aperture_start >> 48));
2429 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2432 soc15_grbm_select(adev, 0, 0, 0, 0);
2434 mutex_unlock(&adev->srbm_mutex);
2436 gfx_v9_0_init_compute_vmid(adev);
2437 gfx_v9_0_init_gds_vmid(adev);
2440 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2442 u32 i, j, k;
2443 u32 mask;
2445 mutex_lock(&adev->grbm_idx_mutex);
2446 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2447 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2448 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2449 for (k = 0; k < adev->usec_timeout; k++) {
2450 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2451 break;
2452 udelay(1);
2454 if (k == adev->usec_timeout) {
2455 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2456 0xffffffff, 0xffffffff);
2457 mutex_unlock(&adev->grbm_idx_mutex);
2458 DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
2459 i, j);
2460 return;
2464 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2465 mutex_unlock(&adev->grbm_idx_mutex);
2467 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2468 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2469 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2470 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2471 for (k = 0; k < adev->usec_timeout; k++) {
2472 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2473 break;
2474 udelay(1);
2478 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2479 bool enable)
2481 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2483 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2484 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2485 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2486 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2488 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2491 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2493 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2494 /* csib */
2495 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2496 adev->gfx.rlc.clear_state_gpu_addr >> 32);
2497 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2498 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2499 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2500 adev->gfx.rlc.clear_state_size);
2503 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2504 int indirect_offset,
2505 int list_size,
2506 int *unique_indirect_regs,
2507 int unique_indirect_reg_count,
2508 int *indirect_start_offsets,
2509 int *indirect_start_offsets_count,
2510 int max_start_offsets_count)
2512 int idx;
2514 for (; indirect_offset < list_size; indirect_offset++) {
2515 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2516 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2517 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2519 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2520 indirect_offset += 2;
2522 /* look for the matching index */
2523 for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2524 if (unique_indirect_regs[idx] ==
2525 register_list_format[indirect_offset] ||
2526 !unique_indirect_regs[idx])
2527 break;
2530 BUG_ON(idx >= unique_indirect_reg_count);
2532 if (!unique_indirect_regs[idx])
2533 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2535 indirect_offset++;
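/*
 * gfx_v9_1_init_rlc_save_restore_list() - upload the RLC save/restore list:
 * the register_restore table goes into SRM ARAM, the format list (direct and
 * indirect portions), list size and starting offsets into RLC GPM scratch,
 * and the unique indirect registers into the SRM index control registers.
 */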
2540 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2542 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2543 int unique_indirect_reg_count = 0;
2545 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2546 int indirect_start_offsets_count = 0;
2548 int list_size = 0;
2549 int i = 0, j = 0;
2550 u32 tmp = 0;
2552 u32 *register_list_format =
2553 kmemdup(adev->gfx.rlc.register_list_format,
2554 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2555 if (!register_list_format)
2556 return -ENOMEM;
2558 /* setup unique_indirect_regs array and indirect_start_offsets array */
2559 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2560 gfx_v9_1_parse_ind_reg_list(register_list_format,
2561 adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2562 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2563 unique_indirect_regs,
2564 unique_indirect_reg_count,
2565 indirect_start_offsets,
2566 &indirect_start_offsets_count,
2567 ARRAY_SIZE(indirect_start_offsets));
2569 /* enable auto inc in case it is disabled */
2570 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2571 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2572 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2574 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2575 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2576 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2577 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2578 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2579 adev->gfx.rlc.register_restore[i]);
2581 /* load indirect register */
2582 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2583 adev->gfx.rlc.reg_list_format_start);
2585 /* direct register portion */
2586 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2587 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2588 register_list_format[i]);
2590 /* indirect register portion */
2591 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2592 if (register_list_format[i] == 0xFFFFFFFF) {
2593 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2594 continue;
2597 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2598 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2600 for (j = 0; j < unique_indirect_reg_count; j++) {
2601 if (register_list_format[i] == unique_indirect_regs[j]) {
2602 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2603 break;
2607 BUG_ON(j >= unique_indirect_reg_count);
2609 i++;
2612 /* set save/restore list size */
2613 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2614 list_size = list_size >> 1;
2615 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2616 adev->gfx.rlc.reg_restore_list_size);
2617 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2619 /* write the starting offsets to RLC scratch ram */
2620 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2621 adev->gfx.rlc.starting_offsets_start);
2622 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2623 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2624 indirect_start_offsets[i]);
2626 /* load unique indirect regs */
2627 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2628 if (unique_indirect_regs[i] != 0) {
2629 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2630 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2631 unique_indirect_regs[i] & 0x3FFFF);
2633 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2634 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2635 unique_indirect_regs[i] >> 20);
2639 kfree(register_list_format);
2640 return 0;
2643 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2645 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2648 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2649 bool enable)
2651 uint32_t data = 0;
2652 uint32_t default_data = 0;
2654 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2655 if (enable) {
2656 /* enable GFXIP control over CGPG */
2657 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2658 if(default_data != data)
2659 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2661 /* update status */
2662 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2663 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2664 if(default_data != data)
2665 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2666 } else {
2667 /* restore GFXIP control over CGPG */
2668 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2669 if(default_data != data)
2670 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2674 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2676 uint32_t data = 0;
2678 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2679 AMD_PG_SUPPORT_GFX_SMG |
2680 AMD_PG_SUPPORT_GFX_DMG)) {
2681 /* init IDLE_POLL_COUNT = 60 */
2682 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2683 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2684 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2685 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2687 /* init RLC PG Delay */
2688 data = 0;
2689 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2690 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2691 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2692 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2693 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2695 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2696 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2697 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2698 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2700 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2701 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2702 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2703 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2705 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2706 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2708 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2709 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2710 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2712 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2716 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2717 bool enable)
2719 uint32_t data = 0;
2720 uint32_t default_data = 0;
2722 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2723 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2724 SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2725 enable ? 1 : 0);
2726 if (default_data != data)
2727 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2730 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2731 bool enable)
2733 uint32_t data = 0;
2734 uint32_t default_data = 0;
2736 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2737 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2738 SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2739 enable ? 1 : 0);
2740 if(default_data != data)
2741 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2744 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2745 bool enable)
2747 uint32_t data = 0;
2748 uint32_t default_data = 0;
2750 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2751 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2752 CP_PG_DISABLE,
2753 enable ? 0 : 1);
2754 if(default_data != data)
2755 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2758 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2759 bool enable)
2761 uint32_t data, default_data;
2763 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2764 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2765 GFX_POWER_GATING_ENABLE,
2766 enable ? 1 : 0);
2767 if(default_data != data)
2768 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2771 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2772 bool enable)
2774 uint32_t data, default_data;
2776 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2777 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2778 GFX_PIPELINE_PG_ENABLE,
2779 enable ? 1 : 0);
2780 if(default_data != data)
2781 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2783 if (!enable)
2784 /* read any GFX register to wake up GFX */
2785 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2788 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2789 bool enable)
2791 uint32_t data, default_data;
2793 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2794 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2795 STATIC_PER_CU_PG_ENABLE,
2796 enable ? 1 : 0);
2797 if(default_data != data)
2798 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2801 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2802 bool enable)
2804 uint32_t data, default_data;
2806 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2807 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2808 DYN_PER_CU_PG_ENABLE,
2809 enable ? 1 : 0);
2810 if(default_data != data)
2811 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
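/*
 * gfx_v9_0_init_pg() - powergating init: load the clear-state buffer
 * address, set up the RLC save/restore list where available (needed for
 * gfxoff), and point RLC_JUMP_TABLE_RESTORE at the CP jump table when any
 * GFX/CP/GDS/RLC_SMU_HS powergating feature is enabled.
 */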
2814 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2816 gfx_v9_0_init_csb(adev);
2819 * The RLC save/restore list is supported since v2_1
2820 * and is needed by the gfxoff feature.
2822 if (adev->gfx.rlc.is_rlc_v2_1) {
2823 if (adev->asic_type == CHIP_VEGA12 ||
2824 (adev->asic_type == CHIP_RAVEN &&
2825 adev->rev_id >= 8))
2826 gfx_v9_1_init_rlc_save_restore_list(adev);
2827 gfx_v9_0_enable_save_restore_machine(adev);
2830 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2831 AMD_PG_SUPPORT_GFX_SMG |
2832 AMD_PG_SUPPORT_GFX_DMG |
2833 AMD_PG_SUPPORT_CP |
2834 AMD_PG_SUPPORT_GDS |
2835 AMD_PG_SUPPORT_RLC_SMU_HS)) {
2836 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2837 adev->gfx.rlc.cp_table_gpu_addr >> 8);
2838 gfx_v9_0_init_gfx_power_gating(adev);
2842 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2844 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2845 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2846 gfx_v9_0_wait_for_rlc_serdes(adev);
2849 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2851 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2852 udelay(50);
2853 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2854 udelay(50);
2857 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2859 #ifdef AMDGPU_RLC_DEBUG_RETRY
2860 u32 rlc_ucode_ver;
2861 #endif
2863 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2864 udelay(50);
2866 /* APUs (e.g. carrizo) enable the cp interrupt after cp init */
2867 if (!(adev->flags & AMD_IS_APU)) {
2868 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2869 udelay(50);
2872 #ifdef AMDGPU_RLC_DEBUG_RETRY
2873 /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2874 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2875 if(rlc_ucode_ver == 0x108) {
2876 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
2877 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2878 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2879 * default is 0x9C4 to create a 100us interval */
2880 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2881 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2882 * to disable the page fault retry interrupts, default is
2883 * 0x100 (256) */
2884 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2886 #endif
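/*
 * gfx_v9_0_rlc_load_microcode() - legacy (non-PSP) RLC load: stream the
 * ucode words into RLC_GPM_UCODE_DATA starting at the RLCG load address and
 * finish by writing the firmware version to RLC_GPM_UCODE_ADDR.
 */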
2889 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2891 const struct rlc_firmware_header_v2_0 *hdr;
2892 const __le32 *fw_data;
2893 unsigned i, fw_size;
2895 if (!adev->gfx.rlc_fw)
2896 return -EINVAL;
2898 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2899 amdgpu_ucode_print_rlc_hdr(&hdr->header);
2901 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2902 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2903 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2905 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2906 RLCG_UCODE_LOADING_START_ADDRESS);
2907 for (i = 0; i < fw_size; i++)
2908 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2909 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2911 return 0;
2914 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2916 int r;
2918 if (amdgpu_sriov_vf(adev)) {
2919 gfx_v9_0_init_csb(adev);
2920 return 0;
2923 adev->gfx.rlc.funcs->stop(adev);
2925 /* disable CG */
2926 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2928 gfx_v9_0_init_pg(adev);
2930 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2931 /* legacy rlc firmware loading */
2932 r = gfx_v9_0_rlc_load_microcode(adev);
2933 if (r)
2934 return r;
2937 switch (adev->asic_type) {
2938 case CHIP_RAVEN:
2939 if (amdgpu_lbpw == 0)
2940 gfx_v9_0_enable_lbpw(adev, false);
2941 else
2942 gfx_v9_0_enable_lbpw(adev, true);
2943 break;
2944 case CHIP_VEGA20:
2945 if (amdgpu_lbpw > 0)
2946 gfx_v9_0_enable_lbpw(adev, true);
2947 else
2948 gfx_v9_0_enable_lbpw(adev, false);
2949 break;
2950 default:
2951 break;
2954 adev->gfx.rlc.funcs->start(adev);
2956 return 0;
2959 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2961 int i;
2962 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2964 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2965 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2966 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2967 if (!enable) {
2968 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2969 adev->gfx.gfx_ring[i].sched.ready = false;
2971 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2972 udelay(50);
2975 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2977 const struct gfx_firmware_header_v1_0 *pfp_hdr;
2978 const struct gfx_firmware_header_v1_0 *ce_hdr;
2979 const struct gfx_firmware_header_v1_0 *me_hdr;
2980 const __le32 *fw_data;
2981 unsigned i, fw_size;
2983 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2984 return -EINVAL;
2986 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2987 adev->gfx.pfp_fw->data;
2988 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2989 adev->gfx.ce_fw->data;
2990 me_hdr = (const struct gfx_firmware_header_v1_0 *)
2991 adev->gfx.me_fw->data;
2993 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2994 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2995 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2997 gfx_v9_0_cp_gfx_enable(adev, false);
2999 /* PFP */
3000 fw_data = (const __le32 *)
3001 (adev->gfx.pfp_fw->data +
3002 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3003 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3004 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3005 for (i = 0; i < fw_size; i++)
3006 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3007 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3009 /* CE */
3010 fw_data = (const __le32 *)
3011 (adev->gfx.ce_fw->data +
3012 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3013 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3014 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3015 for (i = 0; i < fw_size; i++)
3016 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3017 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3019 /* ME */
3020 fw_data = (const __le32 *)
3021 (adev->gfx.me_fw->data +
3022 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3023 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3024 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3025 for (i = 0; i < fw_size; i++)
3026 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3027 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3029 return 0;
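/*
 * gfx_v9_0_cp_gfx_start() - bring up the gfx CP: program MAX_CONTEXT and
 * DEVICE_ID, un-halt the ME/PFP/CE and submit the clear-state buffer plus
 * the CE partition bases and VGT index type setup on the gfx ring.
 */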
3032 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3034 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3035 const struct cs_section_def *sect = NULL;
3036 const struct cs_extent_def *ext = NULL;
3037 int r, i, tmp;
3039 /* init the CP */
3040 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3041 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3043 gfx_v9_0_cp_gfx_enable(adev, true);
3045 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3046 if (r) {
3047 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3048 return r;
3051 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3052 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3054 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3055 amdgpu_ring_write(ring, 0x80000000);
3056 amdgpu_ring_write(ring, 0x80000000);
3058 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3059 for (ext = sect->section; ext->extent != NULL; ++ext) {
3060 if (sect->id == SECT_CONTEXT) {
3061 amdgpu_ring_write(ring,
3062 PACKET3(PACKET3_SET_CONTEXT_REG,
3063 ext->reg_count));
3064 amdgpu_ring_write(ring,
3065 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3066 for (i = 0; i < ext->reg_count; i++)
3067 amdgpu_ring_write(ring, ext->extent[i]);
3072 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3073 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3075 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3076 amdgpu_ring_write(ring, 0);
3078 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3079 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3080 amdgpu_ring_write(ring, 0x8000);
3081 amdgpu_ring_write(ring, 0x8000);
3083 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3084 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3085 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3086 amdgpu_ring_write(ring, tmp);
3087 amdgpu_ring_write(ring, 0);
3089 amdgpu_ring_commit(ring);
3091 return 0;
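/* Program the gfx ring buffer registers (CP_RB0_*): buffer size, rptr
 * write-back address, wptr polling address, ring base and the doorbell
 * range, then kick the ring off via gfx_v9_0_cp_gfx_start().
 */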
3094 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3096 struct amdgpu_ring *ring;
3097 u32 tmp;
3098 u32 rb_bufsz;
3099 u64 rb_addr, rptr_addr, wptr_gpu_addr;
3101 /* Set the write pointer delay */
3102 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3104 /* set the RB to use vmid 0 */
3105 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3107 /* Set ring buffer size */
3108 ring = &adev->gfx.gfx_ring[0];
3109 rb_bufsz = order_base_2(ring->ring_size / 8);
3110 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3111 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3112 #ifdef __BIG_ENDIAN
3113 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3114 #endif
3115 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3117 /* Initialize the ring buffer's write pointers */
3118 ring->wptr = 0;
3119 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3120 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3122 /* set the wb address whether it's enabled or not */
3123 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3124 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3125 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3127 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3128 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3129 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3131 mdelay(1);
3132 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3134 rb_addr = ring->gpu_addr >> 8;
3135 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3136 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3138 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3139 if (ring->use_doorbell) {
3140 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3141 DOORBELL_OFFSET, ring->doorbell_index);
3142 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3143 DOORBELL_EN, 1);
3144 } else {
3145 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3147 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3149 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3150 DOORBELL_RANGE_LOWER, ring->doorbell_index);
3151 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3153 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3154 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3157 /* start the ring */
3158 gfx_v9_0_cp_gfx_start(adev);
3159 ring->sched.ready = true;
3161 return 0;
3164 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3166 int i;
3168 if (enable) {
3169 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3170 } else {
3171 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3172 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3173 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3174 adev->gfx.compute_ring[i].sched.ready = false;
3175 adev->gfx.kiq.ring.sched.ready = false;
3177 udelay(50);
3180 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3182 const struct gfx_firmware_header_v1_0 *mec_hdr;
3183 const __le32 *fw_data;
3184 unsigned i;
3185 u32 tmp;
3187 if (!adev->gfx.mec_fw)
3188 return -EINVAL;
3190 gfx_v9_0_cp_compute_enable(adev, false);
3192 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3193 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3195 fw_data = (const __le32 *)
3196 (adev->gfx.mec_fw->data +
3197 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3198 tmp = 0;
3199 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3200 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3201 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3203 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3204 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3205 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3206 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3208 /* MEC1 */
3209 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3210 mec_hdr->jt_offset);
3211 for (i = 0; i < mec_hdr->jt_size; i++)
3212 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3213 le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3215 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3216 adev->gfx.mec_fw_version);
3217 /* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3219 return 0;
3222 /* KIQ functions */
3223 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3225 uint32_t tmp;
3226 struct amdgpu_device *adev = ring->adev;
3228 /* tell RLC which queue is the KIQ */
3229 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3230 tmp &= 0xffffff00;
3231 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3232 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3233 tmp |= 0x80;
3234 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
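/* Fill in the Memory Queue Descriptor (MQD) for a compute or KIQ queue.
 * The MQD mirrors the CP_HQD_* register state (EOP buffer, doorbell,
 * queue base, read/write pointers); it is later committed to hardware
 * either by direct register writes (KIQ) or by a MAP_QUEUES packet
 * submitted through the KIQ (regular compute queues).
 */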
3237 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3239 struct amdgpu_device *adev = ring->adev;
3240 struct v9_mqd *mqd = ring->mqd_ptr;
3241 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3242 uint32_t tmp;
3244 mqd->header = 0xC0310800;
3245 mqd->compute_pipelinestat_enable = 0x00000001;
3246 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3247 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3248 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3249 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3250 mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3251 mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3252 mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3253 mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3254 mqd->compute_misc_reserved = 0x00000003;
3256 mqd->dynamic_cu_mask_addr_lo =
3257 lower_32_bits(ring->mqd_gpu_addr
3258 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3259 mqd->dynamic_cu_mask_addr_hi =
3260 upper_32_bits(ring->mqd_gpu_addr
3261 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3263 eop_base_addr = ring->eop_gpu_addr >> 8;
3264 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3265 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3267 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3268 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3269 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3270 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3272 mqd->cp_hqd_eop_control = tmp;
3274 /* enable doorbell? */
3275 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3277 if (ring->use_doorbell) {
3278 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3279 DOORBELL_OFFSET, ring->doorbell_index);
3280 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3281 DOORBELL_EN, 1);
3282 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3283 DOORBELL_SOURCE, 0);
3284 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3285 DOORBELL_HIT, 0);
3286 } else {
3287 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3288 DOORBELL_EN, 0);
3291 mqd->cp_hqd_pq_doorbell_control = tmp;
3293 /* disable the queue if it's active */
3294 ring->wptr = 0;
3295 mqd->cp_hqd_dequeue_request = 0;
3296 mqd->cp_hqd_pq_rptr = 0;
3297 mqd->cp_hqd_pq_wptr_lo = 0;
3298 mqd->cp_hqd_pq_wptr_hi = 0;
3300 /* set the pointer to the MQD */
3301 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3302 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3304 /* set MQD vmid to 0 */
3305 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3306 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3307 mqd->cp_mqd_control = tmp;
3309 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3310 hqd_gpu_addr = ring->gpu_addr >> 8;
3311 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3312 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3314 /* set up the HQD, this is similar to CP_RB0_CNTL */
3315 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3316 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3317 (order_base_2(ring->ring_size / 4) - 1));
3318 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3319 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3320 #ifdef __BIG_ENDIAN
3321 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3322 #endif
3323 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3324 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3325 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3326 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3327 mqd->cp_hqd_pq_control = tmp;
3329 /* set the wb address whether it's enabled or not */
3330 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3331 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3332 mqd->cp_hqd_pq_rptr_report_addr_hi =
3333 upper_32_bits(wb_gpu_addr) & 0xffff;
3335 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3336 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3337 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3338 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3340 tmp = 0;
3341 /* enable the doorbell if requested */
3342 if (ring->use_doorbell) {
3343 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3344 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3345 DOORBELL_OFFSET, ring->doorbell_index);
3347 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3348 DOORBELL_EN, 1);
3349 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3350 DOORBELL_SOURCE, 0);
3351 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3352 DOORBELL_HIT, 0);
3355 mqd->cp_hqd_pq_doorbell_control = tmp;
3357 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3358 ring->wptr = 0;
3359 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3361 /* set the vmid for the queue */
3362 mqd->cp_hqd_vmid = 0;
3364 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3365 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3366 mqd->cp_hqd_persistent_state = tmp;
3368 /* set MIN_IB_AVAIL_SIZE */
3369 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3370 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3371 mqd->cp_hqd_ib_control = tmp;
3373 /* the map_queues packet doesn't need to activate the queue,
3374 * so only the KIQ needs to set this field.
3376 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3377 mqd->cp_hqd_active = 1;
3379 return 0;
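/* Commit the KIQ MQD contents to the CP_HQD_* registers.  The caller is
 * expected to hold srbm_mutex and to have selected the right me/pipe/
 * queue via soc15_grbm_select() before calling this.
 */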
3382 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3384 struct amdgpu_device *adev = ring->adev;
3385 struct v9_mqd *mqd = ring->mqd_ptr;
3386 int j;
3388 /* disable wptr polling */
3389 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3391 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3392 mqd->cp_hqd_eop_base_addr_lo);
3393 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3394 mqd->cp_hqd_eop_base_addr_hi);
3396 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3397 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3398 mqd->cp_hqd_eop_control);
3400 /* enable doorbell? */
3401 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3402 mqd->cp_hqd_pq_doorbell_control);
3404 /* disable the queue if it's active */
3405 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3406 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3407 for (j = 0; j < adev->usec_timeout; j++) {
3408 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3409 break;
3410 udelay(1);
3412 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3413 mqd->cp_hqd_dequeue_request);
3414 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3415 mqd->cp_hqd_pq_rptr);
3416 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3417 mqd->cp_hqd_pq_wptr_lo);
3418 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3419 mqd->cp_hqd_pq_wptr_hi);
3422 /* set the pointer to the MQD */
3423 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3424 mqd->cp_mqd_base_addr_lo);
3425 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3426 mqd->cp_mqd_base_addr_hi);
3428 /* set MQD vmid to 0 */
3429 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3430 mqd->cp_mqd_control);
3432 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3433 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3434 mqd->cp_hqd_pq_base_lo);
3435 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3436 mqd->cp_hqd_pq_base_hi);
3438 /* set up the HQD, this is similar to CP_RB0_CNTL */
3439 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3440 mqd->cp_hqd_pq_control);
3442 /* set the wb address whether it's enabled or not */
3443 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3444 mqd->cp_hqd_pq_rptr_report_addr_lo);
3445 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3446 mqd->cp_hqd_pq_rptr_report_addr_hi);
3448 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3449 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3450 mqd->cp_hqd_pq_wptr_poll_addr_lo);
3451 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3452 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3454 /* enable the doorbell if requested */
3455 if (ring->use_doorbell) {
3456 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3457 (adev->doorbell_index.kiq * 2) << 2);
3458 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3459 (adev->doorbell_index.userqueue_end * 2) << 2);
3462 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3463 mqd->cp_hqd_pq_doorbell_control);
3465 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3466 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3467 mqd->cp_hqd_pq_wptr_lo);
3468 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3469 mqd->cp_hqd_pq_wptr_hi);
3471 /* set the vmid for the queue */
3472 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3474 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3475 mqd->cp_hqd_persistent_state);
3477 /* activate the queue */
3478 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3479 mqd->cp_hqd_active);
3481 if (ring->use_doorbell)
3482 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3484 return 0;
3487 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3489 struct amdgpu_device *adev = ring->adev;
3490 int j;
3492 /* disable the queue if it's active */
3493 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3495 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3497 for (j = 0; j < adev->usec_timeout; j++) {
3498 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3499 break;
3500 udelay(1);
3503 if (j == adev->usec_timeout) {
3504 DRM_DEBUG("KIQ dequeue request failed.\n");
3506 /* Manual disable if dequeue request times out */
3507 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3510 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0);
3514 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3515 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3516 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3517 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3518 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3519 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3520 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3521 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3523 return 0;
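/* KIQ queue init.  On a GPU reset the saved MQD backup is restored and
 * only the registers are re-programmed; on first init a fresh MQD is
 * generated and then backed up so it can be reused after a reset.
 */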
3526 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3528 struct amdgpu_device *adev = ring->adev;
3529 struct v9_mqd *mqd = ring->mqd_ptr;
3530 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3532 gfx_v9_0_kiq_setting(ring);
3534 if (adev->in_gpu_reset) { /* for GPU_RESET case */
3535 /* reset MQD to a clean status */
3536 if (adev->gfx.mec.mqd_backup[mqd_idx])
3537 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3539 /* reset ring buffer */
3540 ring->wptr = 0;
3541 amdgpu_ring_clear_ring(ring);
3543 mutex_lock(&adev->srbm_mutex);
3544 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3545 gfx_v9_0_kiq_init_register(ring);
3546 soc15_grbm_select(adev, 0, 0, 0, 0);
3547 mutex_unlock(&adev->srbm_mutex);
3548 } else {
3549 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3550 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3551 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3552 mutex_lock(&adev->srbm_mutex);
3553 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3554 gfx_v9_0_mqd_init(ring);
3555 gfx_v9_0_kiq_init_register(ring);
3556 soc15_grbm_select(adev, 0, 0, 0, 0);
3557 mutex_unlock(&adev->srbm_mutex);
3559 if (adev->gfx.mec.mqd_backup[mqd_idx])
3560 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3563 return 0;
3566 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3568 struct amdgpu_device *adev = ring->adev;
3569 struct v9_mqd *mqd = ring->mqd_ptr;
3570 int mqd_idx = ring - &adev->gfx.compute_ring[0];
3572 if (!adev->in_gpu_reset && !adev->in_suspend) {
3573 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3574 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3575 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3576 mutex_lock(&adev->srbm_mutex);
3577 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3578 gfx_v9_0_mqd_init(ring);
3579 soc15_grbm_select(adev, 0, 0, 0, 0);
3580 mutex_unlock(&adev->srbm_mutex);
3582 if (adev->gfx.mec.mqd_backup[mqd_idx])
3583 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3584 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3585 /* reset MQD to a clean status */
3586 if (adev->gfx.mec.mqd_backup[mqd_idx])
3587 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3589 /* reset ring buffer */
3590 ring->wptr = 0;
3591 amdgpu_ring_clear_ring(ring);
3592 } else {
3593 amdgpu_ring_clear_ring(ring);
3596 return 0;
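/* Map the KIQ MQD BO and (re)initialize the KIQ queue.  The KIQ has to
 * be brought up before the KCQs, since the compute queues are mapped
 * through KIQ packets in amdgpu_gfx_enable_kcq().
 */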
3599 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3601 struct amdgpu_ring *ring;
3602 int r;
3604 ring = &adev->gfx.kiq.ring;
3606 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3607 if (unlikely(r != 0))
3608 return r;
3610 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3611 if (unlikely(r != 0))
3612 return r;
3614 gfx_v9_0_kiq_init_queue(ring);
3615 amdgpu_bo_kunmap(ring->mqd_obj);
3616 ring->mqd_ptr = NULL;
3617 amdgpu_bo_unreserve(ring->mqd_obj);
3618 ring->sched.ready = true;
3619 return 0;
3622 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3624 struct amdgpu_ring *ring = NULL;
3625 int r = 0, i;
3627 gfx_v9_0_cp_compute_enable(adev, true);
3629 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3630 ring = &adev->gfx.compute_ring[i];
3632 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3633 if (unlikely(r != 0))
3634 goto done;
3635 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3636 if (!r) {
3637 r = gfx_v9_0_kcq_init_queue(ring);
3638 amdgpu_bo_kunmap(ring->mqd_obj);
3639 ring->mqd_ptr = NULL;
3641 amdgpu_bo_unreserve(ring->mqd_obj);
3642 if (r)
3643 goto done;
3646 r = amdgpu_gfx_enable_kcq(adev);
3647 done:
3648 return r;
3651 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3653 int r, i;
3654 struct amdgpu_ring *ring;
3656 if (!(adev->flags & AMD_IS_APU))
3657 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3659 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3660 if (adev->asic_type != CHIP_ARCTURUS) {
3661 /* legacy firmware loading */
3662 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3663 if (r)
3664 return r;
3667 r = gfx_v9_0_cp_compute_load_microcode(adev);
3668 if (r)
3669 return r;
3672 r = gfx_v9_0_kiq_resume(adev);
3673 if (r)
3674 return r;
3676 if (adev->asic_type != CHIP_ARCTURUS) {
3677 r = gfx_v9_0_cp_gfx_resume(adev);
3678 if (r)
3679 return r;
3682 r = gfx_v9_0_kcq_resume(adev);
3683 if (r)
3684 return r;
3686 if (adev->asic_type != CHIP_ARCTURUS) {
3687 ring = &adev->gfx.gfx_ring[0];
3688 r = amdgpu_ring_test_helper(ring);
3689 if (r)
3690 return r;
3693 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3694 ring = &adev->gfx.compute_ring[i];
3695 amdgpu_ring_test_helper(ring);
3698 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3700 return 0;
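/* Arcturus only: propagate the data fabric hash settings (64K/2M/1G)
 * into TCP_ADDR_CONFIG so the TCP address hashing matches the DF
 * configuration.
 */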
3703 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3705 u32 tmp;
3707 if (adev->asic_type != CHIP_ARCTURUS)
3708 return;
3710 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3711 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3712 adev->df.hash_status.hash_64k);
3713 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3714 adev->df.hash_status.hash_2m);
3715 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3716 adev->df.hash_status.hash_1g);
3717 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3720 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3722 if (adev->asic_type != CHIP_ARCTURUS)
3723 gfx_v9_0_cp_gfx_enable(adev, enable);
3724 gfx_v9_0_cp_compute_enable(adev, enable);
3727 static int gfx_v9_0_hw_init(void *handle)
3729 int r;
3730 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3732 if (!amdgpu_sriov_vf(adev))
3733 gfx_v9_0_init_golden_registers(adev);
3735 gfx_v9_0_constants_init(adev);
3737 gfx_v9_0_init_tcp_config(adev);
3739 r = adev->gfx.rlc.funcs->resume(adev);
3740 if (r)
3741 return r;
3743 r = gfx_v9_0_cp_resume(adev);
3744 if (r)
3745 return r;
3747 return r;
3750 static int gfx_v9_0_hw_fini(void *handle)
3752 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3754 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3755 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3756 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3758 /* DF freeze and kcq disable will fail if a RAS fatal interrupt has been triggered */
3759 if (!amdgpu_ras_intr_triggered())
3760 /* disable KCQ to avoid the CPC touching memory that is no longer valid */
3761 amdgpu_gfx_disable_kcq(adev);
3763 if (amdgpu_sriov_vf(adev)) {
3764 gfx_v9_0_cp_gfx_enable(adev, false);
3765 /* wptr polling must be disabled for SRIOV once the hw is finished,
3766 * otherwise the CPC engine may keep fetching a WB address that is
3767 * already invalid after the sw teardown and trigger a DMAR read
3768 * error on the hypervisor side.
3770 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3771 return 0;
3774 /* Use the deinitialization sequence from CAIL when unbinding the device
3775 * from the driver, otherwise the KIQ hangs when binding it back
3777 if (!adev->in_gpu_reset && !adev->in_suspend) {
3778 mutex_lock(&adev->srbm_mutex);
3779 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3780 adev->gfx.kiq.ring.pipe,
3781 adev->gfx.kiq.ring.queue, 0);
3782 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3783 soc15_grbm_select(adev, 0, 0, 0, 0);
3784 mutex_unlock(&adev->srbm_mutex);
3787 gfx_v9_0_cp_enable(adev, false);
3788 adev->gfx.rlc.funcs->stop(adev);
3790 return 0;
3793 static int gfx_v9_0_suspend(void *handle)
3795 return gfx_v9_0_hw_fini(handle);
3798 static int gfx_v9_0_resume(void *handle)
3800 return gfx_v9_0_hw_init(handle);
3803 static bool gfx_v9_0_is_idle(void *handle)
3805 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3807 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3808 GRBM_STATUS, GUI_ACTIVE))
3809 return false;
3810 else
3811 return true;
3814 static int gfx_v9_0_wait_for_idle(void *handle)
3816 unsigned i;
3817 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3819 for (i = 0; i < adev->usec_timeout; i++) {
3820 if (gfx_v9_0_is_idle(handle))
3821 return 0;
3822 udelay(1);
3824 return -ETIMEDOUT;
3827 static int gfx_v9_0_soft_reset(void *handle)
3829 u32 grbm_soft_reset = 0;
3830 u32 tmp;
3831 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3833 /* GRBM_STATUS */
3834 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3835 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3836 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3837 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3838 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3839 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3840 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3841 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3842 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3843 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3844 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3847 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3848 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3849 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3852 /* GRBM_STATUS2 */
3853 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3854 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3855 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3856 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3859 if (grbm_soft_reset) {
3860 /* stop the rlc */
3861 adev->gfx.rlc.funcs->stop(adev);
3863 if (adev->asic_type != CHIP_ARCTURUS)
3864 /* Disable GFX parsing/prefetching */
3865 gfx_v9_0_cp_gfx_enable(adev, false);
3867 /* Disable MEC parsing/prefetching */
3868 gfx_v9_0_cp_compute_enable(adev, false);
3870 if (grbm_soft_reset) {
3871 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3872 tmp |= grbm_soft_reset;
3873 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3874 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3875 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3877 udelay(50);
3879 tmp &= ~grbm_soft_reset;
3880 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3881 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3884 /* Wait a little for things to settle down */
3885 udelay(50);
3887 return 0;
3890 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3892 uint64_t clock;
3894 mutex_lock(&adev->gfx.gpu_clock_mutex);
3895 if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
3896 uint32_t tmp, lsb, msb, i = 0;
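/* Read MSB, then LSB, then MSB again and retry if the MSB changed in
 * between; this avoids a torn 64-bit value when the low word carries
 * over into the high word between the two reads.
 */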
3897 do {
3898 if (i != 0)
3899 udelay(1);
3900 tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3901 lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
3902 msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3903 i++;
3904 } while (unlikely(tmp != msb) && (i < adev->usec_timeout));
3905 clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
3906 } else {
3907 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3908 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3909 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3911 mutex_unlock(&adev->gfx.gpu_clock_mutex);
3912 return clock;
3915 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3916 uint32_t vmid,
3917 uint32_t gds_base, uint32_t gds_size,
3918 uint32_t gws_base, uint32_t gws_size,
3919 uint32_t oa_base, uint32_t oa_size)
3921 struct amdgpu_device *adev = ring->adev;
3923 /* GDS Base */
3924 gfx_v9_0_write_data_to_reg(ring, 0, false,
3925 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3926 gds_base);
3928 /* GDS Size */
3929 gfx_v9_0_write_data_to_reg(ring, 0, false,
3930 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3931 gds_size);
3933 /* GWS */
3934 gfx_v9_0_write_data_to_reg(ring, 0, false,
3935 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3936 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3938 /* OA */
3939 gfx_v9_0_write_data_to_reg(ring, 0, false,
3940 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3941 (1 << (oa_size + oa_base)) - (1 << oa_base));
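/* Pre-assembled gfx9 compute shaders used by the EDC GPR workarounds
 * below.  The raw dwords are GCN machine code whose purpose, per the
 * surrounding workaround code, is to touch every VGPR/SGPR so the
 * register file ECC state starts clean; not meant to be edited by hand.
 */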
3944 static const u32 vgpr_init_compute_shader[] =
3946 0xb07c0000, 0xbe8000ff,
3947 0x000000f8, 0xbf110800,
3948 0x7e000280, 0x7e020280,
3949 0x7e040280, 0x7e060280,
3950 0x7e080280, 0x7e0a0280,
3951 0x7e0c0280, 0x7e0e0280,
3952 0x80808800, 0xbe803200,
3953 0xbf84fff5, 0xbf9c0000,
3954 0xd28c0001, 0x0001007f,
3955 0xd28d0001, 0x0002027e,
3956 0x10020288, 0xb8810904,
3957 0xb7814000, 0xd1196a01,
3958 0x00000301, 0xbe800087,
3959 0xbefc00c1, 0xd89c4000,
3960 0x00020201, 0xd89cc080,
3961 0x00040401, 0x320202ff,
3962 0x00000800, 0x80808100,
3963 0xbf84fff8, 0x7e020280,
3964 0xbf810000, 0x00000000,
3967 static const u32 sgpr_init_compute_shader[] =
3969 0xb07c0000, 0xbe8000ff,
3970 0x0000005f, 0xbee50080,
3971 0xbe812c65, 0xbe822c65,
3972 0xbe832c65, 0xbe842c65,
3973 0xbe852c65, 0xb77c0005,
3974 0x80808500, 0xbf84fff8,
3975 0xbe800080, 0xbf810000,
3978 /* When the register arrays below are changed, please update gpr_reg_size
3979 and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds()
3980 to cover all gfx9 ASICs */
3981 static const struct soc15_reg_entry vgpr_init_regs[] = {
3982 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
3983 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
3984 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
3985 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3986 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
3987 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
3988 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3989 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3990 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3991 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3992 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
3993 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
3994 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
3995 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
3998 static const struct soc15_reg_entry sgpr1_init_regs[] = {
3999 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4000 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4001 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4002 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4003 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4004 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4005 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4006 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4007 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4008 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4009 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4010 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4011 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4012 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4015 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4016 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4017 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4018 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4019 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4020 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4021 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4022 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4023 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4024 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4025 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4026 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4027 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4028 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4029 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4032 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4033 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4034 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4035 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4036 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4037 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4038 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4039 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4040 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4041 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4042 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4043 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4044 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4045 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4046 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4047 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4048 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4049 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4050 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4051 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4052 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4053 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4054 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4055 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4056 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4057 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4058 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4059 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4060 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4061 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4062 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4063 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4064 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4065 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4066 { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1},
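/* EDC workaround for GDS: when GFX RAS is enabled, DMA a fill pattern
 * across the whole GDS aperture from the compute ring so the GDS ECC
 * counters start from a clean state.
 */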
4069 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4071 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4072 int i, r;
4074 /* only support when RAS is enabled */
4075 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4076 return 0;
4078 r = amdgpu_ring_alloc(ring, 7);
4079 if (r) {
4080 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4081 ring->name, r);
4082 return r;
4085 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4086 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4088 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4089 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4090 PACKET3_DMA_DATA_DST_SEL(1) |
4091 PACKET3_DMA_DATA_SRC_SEL(2) |
4092 PACKET3_DMA_DATA_ENGINE(0)));
4093 amdgpu_ring_write(ring, 0);
4094 amdgpu_ring_write(ring, 0);
4095 amdgpu_ring_write(ring, 0);
4096 amdgpu_ring_write(ring, 0);
4097 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4098 adev->gds.gds_size);
4100 amdgpu_ring_commit(ring);
4102 for (i = 0; i < adev->usec_timeout; i++) {
4103 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4104 break;
4105 udelay(1);
4108 if (i >= adev->usec_timeout)
4109 r = -ETIMEDOUT;
4111 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4113 return r;
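/* EDC workaround for the GPR files: build a single indirect buffer that
 * dispatches the VGPR and both SGPR init shaders across all CUs, then
 * read back every SEC/DED counter register to clear any stale counts.
 */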
4116 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4118 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4119 struct amdgpu_ib ib;
4120 struct dma_fence *f = NULL;
4121 int r, i, j, k;
4122 unsigned total_size, vgpr_offset, sgpr_offset;
4123 u64 gpu_addr;
4125 int compute_dim_x = adev->gfx.config.max_shader_engines *
4126 adev->gfx.config.max_cu_per_sh *
4127 adev->gfx.config.max_sh_per_se;
4128 int sgpr_work_group_size = 5;
4129 int gpr_reg_size = compute_dim_x / 16 + 6;
4131 /* only support when RAS is enabled */
4132 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4133 return 0;
4135 /* bail if the compute ring is not ready */
4136 if (!ring->sched.ready)
4137 return 0;
4139 total_size =
4140 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4141 total_size +=
4142 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4143 total_size +=
4144 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4145 total_size = ALIGN(total_size, 256);
4146 vgpr_offset = total_size;
4147 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4148 sgpr_offset = total_size;
4149 total_size += sizeof(sgpr_init_compute_shader);
4151 /* allocate an indirect buffer to put the commands in */
4152 memset(&ib, 0, sizeof(ib));
4153 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4154 if (r) {
4155 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4156 return r;
4159 /* load the compute shaders */
4160 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4161 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4163 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4164 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4166 /* init the ib length to 0 */
4167 ib.length_dw = 0;
4169 /* VGPR */
4170 /* write the register state for the compute dispatch */
4171 for (i = 0; i < gpr_reg_size; i++) {
4172 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4173 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4174 - PACKET3_SET_SH_REG_START;
4175 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4177 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4178 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4179 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4180 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4181 - PACKET3_SET_SH_REG_START;
4182 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4183 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4185 /* write dispatch packet */
4186 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4187 ib.ptr[ib.length_dw++] = compute_dim_x; /* x */
4188 ib.ptr[ib.length_dw++] = 1; /* y */
4189 ib.ptr[ib.length_dw++] = 1; /* z */
4190 ib.ptr[ib.length_dw++] =
4191 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4193 /* write CS partial flush packet */
4194 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4195 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4197 /* SGPR1 */
4198 /* write the register state for the compute dispatch */
4199 for (i = 0; i < gpr_reg_size; i++) {
4200 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4201 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4202 - PACKET3_SET_SH_REG_START;
4203 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4205 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4206 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4207 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4208 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4209 - PACKET3_SET_SH_REG_START;
4210 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4211 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4213 /* write dispatch packet */
4214 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4215 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4216 ib.ptr[ib.length_dw++] = 1; /* y */
4217 ib.ptr[ib.length_dw++] = 1; /* z */
4218 ib.ptr[ib.length_dw++] =
4219 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4221 /* write CS partial flush packet */
4222 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4223 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4225 /* SGPR2 */
4226 /* write the register state for the compute dispatch */
4227 for (i = 0; i < gpr_reg_size; i++) {
4228 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4229 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4230 - PACKET3_SET_SH_REG_START;
4231 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4233 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4234 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4235 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4236 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4237 - PACKET3_SET_SH_REG_START;
4238 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4239 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4241 /* write dispatch packet */
4242 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4243 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4244 ib.ptr[ib.length_dw++] = 1; /* y */
4245 ib.ptr[ib.length_dw++] = 1; /* z */
4246 ib.ptr[ib.length_dw++] =
4247 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4249 /* write CS partial flush packet */
4250 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4251 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4253 /* schedule the ib on the ring */
4254 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4255 if (r) {
4256 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4257 goto fail;
4260 /* wait for the GPU to finish processing the IB */
4261 r = dma_fence_wait(f, false);
4262 if (r) {
4263 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4264 goto fail;
4267 /* read back registers to clear the counters */
4268 mutex_lock(&adev->grbm_idx_mutex);
4269 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4270 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4271 for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4272 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4273 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4277 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4278 mutex_unlock(&adev->grbm_idx_mutex);
4280 fail:
4281 amdgpu_ib_free(adev, &ib, NULL);
4282 dma_fence_put(f);
4284 return r;
4287 static int gfx_v9_0_early_init(void *handle)
4289 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4291 if (adev->asic_type == CHIP_ARCTURUS)
4292 adev->gfx.num_gfx_rings = 0;
4293 else
4294 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4295 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4296 gfx_v9_0_set_kiq_pm4_funcs(adev);
4297 gfx_v9_0_set_ring_funcs(adev);
4298 gfx_v9_0_set_irq_funcs(adev);
4299 gfx_v9_0_set_gds_init(adev);
4300 gfx_v9_0_set_rlc_funcs(adev);
4302 return 0;
4305 static int gfx_v9_0_ecc_late_init(void *handle)
4307 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4308 int r;
4310 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4311 if (r)
4312 return r;
4314 /* requires IBs so do in late init after IB pool is initialized */
4315 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4316 if (r)
4317 return r;
4319 r = amdgpu_gfx_ras_late_init(adev);
4320 if (r)
4321 return r;
4323 return 0;
4326 static int gfx_v9_0_late_init(void *handle)
4328 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4329 int r;
4331 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4332 if (r)
4333 return r;
4335 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4336 if (r)
4337 return r;
4339 r = gfx_v9_0_ecc_late_init(handle);
4340 if (r)
4341 return r;
4343 return 0;
4346 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4348 uint32_t rlc_setting;
4350 /* if RLC is not enabled, do nothing */
4351 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4352 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4353 return false;
4355 return true;
4358 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4360 uint32_t data;
4361 unsigned i;
4363 data = RLC_SAFE_MODE__CMD_MASK;
4364 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4365 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4367 /* wait for RLC_SAFE_MODE */
4368 for (i = 0; i < adev->usec_timeout; i++) {
4369 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4370 break;
4371 udelay(1);
4375 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4377 uint32_t data;
4379 data = RLC_SAFE_MODE__CMD_MASK;
4380 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4383 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4384 bool enable)
4386 amdgpu_gfx_rlc_enter_safe_mode(adev);
4388 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4389 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4390 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4391 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4392 } else {
4393 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4394 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4395 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4398 amdgpu_gfx_rlc_exit_safe_mode(adev);
4401 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4402 bool enable)
4404 /* TODO: double check if we need to perform under safe mode */
4405 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4407 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4408 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4409 else
4410 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4412 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4413 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4414 else
4415 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4417 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
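/* Medium grain clock gating (MGCG) plus RLC/CP memory light sleep.
 * Enabling clears the relevant RLC_CGTT_MGCG_OVERRIDE bits and turns on
 * memory light sleep; disabling restores the override bits and switches
 * light sleep back off.  Everything is done under RLC safe mode.
 */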
4420 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4421 bool enable)
4423 uint32_t data, def;
4425 amdgpu_gfx_rlc_enter_safe_mode(adev);
4427 /* It is disabled by HW by default */
4428 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4429 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4430 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4432 if (adev->asic_type != CHIP_VEGA12)
4433 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4435 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4436 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4437 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4439 /* only for Vega10 & Raven1 */
4440 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4442 if (def != data)
4443 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4445 /* MGLS is a global flag to control all MGLS in GFX */
4446 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4447 /* 2 - RLC memory Light sleep */
4448 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4449 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4450 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4451 if (def != data)
4452 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4454 /* 3 - CP memory Light sleep */
4455 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4456 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4457 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4458 if (def != data)
4459 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4462 } else {
4463 /* 1 - MGCG_OVERRIDE */
4464 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4466 if (adev->asic_type != CHIP_VEGA12)
4467 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4469 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4470 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4471 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4472 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4474 if (def != data)
4475 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4477 /* 2 - disable MGLS in RLC */
4478 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4479 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4480 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4481 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4484 /* 3 - disable MGLS in CP */
4485 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4486 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4487 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4488 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4492 amdgpu_gfx_rlc_exit_safe_mode(adev);
4495 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4496 bool enable)
4498 uint32_t data, def;
4500 if (adev->asic_type == CHIP_ARCTURUS)
4501 return;
4503 amdgpu_gfx_rlc_enter_safe_mode(adev);
4505 /* Enable 3D CGCG/CGLS */
4506 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4507 /* write cmd to clear cgcg/cgls ov */
4508 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4509 /* unset CGCG override */
4510 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4511 /* update CGCG and CGLS override bits */
4512 if (def != data)
4513 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4515 /* enable 3Dcgcg FSM(0x0000363f) */
4516 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4518 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4519 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4520 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4521 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4522 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4523 if (def != data)
4524 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4526 /* set IDLE_POLL_COUNT(0x00900100) */
4527 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4528 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4529 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4530 if (def != data)
4531 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4532 } else {
4533 /* Disable CGCG/CGLS */
4534 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4535 /* disable cgcg, cgls should be disabled */
4536 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4537 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4538 /* disable cgcg and cgls in FSM */
4539 if (def != data)
4540 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4543 amdgpu_gfx_rlc_exit_safe_mode(adev);
4546 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4547 bool enable)
4549 uint32_t def, data;
4551 amdgpu_gfx_rlc_enter_safe_mode(adev);
4553 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4554 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4555 /* unset CGCG override */
4556 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4557 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4558 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4559 else
4560 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4561 /* update CGCG and CGLS override bits */
4562 if (def != data)
4563 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4565 /* enable cgcg FSM(0x0000363F) */
4566 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4568 if (adev->asic_type == CHIP_ARCTURUS)
4569 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4570 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4571 else
4572 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4573 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4574 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4575 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4576 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4577 if (def != data)
4578 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4580 /* set IDLE_POLL_COUNT(0x00900100) */
4581 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4582 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4583 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4584 if (def != data)
4585 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4586 } else {
4587 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4588 /* reset CGCG/CGLS bits */
4589 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4590 /* disable cgcg and cgls in FSM */
4591 if (def != data)
4592 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4595 amdgpu_gfx_rlc_exit_safe_mode(adev);
4598 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4599 bool enable)
4601 if (enable) {
4602 /* CGCG/CGLS should be enabled after MGCG/MGLS
4603 * === MGCG + MGLS ===
4605 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4606 /* === CGCG /CGLS for GFX 3D Only === */
4607 gfx_v9_0_update_3d_clock_gating(adev, enable);
4608 /* === CGCG + CGLS === */
4609 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4610 } else {
4611 /* CGCG/CGLS should be disabled before MGCG/MGLS
4612 * === CGCG + CGLS ===
4614 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4615 /* === CGCG /CGLS for GFX 3D Only === */
4616 gfx_v9_0_update_3d_clock_gating(adev, enable);
4617 /* === MGCG + MGLS === */
4618 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4620 return 0;
4623 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4624 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4625 .set_safe_mode = gfx_v9_0_set_safe_mode,
4626 .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4627 .init = gfx_v9_0_rlc_init,
4628 .get_csb_size = gfx_v9_0_get_csb_size,
4629 .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4630 .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4631 .resume = gfx_v9_0_rlc_resume,
4632 .stop = gfx_v9_0_rlc_stop,
4633 .reset = gfx_v9_0_rlc_reset,
4634 .start = gfx_v9_0_rlc_start
4637 static int gfx_v9_0_set_powergating_state(void *handle,
4638 enum amd_powergating_state state)
4640 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4641 bool enable = (state == AMD_PG_STATE_GATE);
4643 switch (adev->asic_type) {
4644 case CHIP_RAVEN:
4645 case CHIP_RENOIR:
4646 if (!enable) {
4647 amdgpu_gfx_off_ctrl(adev, false);
4648 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4650 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4651 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4652 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4653 } else {
4654 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4655 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4658 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4659 gfx_v9_0_enable_cp_power_gating(adev, true);
4660 else
4661 gfx_v9_0_enable_cp_power_gating(adev, false);
4663 /* update gfx cgpg state */
4664 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4666 /* update mgcg state */
4667 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4669 if (enable)
4670 amdgpu_gfx_off_ctrl(adev, true);
4671 break;
4672 case CHIP_VEGA12:
4673 if (!enable) {
4674 amdgpu_gfx_off_ctrl(adev, false);
4675 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4676 } else {
4677 amdgpu_gfx_off_ctrl(adev, true);
4679 break;
4680 default:
4681 break;
4684 return 0;
4687 static int gfx_v9_0_set_clockgating_state(void *handle,
4688 enum amd_clockgating_state state)
4690 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4692 if (amdgpu_sriov_vf(adev))
4693 return 0;
4695 switch (adev->asic_type) {
4696 case CHIP_VEGA10:
4697 case CHIP_VEGA12:
4698 case CHIP_VEGA20:
4699 case CHIP_RAVEN:
4700 case CHIP_ARCTURUS:
4701 case CHIP_RENOIR:
4702 gfx_v9_0_update_gfx_clock_gating(adev,
4703 state == AMD_CG_STATE_GATE);
4704 break;
4705 default:
4706 break;
4708 return 0;
4711 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4713 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4714 int data;
4716 if (amdgpu_sriov_vf(adev))
4717 *flags = 0;
4719 /* AMD_CG_SUPPORT_GFX_MGCG */
4720 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4721 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4722 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4724 /* AMD_CG_SUPPORT_GFX_CGCG */
4725 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4726 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4727 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4729 /* AMD_CG_SUPPORT_GFX_CGLS */
4730 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4731 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4733 /* AMD_CG_SUPPORT_GFX_RLC_LS */
4734 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4735 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4736 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4738 /* AMD_CG_SUPPORT_GFX_CP_LS */
4739 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4740 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4741 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4743 if (adev->asic_type != CHIP_ARCTURUS) {
4744 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4745 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4746 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4747 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4749 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4750 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4751 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4755 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4757 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4760 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4762 struct amdgpu_device *adev = ring->adev;
4763 u64 wptr;
4765 /* XXX check if swapping is necessary on BE */
4766 if (ring->use_doorbell) {
4767 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4768 } else {
4769 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4770 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4773 return wptr;
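/*
 * A brief note on the two paths above: with a doorbell the full 64-bit
 * write pointer lives in the writeback slot, while the MMIO fallback has
 * to reassemble it from two 32-bit registers.  A minimal sketch of that
 * reassembly, using the same registers read above:
 *
 *     wptr  = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
 *     wptr |= (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
 *
 * e.g. WPTR = 0x00001000 with WPTR_HI = 0x1 yields a 64-bit wptr of
 * 0x100001000.
 */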
4776 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4778 struct amdgpu_device *adev = ring->adev;
4780 if (ring->use_doorbell) {
4781 /* XXX check if swapping is necessary on BE */
4782 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4783 WDOORBELL64(ring->doorbell_index, ring->wptr);
4784 } else {
4785 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4786 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4790 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4792 struct amdgpu_device *adev = ring->adev;
4793 u32 ref_and_mask, reg_mem_engine;
4794 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4796 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4797 switch (ring->me) {
4798 case 1:
4799 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4800 break;
4801 case 2:
4802 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4803 break;
4804 default:
4805 return;
4807 reg_mem_engine = 0;
4808 } else {
4809 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4810 reg_mem_engine = 1; /* pfp */
4813 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4814 adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4815 adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4816 ref_and_mask, ref_and_mask, 0x20);
4819 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4820 struct amdgpu_job *job,
4821 struct amdgpu_ib *ib,
4822 uint32_t flags)
4824 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4825 u32 header, control = 0;
4827 if (ib->flags & AMDGPU_IB_FLAG_CE)
4828 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4829 else
4830 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4832 control |= ib->length_dw | (vmid << 24);
4834 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4835 control |= INDIRECT_BUFFER_PRE_ENB(1);
4837 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4838 gfx_v9_0_ring_emit_de_meta(ring);
4841 amdgpu_ring_write(ring, header);
4842 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4843 amdgpu_ring_write(ring,
4844 #ifdef __BIG_ENDIAN
4845 (2 << 0) |
4846 #endif
4847 lower_32_bits(ib->gpu_addr));
4848 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4849 amdgpu_ring_write(ring, control);
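/*
 * Packet layout note (a worked example derived from the emit above): an
 * INDIRECT_BUFFER packet is four dwords - header, IB address low, IB
 * address high, and a control word of length_dw | (vmid << 24).  For a
 * 16-dword IB scheduled under VMID 3 the control dword would therefore be
 *
 *     control = 16 | (3 << 24) = 0x03000010
 *
 * with INDIRECT_BUFFER_PRE_ENB(1) OR'ed in only for preemptible SR-IOV IBs.
 */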
4852 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4853 struct amdgpu_job *job,
4854 struct amdgpu_ib *ib,
4855 uint32_t flags)
4857 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4858 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4860 /* Currently, there is a high probability of a wave ID mismatch
4861 * between ME and GDS, leading to a HW deadlock, because ME generates
4862 * different wave IDs from the ones GDS expects. This situation happens
4863 * randomly when at least 5 compute pipes use GDS ordered append.
4864 * The wave IDs generated by ME are also wrong after suspend/resume.
4865 * Those are probably bugs somewhere else in the kernel driver.
4867 * Writing GDS_COMPUTE_MAX_WAVE_ID resets the wave ID counters in ME and
4868 * GDS to 0 for this ring (me/pipe).
4870 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4871 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4872 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4873 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4876 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4877 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4878 amdgpu_ring_write(ring,
4879 #ifdef __BIG_ENDIAN
4880 (2 << 0) |
4881 #endif
4882 lower_32_bits(ib->gpu_addr));
4883 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4884 amdgpu_ring_write(ring, control);
4887 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4888 u64 seq, unsigned flags)
4890 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4891 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4892 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4894 /* RELEASE_MEM - flush caches, send int */
4895 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4896 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4897 EOP_TC_NC_ACTION_EN) :
4898 (EOP_TCL1_ACTION_EN |
4899 EOP_TC_ACTION_EN |
4900 EOP_TC_WB_ACTION_EN |
4901 EOP_TC_MD_ACTION_EN)) |
4902 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4903 EVENT_INDEX(5)));
4904 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4907 * the address should be Qword aligned for a 64bit write, and Dword
4908 * aligned if only the low 32bit of data is sent (data high is discarded)
4910 if (write64bit)
4911 BUG_ON(addr & 0x7);
4912 else
4913 BUG_ON(addr & 0x3);
4914 amdgpu_ring_write(ring, lower_32_bits(addr));
4915 amdgpu_ring_write(ring, upper_32_bits(addr));
4916 amdgpu_ring_write(ring, lower_32_bits(seq));
4917 amdgpu_ring_write(ring, upper_32_bits(seq));
4918 amdgpu_ring_write(ring, 0);
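/*
 * Fence encoding note (a sketch based on the RELEASE_MEM emit above): the
 * DATA_SEL field picks a 32-bit (1) or 64-bit (2) fence write, which is
 * why the address must be Dword- or Qword-aligned respectively, and
 * INT_SEL(2) asks the CP to raise an EOP interrupt once the write lands.
 * So a typical 64-bit interrupting fence uses
 *
 *     DATA_SEL(2) | INT_SEL(2)
 *
 * while a polled 32-bit fence uses DATA_SEL(1) | INT_SEL(0).
 */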
4921 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4923 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4924 uint32_t seq = ring->fence_drv.sync_seq;
4925 uint64_t addr = ring->fence_drv.gpu_addr;
4927 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4928 lower_32_bits(addr), upper_32_bits(addr),
4929 seq, 0xffffffff, 4);
4932 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4933 unsigned vmid, uint64_t pd_addr)
4935 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4937 /* compute doesn't have PFP */
4938 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4939 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4940 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4941 amdgpu_ring_write(ring, 0x0);
4945 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4947 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4950 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4952 u64 wptr;
4954 /* XXX check if swapping is necessary on BE */
4955 if (ring->use_doorbell)
4956 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4957 else
4958 BUG();
4959 return wptr;
4962 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4963 bool acquire)
4965 struct amdgpu_device *adev = ring->adev;
4966 int pipe_num, tmp, reg;
4967 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4969 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4971 /* first me only has 2 entries, GFX and HP3D */
4972 if (ring->me > 0)
4973 pipe_num -= 2;
4975 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4976 tmp = RREG32(reg);
4977 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4978 WREG32(reg, tmp);
4981 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4982 struct amdgpu_ring *ring,
4983 bool acquire)
4985 int i, pipe;
4986 bool reserve;
4987 struct amdgpu_ring *iring;
4989 mutex_lock(&adev->gfx.pipe_reserve_mutex);
4990 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4991 if (acquire)
4992 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4993 else
4994 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4996 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4997 /* Clear all reservations - everyone reacquires all resources */
4998 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4999 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5000 true);
5002 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5003 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5004 true);
5005 } else {
5006 /* Lower all pipes without a current reservation */
5007 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5008 iring = &adev->gfx.gfx_ring[i];
5009 pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5010 iring->me,
5011 iring->pipe,
5013 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5014 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5017 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5018 iring = &adev->gfx.compute_ring[i];
5019 pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5020 iring->me,
5021 iring->pipe,
5023 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5024 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5028 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5031 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5032 struct amdgpu_ring *ring,
5033 bool acquire)
5035 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5036 uint32_t queue_priority = acquire ? 0xf : 0x0;
5038 mutex_lock(&adev->srbm_mutex);
5039 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5041 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5042 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5044 soc15_grbm_select(adev, 0, 0, 0, 0);
5045 mutex_unlock(&adev->srbm_mutex);
5048 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5049 enum drm_sched_priority priority)
5051 struct amdgpu_device *adev = ring->adev;
5052 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5054 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5055 return;
5057 gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5058 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5061 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5063 struct amdgpu_device *adev = ring->adev;
5065 /* XXX check if swapping is necessary on BE */
5066 if (ring->use_doorbell) {
5067 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5068 WDOORBELL64(ring->doorbell_index, ring->wptr);
5069 } else {
5070 BUG(); /* only DOORBELL method supported on gfx9 now */
5074 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5075 u64 seq, unsigned int flags)
5077 struct amdgpu_device *adev = ring->adev;
5079 /* we only allocate 32bit for each seq wb address */
5080 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5082 /* write fence seq to the "addr" */
5083 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5084 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5085 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5086 amdgpu_ring_write(ring, lower_32_bits(addr));
5087 amdgpu_ring_write(ring, upper_32_bits(addr));
5088 amdgpu_ring_write(ring, lower_32_bits(seq));
5090 if (flags & AMDGPU_FENCE_FLAG_INT) {
5091 /* set register to trigger INT */
5092 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5093 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5094 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5095 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5096 amdgpu_ring_write(ring, 0);
5097 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5101 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5103 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5104 amdgpu_ring_write(ring, 0);
5107 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5109 struct v9_ce_ib_state ce_payload = {0};
5110 uint64_t csa_addr;
5111 int cnt;
5113 cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5114 csa_addr = amdgpu_csa_vaddr(ring->adev);
5116 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5117 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5118 WRITE_DATA_DST_SEL(8) |
5119 WR_CONFIRM) |
5120 WRITE_DATA_CACHE_POLICY(0));
5121 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5122 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5123 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5126 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5128 struct v9_de_ib_state de_payload = {0};
5129 uint64_t csa_addr, gds_addr;
5130 int cnt;
5132 csa_addr = amdgpu_csa_vaddr(ring->adev);
5133 gds_addr = csa_addr + 4096;
5134 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5135 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5137 cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5138 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5139 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5140 WRITE_DATA_DST_SEL(8) |
5141 WR_CONFIRM) |
5142 WRITE_DATA_CACHE_POLICY(0));
5143 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5144 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5145 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5148 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5150 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5151 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0: frame_start, 1: frame_end */
5154 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5156 uint32_t dw2 = 0;
5158 if (amdgpu_sriov_vf(ring->adev))
5159 gfx_v9_0_ring_emit_ce_meta(ring);
5161 gfx_v9_0_ring_emit_tmz(ring, true);
5163 dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5164 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5165 /* set load_global_config & load_global_uconfig */
5166 dw2 |= 0x8001;
5167 /* set load_cs_sh_regs */
5168 dw2 |= 0x01000000;
5169 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5170 dw2 |= 0x10002;
5172 /* set load_ce_ram if preamble presented */
5173 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5174 dw2 |= 0x10000000;
5175 } else {
5176 /* still load_ce_ram if this is the first time a preamble is presented,
5177 * even though no context switch happens.
5179 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5180 dw2 |= 0x10000000;
5183 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5184 amdgpu_ring_write(ring, dw2);
5185 amdgpu_ring_write(ring, 0);
5188 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5190 unsigned ret;
5191 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5192 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5193 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5194 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5195 ret = ring->wptr & ring->buf_mask;
5196 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5197 return ret;
5200 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5202 unsigned cur;
5203 BUG_ON(offset > ring->buf_mask);
5204 BUG_ON(ring->ring[offset] != 0x55aa55aa);
5206 cur = (ring->wptr & ring->buf_mask) - 1;
5207 if (likely(cur > offset))
5208 ring->ring[offset] = cur - offset;
5209 else
5210 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
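/*
 * Worked example for the wrap handling above (sizes are illustrative):
 * with a 4096-dword ring, a placeholder written at index 1000 and the last
 * emitted dword at index 1050, the patched skip count is 1050 - 1000 = 50.
 * If the ring has wrapped so that the last dword sits at index 10, the
 * count becomes 4096 - 1000 + 10 = 3106, i.e. the same distance measured
 * around the wrap point.
 */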
5213 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5215 struct amdgpu_device *adev = ring->adev;
5217 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5218 amdgpu_ring_write(ring, 0 | /* src: register*/
5219 (5 << 8) | /* dst: memory */
5220 (1 << 20)); /* write confirm */
5221 amdgpu_ring_write(ring, reg);
5222 amdgpu_ring_write(ring, 0);
5223 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5224 adev->virt.reg_val_offs * 4));
5225 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5226 adev->virt.reg_val_offs * 4));
5229 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5230 uint32_t val)
5232 uint32_t cmd = 0;
5234 switch (ring->funcs->type) {
5235 case AMDGPU_RING_TYPE_GFX:
5236 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5237 break;
5238 case AMDGPU_RING_TYPE_KIQ:
5239 cmd = (1 << 16); /* no inc addr */
5240 break;
5241 default:
5242 cmd = WR_CONFIRM;
5243 break;
5245 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5246 amdgpu_ring_write(ring, cmd);
5247 amdgpu_ring_write(ring, reg);
5248 amdgpu_ring_write(ring, 0);
5249 amdgpu_ring_write(ring, val);
5252 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5253 uint32_t val, uint32_t mask)
5255 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5258 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5259 uint32_t reg0, uint32_t reg1,
5260 uint32_t ref, uint32_t mask)
5262 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5263 struct amdgpu_device *adev = ring->adev;
5264 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5265 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5267 if (fw_version_ok)
5268 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5269 ref, mask, 0x20);
5270 else
5271 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5272 ref, mask);
5275 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5277 struct amdgpu_device *adev = ring->adev;
5278 uint32_t value = 0;
5280 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5281 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5282 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5283 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5284 WREG32_SOC15(GC, 0, mmSQ_CMD, value);
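/*
 * For reference, each REG_SET_FIELD() above expands to roughly
 *
 *     (orig & ~SQ_CMD__<FIELD>_MASK) |
 *     (((val) << SQ_CMD__<FIELD>__SHIFT) & SQ_CMD__<FIELD>_MASK)
 *
 * so the function assembles a single SQ_CMD write whose CHECK_VMID/VM_ID
 * fields restrict the command to waves belonging to the hung job's VMID,
 * which is what keeps this a targeted soft recovery rather than a full
 * ring reset.
 */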
5287 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5288 enum amdgpu_interrupt_state state)
5290 switch (state) {
5291 case AMDGPU_IRQ_STATE_DISABLE:
5292 case AMDGPU_IRQ_STATE_ENABLE:
5293 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5294 TIME_STAMP_INT_ENABLE,
5295 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5296 break;
5297 default:
5298 break;
5302 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5303 int me, int pipe,
5304 enum amdgpu_interrupt_state state)
5306 u32 mec_int_cntl, mec_int_cntl_reg;
5309 * amdgpu controls only the first MEC. That's why this function only
5310 * handles the setting of interrupts for this specific MEC. All other
5311 * pipes' interrupts are set by amdkfd.
5314 if (me == 1) {
5315 switch (pipe) {
5316 case 0:
5317 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5318 break;
5319 case 1:
5320 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5321 break;
5322 case 2:
5323 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5324 break;
5325 case 3:
5326 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5327 break;
5328 default:
5329 DRM_DEBUG("invalid pipe %d\n", pipe);
5330 return;
5332 } else {
5333 DRM_DEBUG("invalid me %d\n", me);
5334 return;
5337 switch (state) {
5338 case AMDGPU_IRQ_STATE_DISABLE:
5339 mec_int_cntl = RREG32(mec_int_cntl_reg);
5340 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5341 TIME_STAMP_INT_ENABLE, 0);
5342 WREG32(mec_int_cntl_reg, mec_int_cntl);
5343 break;
5344 case AMDGPU_IRQ_STATE_ENABLE:
5345 mec_int_cntl = RREG32(mec_int_cntl_reg);
5346 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5347 TIME_STAMP_INT_ENABLE, 1);
5348 WREG32(mec_int_cntl_reg, mec_int_cntl);
5349 break;
5350 default:
5351 break;
5355 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5356 struct amdgpu_irq_src *source,
5357 unsigned type,
5358 enum amdgpu_interrupt_state state)
5360 switch (state) {
5361 case AMDGPU_IRQ_STATE_DISABLE:
5362 case AMDGPU_IRQ_STATE_ENABLE:
5363 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5364 PRIV_REG_INT_ENABLE,
5365 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5366 break;
5367 default:
5368 break;
5371 return 0;
5374 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5375 struct amdgpu_irq_src *source,
5376 unsigned type,
5377 enum amdgpu_interrupt_state state)
5379 switch (state) {
5380 case AMDGPU_IRQ_STATE_DISABLE:
5381 case AMDGPU_IRQ_STATE_ENABLE:
5382 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5383 PRIV_INSTR_INT_ENABLE,
5384 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
break;
5385 default:
5386 break;
5389 return 0;
5392 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \
5393 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5394 CP_ECC_ERROR_INT_ENABLE, 1)
5396 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \
5397 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5398 CP_ECC_ERROR_INT_ENABLE, 0)
5400 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5401 struct amdgpu_irq_src *source,
5402 unsigned type,
5403 enum amdgpu_interrupt_state state)
5405 switch (state) {
5406 case AMDGPU_IRQ_STATE_DISABLE:
5407 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5408 CP_ECC_ERROR_INT_ENABLE, 0);
5409 DISABLE_ECC_ON_ME_PIPE(1, 0);
5410 DISABLE_ECC_ON_ME_PIPE(1, 1);
5411 DISABLE_ECC_ON_ME_PIPE(1, 2);
5412 DISABLE_ECC_ON_ME_PIPE(1, 3);
5413 break;
5415 case AMDGPU_IRQ_STATE_ENABLE:
5416 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5417 CP_ECC_ERROR_INT_ENABLE, 1);
5418 ENABLE_ECC_ON_ME_PIPE(1, 0);
5419 ENABLE_ECC_ON_ME_PIPE(1, 1);
5420 ENABLE_ECC_ON_ME_PIPE(1, 2);
5421 ENABLE_ECC_ON_ME_PIPE(1, 3);
5422 break;
5423 default:
5424 break;
5427 return 0;
5431 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5432 struct amdgpu_irq_src *src,
5433 unsigned type,
5434 enum amdgpu_interrupt_state state)
5436 switch (type) {
5437 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5438 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5439 break;
5440 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5441 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5442 break;
5443 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5444 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5445 break;
5446 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5447 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5448 break;
5449 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5450 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5451 break;
5452 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5453 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5454 break;
5455 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5456 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5457 break;
5458 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5459 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5460 break;
5461 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5462 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5463 break;
5464 default:
5465 break;
5467 return 0;
5470 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5471 struct amdgpu_irq_src *source,
5472 struct amdgpu_iv_entry *entry)
5474 int i;
5475 u8 me_id, pipe_id, queue_id;
5476 struct amdgpu_ring *ring;
5478 DRM_DEBUG("IH: CP EOP\n");
5479 me_id = (entry->ring_id & 0x0c) >> 2;
5480 pipe_id = (entry->ring_id & 0x03) >> 0;
5481 queue_id = (entry->ring_id & 0x70) >> 4;
5483 switch (me_id) {
5484 case 0:
5485 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5486 break;
5487 case 1:
5488 case 2:
5489 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5490 ring = &adev->gfx.compute_ring[i];
5491 /* Per-queue interrupt is supported for MEC starting from VI.
5492 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5494 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5495 amdgpu_fence_process(ring);
5497 break;
5499 return 0;
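/*
 * Decoding example for the bit-slicing above: an IV ring_id of 0x26
 * (0b0100110) yields me_id = (0x26 & 0x0c) >> 2 = 1, pipe_id = 0x26 & 0x03
 * = 2 and queue_id = (0x26 & 0x70) >> 4 = 2, i.e. the fence is processed
 * on the MEC1/pipe2/queue2 compute ring; me_id 0 always routes to the
 * single gfx ring.
 */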
5502 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5503 struct amdgpu_iv_entry *entry)
5505 u8 me_id, pipe_id, queue_id;
5506 struct amdgpu_ring *ring;
5507 int i;
5509 me_id = (entry->ring_id & 0x0c) >> 2;
5510 pipe_id = (entry->ring_id & 0x03) >> 0;
5511 queue_id = (entry->ring_id & 0x70) >> 4;
5513 switch (me_id) {
5514 case 0:
5515 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5516 break;
5517 case 1:
5518 case 2:
5519 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5520 ring = &adev->gfx.compute_ring[i];
5521 if (ring->me == me_id && ring->pipe == pipe_id &&
5522 ring->queue == queue_id)
5523 drm_sched_fault(&ring->sched);
5525 break;
5529 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5530 struct amdgpu_irq_src *source,
5531 struct amdgpu_iv_entry *entry)
5533 DRM_ERROR("Illegal register access in command stream\n");
5534 gfx_v9_0_fault(adev, entry);
5535 return 0;
5538 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5539 struct amdgpu_irq_src *source,
5540 struct amdgpu_iv_entry *entry)
5542 DRM_ERROR("Illegal instruction in command stream\n");
5543 gfx_v9_0_fault(adev, entry);
5544 return 0;
5548 static const struct soc15_ras_field_entry gc_ras_fields_vg20[] = {
5549 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5550 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5551 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5553 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5554 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5555 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5557 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5558 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5559 0, 0
5561 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5562 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5563 0, 0
5565 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5566 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5567 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5569 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5570 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5571 0, 0
5573 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5574 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5575 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5577 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5578 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5579 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5581 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5582 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5583 0, 0
5585 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5586 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5587 0, 0
5589 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5590 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5591 0, 0
5593 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5594 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5595 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5597 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5598 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5599 0, 0
5601 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5602 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5603 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5605 { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5606 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5607 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5608 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5610 { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5611 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5612 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5613 0, 0
5615 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5616 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5617 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5618 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5620 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5621 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5622 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5623 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5625 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5626 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5627 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5628 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5630 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5631 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5632 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5633 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5635 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5636 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5637 0, 0
5639 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5640 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5641 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5643 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5644 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5645 0, 0
5647 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5648 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5649 0, 0
5651 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5652 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5653 0, 0
5655 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5656 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5657 0, 0
5659 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5660 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5661 0, 0
5663 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5664 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5665 0, 0
5667 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5668 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5669 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5671 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5672 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5673 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5675 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5676 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5677 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5679 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5680 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5681 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5683 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5684 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5685 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5687 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5688 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5689 0, 0
5691 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5692 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5693 0, 0
5695 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5696 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5697 0, 0
5699 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5700 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5701 0, 0
5703 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5704 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5705 0, 0
5707 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5708 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
5709 0, 0
5711 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5712 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
5713 0, 0
5715 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5716 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
5717 0, 0
5719 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5720 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
5721 0, 0
5723 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5724 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5725 0, 0
5727 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5728 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
5729 0, 0
5731 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5732 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5733 0, 0
5735 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5736 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
5737 0, 0
5739 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
5740 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
5741 0, 0
5743 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5744 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5745 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
5747 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5748 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5749 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
5751 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5752 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
5753 0, 0
5755 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5756 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
5757 0, 0
5759 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5760 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
5761 0, 0
5763 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5764 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5765 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
5767 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5768 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5769 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
5771 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5772 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5773 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
5775 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5776 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5777 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
5779 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5780 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
5781 0, 0
5783 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5784 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5785 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
5787 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5788 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5789 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
5791 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5792 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
5793 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
5795 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5796 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5797 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
5799 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5800 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5801 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
5803 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5804 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5805 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
5807 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5808 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5809 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
5811 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5812 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5813 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
5815 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5816 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5817 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
5819 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5820 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5821 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
5823 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5824 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5825 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
5827 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5828 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5829 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
5831 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5832 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5833 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
5835 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5836 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5837 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
5839 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5840 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5841 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
5843 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5844 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5845 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
5847 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5848 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5849 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
5851 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5852 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5853 0, 0
5855 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5856 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
5857 0, 0
5859 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5860 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
5861 0, 0
5863 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5864 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
5865 0, 0
5867 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5868 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
5869 0, 0
5871 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5872 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5873 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
5875 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5876 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5877 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
5879 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5880 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5881 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
5883 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5884 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5885 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
5887 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5888 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5889 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
5891 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5892 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5893 0, 0
5895 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5896 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
5897 0, 0
5899 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5900 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
5901 0, 0
5903 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5904 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
5905 0, 0
5907 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5908 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
5909 0, 0
5911 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5912 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5913 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
5915 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5916 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5917 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
5919 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5920 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5921 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
5923 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5924 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5925 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
5927 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5928 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
5929 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
5931 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5932 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
5933 0, 0
5935 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5936 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
5937 0, 0
5939 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5940 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
5941 0, 0
5943 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5944 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
5945 0, 0
5947 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5948 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
5949 0, 0
5951 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5952 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
5953 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
5955 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5956 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
5957 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
5959 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5960 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
5961 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
5963 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5964 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
5965 0, 0
5967 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5968 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
5969 0, 0
5971 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5972 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
5973 0, 0
5975 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5976 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
5977 0, 0
5979 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5980 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
5981 0, 0
5983 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5984 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
5985 0, 0
5989 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
5990 void *inject_if)
5992 struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
5993 int ret;
5994 struct ta_ras_trigger_error_input block_info = { 0 };
5996 if (adev->asic_type != CHIP_VEGA20)
5997 return -EINVAL;
5999 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6000 return -EINVAL;
6002 if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6003 return -EPERM;
6005 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6006 info->head.type)) {
6007 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6008 ras_gfx_subblocks[info->head.sub_block_index].name,
6009 info->head.type);
6010 return -EPERM;
6013 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6014 info->head.type)) {
6015 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6016 ras_gfx_subblocks[info->head.sub_block_index].name,
6017 info->head.type);
6018 return -EPERM;
6021 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6022 block_info.sub_block_index =
6023 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6024 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6025 block_info.address = info->address;
6026 block_info.value = info->value;
6028 mutex_lock(&adev->grbm_idx_mutex);
6029 ret = psp_ras_trigger_error(&adev->psp, &block_info);
6030 mutex_unlock(&adev->grbm_idx_mutex);
6032 return ret;
6035 static const char *vml2_mems[] = {
6036 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6037 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6038 "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6039 "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6040 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6041 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6042 "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6043 "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6044 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6045 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6046 "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6047 "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6048 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6049 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6050 "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6051 "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6054 static const char *vml2_walker_mems[] = {
6055 "UTC_VML2_CACHE_PDE0_MEM0",
6056 "UTC_VML2_CACHE_PDE0_MEM1",
6057 "UTC_VML2_CACHE_PDE1_MEM0",
6058 "UTC_VML2_CACHE_PDE1_MEM1",
6059 "UTC_VML2_CACHE_PDE2_MEM0",
6060 "UTC_VML2_CACHE_PDE2_MEM1",
6061 "UTC_VML2_RDIF_LOG_FIFO",
6064 static const char *atc_l2_cache_2m_mems[] = {
6065 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6066 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6067 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6068 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6071 static const char *atc_l2_cache_4k_mems[] = {
6072 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6073 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6074 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6075 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6076 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6077 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6078 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6079 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6080 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6081 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6082 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6083 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6084 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6085 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6086 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6087 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6088 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6089 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6090 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6091 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6092 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6093 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6094 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6095 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6096 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6097 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6098 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6099 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6100 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6101 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6102 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6103 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6106 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6107 struct ras_err_data *err_data)
6109 uint32_t i, data;
6110 uint32_t sec_count, ded_count;
6112 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6113 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6114 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6115 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6116 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6117 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6118 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6119 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6121 for (i = 0; i < 16; i++) {
6122 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6123 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6125 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6126 if (sec_count) {
6127 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6128 vml2_mems[i], sec_count);
6129 err_data->ce_count += sec_count;
6132 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6133 if (ded_count) {
6134 DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6135 vml2_mems[i], ded_count);
6136 err_data->ue_count += ded_count;
6140 for (i = 0; i < 7; i++) {
6141 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6142 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6144 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6145 SEC_COUNT);
6146 if (sec_count) {
6147 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6148 vml2_walker_mems[i], sec_count);
6149 err_data->ce_count += sec_count;
6152 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6153 DED_COUNT);
6154 if (ded_count) {
6155 DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6156 vml2_walker_mems[i], ded_count);
6157 err_data->ue_count += ded_count;
6161 for (i = 0; i < 4; i++) {
6162 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6163 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6165 sec_count = (data & 0x00006000L) >> 0xd;
6166 if (sec_count) {
6167 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6168 atc_l2_cache_2m_mems[i], sec_count);
6169 err_data->ce_count += sec_count;
6173 for (i = 0; i < 32; i++) {
6174 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6175 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6177 sec_count = (data & 0x00006000L) >> 0xd;
6178 if (sec_count) {
6179 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6180 atc_l2_cache_4k_mems[i], sec_count);
6181 err_data->ce_count += sec_count;
6184 ded_count = (data & 0x00018000L) >> 0xf;
6185 if (ded_count) {
6186 DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6187 atc_l2_cache_4k_mems[i], ded_count);
6188 err_data->ue_count += ded_count;
6192 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6193 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6194 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6195 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6197 return 0;
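/*
 * The ATC L2 counters above are decoded with raw masks rather than
 * REG_GET_FIELD: bits 14:13 hold the SEC count and bits 16:15 the DED
 * count.  As a worked example, a raw EDC_CNT value of 0x0000a000 gives
 * sec_count = (0xa000 & 0x00006000) >> 13 = 1 and
 * ded_count = (0xa000 & 0x00018000) >> 15 = 1.
 */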
6200 static int __get_ras_error_count(const struct soc15_reg_entry *reg,
6201 uint32_t se_id, uint32_t inst_id, uint32_t value,
6202 uint32_t *sec_count, uint32_t *ded_count)
6204 uint32_t i;
6205 uint32_t sec_cnt, ded_cnt;
6207 for (i = 0; i < ARRAY_SIZE(gc_ras_fields_vg20); i++) {
6208 if (gc_ras_fields_vg20[i].reg_offset != reg->reg_offset ||
6209 gc_ras_fields_vg20[i].seg != reg->seg ||
6210 gc_ras_fields_vg20[i].inst != reg->inst)
6211 continue;
6213 sec_cnt = (value &
6214 gc_ras_fields_vg20[i].sec_count_mask) >>
6215 gc_ras_fields_vg20[i].sec_count_shift;
6216 if (sec_cnt) {
6217 DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
6218 gc_ras_fields_vg20[i].name,
6219 se_id, inst_id,
6220 sec_cnt);
6221 *sec_count += sec_cnt;
6224 ded_cnt = (value &
6225 gc_ras_fields_vg20[i].ded_count_mask) >>
6226 gc_ras_fields_vg20[i].ded_count_shift;
6227 if (ded_cnt) {
6228 DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
6229 gc_ras_fields_vg20[i].name,
6230 se_id, inst_id,
6231 ded_cnt);
6232 *ded_count += ded_cnt;
6236 return 0;
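/*
 * A sketch of the table-driven extraction above (the field layout here is
 * hypothetical, only the mechanism matches): if a matched entry carried
 * sec_count_mask = 0x000000ff / sec_count_shift = 0 and
 * ded_count_mask = 0x0000ff00 / ded_count_shift = 8, a raw counter value
 * of 0x00000302 would add 2 to *sec_count and 3 to *ded_count for that
 * sub-block.
 */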
6239 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6240 void *ras_error_status)
6242 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6243 uint32_t sec_count = 0, ded_count = 0;
6244 uint32_t i, j, k;
6245 uint32_t reg_value;
6247 if (adev->asic_type != CHIP_VEGA20)
6248 return -EINVAL;
6250 err_data->ue_count = 0;
6251 err_data->ce_count = 0;
6253 mutex_lock(&adev->grbm_idx_mutex);
6255 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
6256 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
6257 for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
6258 gfx_v9_0_select_se_sh(adev, j, 0, k);
6259 reg_value =
6260 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
6261 if (reg_value)
6262 __get_ras_error_count(&sec_ded_counter_registers[i],
6263 j, k, reg_value,
6264 &sec_count, &ded_count);
6269 err_data->ce_count += sec_count;
6270 err_data->ue_count += ded_count;
6272 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6273 mutex_unlock(&adev->grbm_idx_mutex);
6275 gfx_v9_0_query_utc_edc_status(adev, err_data);
6277 return 0;
6280 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6281 .name = "gfx_v9_0",
6282 .early_init = gfx_v9_0_early_init,
6283 .late_init = gfx_v9_0_late_init,
6284 .sw_init = gfx_v9_0_sw_init,
6285 .sw_fini = gfx_v9_0_sw_fini,
6286 .hw_init = gfx_v9_0_hw_init,
6287 .hw_fini = gfx_v9_0_hw_fini,
6288 .suspend = gfx_v9_0_suspend,
6289 .resume = gfx_v9_0_resume,
6290 .is_idle = gfx_v9_0_is_idle,
6291 .wait_for_idle = gfx_v9_0_wait_for_idle,
6292 .soft_reset = gfx_v9_0_soft_reset,
6293 .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6294 .set_powergating_state = gfx_v9_0_set_powergating_state,
6295 .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6298 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6299 .type = AMDGPU_RING_TYPE_GFX,
6300 .align_mask = 0xff,
6301 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6302 .support_64bit_ptrs = true,
6303 .vmhub = AMDGPU_GFXHUB_0,
6304 .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6305 .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6306 .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6307 .emit_frame_size = /* totally 242 maximum if 16 IBs */
6308 5 + /* COND_EXEC */
6309 7 + /* PIPELINE_SYNC */
6310 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6311 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6312 2 + /* VM_FLUSH */
6313 8 + /* FENCE for VM_FLUSH */
6314 20 + /* GDS switch */
6315 4 + /* double SWITCH_BUFFER,
6316 the first COND_EXEC jump to the place just
6317 prior to this double SWITCH_BUFFER */
6318 5 + /* COND_EXEC */
6319 7 + /* HDP_flush */
6320 4 + /* VGT_flush */
6321 14 + /* CE_META */
6322 31 + /* DE_META */
6323 3 + /* CNTX_CTRL */
6324 5 + /* HDP_INVL */
6325 8 + 8 + /* FENCE x2 */
6326 2, /* SWITCH_BUFFER */
6327 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6328 .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6329 .emit_fence = gfx_v9_0_ring_emit_fence,
6330 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6331 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6332 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6333 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6334 .test_ring = gfx_v9_0_ring_test_ring,
6335 .test_ib = gfx_v9_0_ring_test_ib,
6336 .insert_nop = amdgpu_ring_insert_nop,
6337 .pad_ib = amdgpu_ring_generic_pad_ib,
6338 .emit_switch_buffer = gfx_v9_ring_emit_sb,
6339 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6340 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6341 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6342 .emit_tmz = gfx_v9_0_ring_emit_tmz,
6343 .emit_wreg = gfx_v9_0_ring_emit_wreg,
6344 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6345 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6346 .soft_recovery = gfx_v9_0_ring_soft_recovery,
6349 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6350 .type = AMDGPU_RING_TYPE_COMPUTE,
6351 .align_mask = 0xff,
6352 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6353 .support_64bit_ptrs = true,
6354 .vmhub = AMDGPU_GFXHUB_0,
6355 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6356 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6357 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6358 .emit_frame_size =
6359 20 + /* gfx_v9_0_ring_emit_gds_switch */
6360 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6361 5 + /* hdp invalidate */
6362 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6363 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6364 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6365 2 + /* gfx_v9_0_ring_emit_vm_flush */
6366 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6367 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6368 .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6369 .emit_fence = gfx_v9_0_ring_emit_fence,
6370 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6371 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6372 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6373 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6374 .test_ring = gfx_v9_0_ring_test_ring,
6375 .test_ib = gfx_v9_0_ring_test_ib,
6376 .insert_nop = amdgpu_ring_insert_nop,
6377 .pad_ib = amdgpu_ring_generic_pad_ib,
6378 .set_priority = gfx_v9_0_ring_set_priority_compute,
6379 .emit_wreg = gfx_v9_0_ring_emit_wreg,
6380 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6381 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6384 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6385 .type = AMDGPU_RING_TYPE_KIQ,
6386 .align_mask = 0xff,
6387 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6388 .support_64bit_ptrs = true,
6389 .vmhub = AMDGPU_GFXHUB_0,
6390 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6391 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6392 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6393 .emit_frame_size =
6394 20 + /* gfx_v9_0_ring_emit_gds_switch */
6395 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6396 5 + /* hdp invalidate */
6397 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6398 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6399 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6400 2 + /* gfx_v9_0_ring_emit_vm_flush */
6401 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6402 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6403 .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6404 .test_ring = gfx_v9_0_ring_test_ring,
6405 .insert_nop = amdgpu_ring_insert_nop,
6406 .pad_ib = amdgpu_ring_generic_pad_ib,
6407 .emit_rreg = gfx_v9_0_ring_emit_rreg,
6408 .emit_wreg = gfx_v9_0_ring_emit_wreg,
6409 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6410	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6411 };
6413 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6414 {
6415 int i;
6417 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6419 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6420 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6422 for (i = 0; i < adev->gfx.num_compute_rings; i++)
6423		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6424 }
6426 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6427 .set = gfx_v9_0_set_eop_interrupt_state,
6428	.process = gfx_v9_0_eop_irq,
6429 };
6431 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6432 .set = gfx_v9_0_set_priv_reg_fault_state,
6433	.process = gfx_v9_0_priv_reg_irq,
6434 };
6436 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6437 .set = gfx_v9_0_set_priv_inst_fault_state,
6438	.process = gfx_v9_0_priv_inst_irq,
6439 };
6441 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6442 .set = gfx_v9_0_set_cp_ecc_error_state,
6443	.process = amdgpu_gfx_cp_ecc_error_irq,
6444 };
6447 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6448 {
6449 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6450 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6452 adev->gfx.priv_reg_irq.num_types = 1;
6453 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6455 adev->gfx.priv_inst_irq.num_types = 1;
6456 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6458	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6459	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6460 }
6462 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6463 {
6464 switch (adev->asic_type) {
6465 case CHIP_VEGA10:
6466 case CHIP_VEGA12:
6467 case CHIP_VEGA20:
6468 case CHIP_RAVEN:
6469 case CHIP_ARCTURUS:
6470 case CHIP_RENOIR:
6471 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6472 break;
6473 default:
6474		break;
6475	}
6476 }
6478 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6479 {
6480	/* init asic gds info */
6481 switch (adev->asic_type) {
6482 case CHIP_VEGA10:
6483 case CHIP_VEGA12:
6484 case CHIP_VEGA20:
6485 adev->gds.gds_size = 0x10000;
6486 break;
6487 case CHIP_RAVEN:
6488 case CHIP_ARCTURUS:
6489 adev->gds.gds_size = 0x1000;
6490 break;
6491 default:
6492 adev->gds.gds_size = 0x10000;
6493		break;
6494	}
6496 switch (adev->asic_type) {
6497 case CHIP_VEGA10:
6498 case CHIP_VEGA20:
6499 adev->gds.gds_compute_max_wave_id = 0x7ff;
6500 break;
6501 case CHIP_VEGA12:
6502 adev->gds.gds_compute_max_wave_id = 0x27f;
6503 break;
6504 case CHIP_RAVEN:
6505 if (adev->rev_id >= 0x8)
6506 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6507 else
6508 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6509 break;
6510 case CHIP_ARCTURUS:
6511 adev->gds.gds_compute_max_wave_id = 0xfff;
6512 break;
6513 default:
6514 /* this really depends on the chip */
6515 adev->gds.gds_compute_max_wave_id = 0x7ff;
6516		break;
6517	}
6519 adev->gds.gws_size = 64;
6520	adev->gds.oa_size = 16;
6521 }
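/*
 * Editor's note (illustrative, not part of the driver source): gds_size is
 * understood here as the on-chip Global Data Share size in bytes (0x10000 =
 * 64KB on the Vega parts, 0x1000 = 4KB on Raven/Arcturus), while gws_size
 * and oa_size count the Global Wave Sync and Ordered Append resources made
 * available for allocation to user queues.
 */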
6523 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6524						 u32 bitmap)
6525 {
6526 u32 data;
6528 if (!bitmap)
6529 return;
6531 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6532 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6534	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6535 }
6537 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6538 {
6539 u32 data, mask;
6541 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6542 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6544 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6545 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6547 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6549	return (~data) & mask;
6550 }
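/*
 * Editor's note (illustrative, not part of the driver source): both
 * SHADER_ARRAY_CONFIG registers carry "inactive CU" bits, so OR-ing the
 * hardware and user-disabled bits, masking to max_cu_per_sh and inverting
 * yields the active-CU bitmap. For example, with max_cu_per_sh = 8
 * (mask 0xff) and inactive bits 0x03, the function returns
 * ~0x03 & 0xff = 0xfc, i.e. CUs 2-7 active.
 */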
6552 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6553				struct amdgpu_cu_info *cu_info)
6554 {
6555 int i, j, k, counter, active_cu_number = 0;
6556 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6557 unsigned disable_masks[4 * 4];
6559 if (!adev || !cu_info)
6560 return -EINVAL;
6562	/*
6563	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
6564	 */
6565 if (adev->gfx.config.max_shader_engines *
6566 adev->gfx.config.max_sh_per_se > 16)
6567 return -EINVAL;
6569 amdgpu_gfx_parse_disable_cu(disable_masks,
6570 adev->gfx.config.max_shader_engines,
6571 adev->gfx.config.max_sh_per_se);
6573 mutex_lock(&adev->grbm_idx_mutex);
6574 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6575 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6576 mask = 1;
6577 ao_bitmap = 0;
6578 counter = 0;
6579 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6580 gfx_v9_0_set_user_cu_inactive_bitmap(
6581 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6582 bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6584			/*
6585			 * The bitmap (and ao_cu_bitmap) in the cu_info structure is
6586			 * a 4x4 array, which suits Vega ASICs with their 4*2 SE/SH
6587			 * layout.
6588			 * Arcturus, however, uses an 8*1 SE/SH layout.
6589			 * To minimize the impact, it is folded into the existing
6590			 * bitmap array as below:
6591			 * SE4,SH0 --> bitmap[0][1]
6592			 * SE5,SH0 --> bitmap[1][1]
6593			 * SE6,SH0 --> bitmap[2][1]
6594			 * SE7,SH0 --> bitmap[3][1]
6595			 */
6596 cu_info->bitmap[i % 4][j + i / 4] = bitmap;
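			/*
			 * Editor's note (illustrative): for the Vega-style layouts
			 * (i < 4) the expression above reduces to bitmap[i][j]; for
			 * Arcturus, e.g. SE5/SH0, i = 5 and j = 0 give
			 * bitmap[5 % 4][0 + 5 / 4] = bitmap[1][1], matching the
			 * table in the comment above.
			 */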
6598			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6599 if (bitmap & mask) {
6600 if (counter < adev->gfx.config.max_cu_per_sh)
6601 ao_bitmap |= mask;
6602					counter++;
6603				}
6604				mask <<= 1;
6605			}
6606 active_cu_number += counter;
6607 if (i < 2 && j < 2)
6608 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6609			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6610		}
6611	}
6612 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6613 mutex_unlock(&adev->grbm_idx_mutex);
6615 cu_info->number = active_cu_number;
6616 cu_info->ao_cu_mask = ao_cu_mask;
6617 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6619	return 0;
6620 }
6622 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6623 {
6624 .type = AMD_IP_BLOCK_TYPE_GFX,
6625 .major = 9,
6626 .minor = 0,
6627 .rev = 0,
6628	.funcs = &gfx_v9_0_ip_funcs,
6629 };