/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
27 #include "amdgpu_gfx.h"
30 #include "amdgpu_atomfirmware.h"
32 #include "gc/gc_9_0_offset.h"
33 #include "gc/gc_9_0_sh_mask.h"
34 #include "vega10_enum.h"
35 #include "hdp/hdp_4_0_offset.h"
37 #include "soc15_common.h"
38 #include "clearstate_gfx9.h"
39 #include "v9_structs.h"
41 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
/* GFX9 sizing constants. */
#define GFX9_NUM_GFX_RINGS				1
#define GFX9_MEC_HPD_SIZE				4096

/* RLC microcode load address and save/restore starting offset. */
#define RLCG_UCODE_LOADING_START_ADDRESS		0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET		0x00000000L

/* PWR_MISC_CNTL_STATUS register offset and base index. */
#define mmPWR_MISC_CNTL_STATUS				0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX			0

/* PWR_GFX_RLC_CGPG_EN field: bit 0. */
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L

/* PWR_GFXOFF_STATUS field: bits 2:1. */
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
55 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
56 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
57 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
58 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
59 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
60 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
62 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
63 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
64 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
65 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
66 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
67 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
69 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
70 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
71 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
72 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
73 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
74 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
76 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
77 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
78 MODULE_FIRMWARE("amdgpu/raven_me.bin");
79 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
80 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
81 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
83 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
84 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
85 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
86 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
87 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
88 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
89 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
91 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
92 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
94 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
95 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
96 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
98 static const struct soc15_reg_golden golden_settings_gc_9_0
[] =
100 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmDB_DEBUG2
, 0xf00fffff, 0x00000400),
101 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmDB_DEBUG3
, 0x80000000, 0x80000000),
102 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmGB_GPU_ID
, 0x0000000f, 0x00000000),
103 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmPA_SC_BINNER_EVENT_CNTL_3
, 0x00000003, 0x82400024),
104 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmPA_SC_ENHANCE
, 0x3fffffff, 0x00000001),
105 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmPA_SC_LINE_STIPPLE_STATE
, 0x0000ff0f, 0x00000000),
106 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmSH_MEM_CONFIG
, 0x00001000, 0x00001000),
107 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmSPI_RESOURCE_RESERVE_CU_0
, 0x0007ffff, 0x00000800),
108 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmSPI_RESOURCE_RESERVE_CU_1
, 0x0007ffff, 0x00000800),
109 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0
, 0x01ffffff, 0x0000ff87),
110 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1
, 0x01ffffff, 0x0000ff8f),
111 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmSQC_CONFIG
, 0x03000000, 0x020a2000),
112 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmTA_CNTL_AUX
, 0xfffffeef, 0x010b0000),
113 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmTCP_CHAN_STEER_HI
, 0xffffffff, 0x4a2c0e68),
114 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmTCP_CHAN_STEER_LO
, 0xffffffff, 0xb5d3f197),
115 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmVGT_CACHE_INVALIDATION
, 0x3fff3af3, 0x19200000),
116 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmVGT_GS_MAX_WAVE_ID
, 0x00000fff, 0x000003ff),
117 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCP_MEC1_F32_INT_DIS
, 0x00000000, 0x00000800),
118 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCP_MEC2_F32_INT_DIS
, 0x00000000, 0x00000800),
119 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCP_DEBUG
, 0x00000000, 0x00008000)
122 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10
[] =
124 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCB_HW_CONTROL
, 0x0000f000, 0x00012107),
125 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCB_HW_CONTROL_3
, 0x30000000, 0x10000000),
126 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCPC_UTCL1_CNTL
, 0x08000000, 0x08000080),
127 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCPF_UTCL1_CNTL
, 0x08000000, 0x08000080),
128 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCPG_UTCL1_CNTL
, 0x08000000, 0x08000080),
129 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmGB_ADDR_CONFIG
, 0xffff77ff, 0x2a114042),
130 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmGB_ADDR_CONFIG_READ
, 0xffff77ff, 0x2a114042),
131 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmIA_UTCL1_CNTL
, 0x08000000, 0x08000080),
132 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmPA_SC_ENHANCE_1
, 0x00008000, 0x00048000),
133 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmRLC_GPM_UTCL1_CNTL_0
, 0x08000000, 0x08000080),
134 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmRLC_GPM_UTCL1_CNTL_1
, 0x08000000, 0x08000080),
135 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmRLC_GPM_UTCL1_CNTL_2
, 0x08000000, 0x08000080),
136 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmRLC_PREWALKER_UTCL1_CNTL
, 0x08000000, 0x08000080),
137 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmRLC_SPM_UTCL1_CNTL
, 0x08000000, 0x08000080),
138 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmRMI_UTCL1_CNTL2
, 0x00030000, 0x00020000),
139 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmSPI_CONFIG_CNTL_1
, 0x0000000f, 0x01000107),
140 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmTD_CNTL
, 0x00001800, 0x00000800),
141 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmWD_UTCL1_CNTL
, 0x08000000, 0x08000080)
144 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20
[] =
146 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCB_DCC_CONFIG
, 0x0f000080, 0x04000080),
147 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCB_HW_CONTROL_2
, 0x0f000000, 0x0a000000),
148 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCB_HW_CONTROL_3
, 0x30000000, 0x10000000),
149 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmGB_ADDR_CONFIG
, 0xf3e777ff, 0x22014042),
150 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmGB_ADDR_CONFIG_READ
, 0xf3e777ff, 0x22014042),
151 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmDB_DEBUG2
, 0x00003e00, 0x00000400),
152 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmPA_SC_ENHANCE_1
, 0xff840000, 0x04040000),
153 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmRMI_UTCL1_CNTL2
, 0x00030000, 0x00030000),
154 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmSPI_CONFIG_CNTL_1
, 0xffff010f, 0x01000107),
155 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmTA_CNTL_AUX
, 0x000b0000, 0x000b0000),
156 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmTD_CNTL
, 0x01000000, 0x01000000)
159 static const struct soc15_reg_golden golden_settings_gc_9_1
[] =
161 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCB_HW_CONTROL
, 0xfffdf3cf, 0x00014104),
162 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCPC_UTCL1_CNTL
, 0x08000000, 0x08000080),
163 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCPF_UTCL1_CNTL
, 0x08000000, 0x08000080),
164 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCPG_UTCL1_CNTL
, 0x08000000, 0x08000080),
165 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmDB_DEBUG2
, 0xf00fffff, 0x00000420),
166 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmGB_GPU_ID
, 0x0000000f, 0x00000000),
167 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmIA_UTCL1_CNTL
, 0x08000000, 0x08000080),
168 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmPA_SC_BINNER_EVENT_CNTL_3
, 0x00000003, 0x82400024),
169 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmPA_SC_ENHANCE
, 0x3fffffff, 0x00000001),
170 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmPA_SC_LINE_STIPPLE_STATE
, 0x0000ff0f, 0x00000000),
171 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmRLC_GPM_UTCL1_CNTL_0
, 0x08000000, 0x08000080),
172 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmRLC_GPM_UTCL1_CNTL_1
, 0x08000000, 0x08000080),
173 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmRLC_GPM_UTCL1_CNTL_2
, 0x08000000, 0x08000080),
174 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmRLC_PREWALKER_UTCL1_CNTL
, 0x08000000, 0x08000080),
175 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmRLC_SPM_UTCL1_CNTL
, 0x08000000, 0x08000080),
176 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmTA_CNTL_AUX
, 0xfffffeef, 0x010b0000),
177 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmTCP_CHAN_STEER_HI
, 0xffffffff, 0x00000000),
178 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmTCP_CHAN_STEER_LO
, 0xffffffff, 0x00003120),
179 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmVGT_CACHE_INVALIDATION
, 0x3fff3af3, 0x19200000),
180 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmVGT_GS_MAX_WAVE_ID
, 0x00000fff, 0x000000ff),
181 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmWD_UTCL1_CNTL
, 0x08000000, 0x08000080),
182 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCP_MEC1_F32_INT_DIS
, 0x00000000, 0x00000800),
183 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCP_MEC2_F32_INT_DIS
, 0x00000000, 0x00000800),
184 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCP_DEBUG
, 0x00000000, 0x00008000)
187 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1
[] =
189 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCB_HW_CONTROL_3
, 0x30000000, 0x10000000),
190 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmGB_ADDR_CONFIG
, 0xffff77ff, 0x24000042),
191 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmGB_ADDR_CONFIG_READ
, 0xffff77ff, 0x24000042),
192 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmPA_SC_ENHANCE_1
, 0xffffffff, 0x04048000),
193 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmPA_SC_MODE_CNTL_1
, 0x06000000, 0x06000000),
194 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmRMI_UTCL1_CNTL2
, 0x00030000, 0x00020000),
195 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmTD_CNTL
, 0x01bd9f33, 0x00000800)
198 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2
[] =
200 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCB_DCC_CONFIG
, 0xff7fffff, 0x04000000),
201 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCB_HW_CONTROL
, 0xfffdf3cf, 0x00014104),
202 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCB_HW_CONTROL_2
, 0xff7fffff, 0x0a000000),
203 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCPC_UTCL1_CNTL
, 0x7f0fffff, 0x08000080),
204 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCPF_UTCL1_CNTL
, 0xff8fffff, 0x08000080),
205 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCPG_UTCL1_CNTL
, 0x7f8fffff, 0x08000080),
206 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmGB_ADDR_CONFIG
, 0xffff77ff, 0x26013041),
207 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmGB_ADDR_CONFIG_READ
, 0xffff77ff, 0x26013041),
208 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmIA_UTCL1_CNTL
, 0x3f8fffff, 0x08000080),
209 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmPA_SC_ENHANCE_1
, 0xffffffff, 0x04040000),
210 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmRLC_GPM_UTCL1_CNTL_0
, 0xff0fffff, 0x08000080),
211 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmRLC_GPM_UTCL1_CNTL_1
, 0xff0fffff, 0x08000080),
212 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmRLC_GPM_UTCL1_CNTL_2
, 0xff0fffff, 0x08000080),
213 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmRLC_PREWALKER_UTCL1_CNTL
, 0xff0fffff, 0x08000080),
214 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmRLC_SPM_UTCL1_CNTL
, 0xff0fffff, 0x08000080),
215 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmTCP_CHAN_STEER_HI
, 0xffffffff, 0x00000000),
216 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmTCP_CHAN_STEER_LO
, 0xffffffff, 0x00000010),
217 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmTD_CNTL
, 0x01bd9f33, 0x01000000),
218 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmWD_UTCL1_CNTL
, 0x3f8fffff, 0x08000080),
221 static const struct soc15_reg_golden golden_settings_gc_9_x_common
[] =
223 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCP_SD_CNTL
, 0xffffffff, 0x000001ff),
224 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmGRBM_CAM_INDEX
, 0xffffffff, 0x00000000),
225 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmGRBM_CAM_DATA
, 0xffffffff, 0x2544c382)
228 static const struct soc15_reg_golden golden_settings_gc_9_2_1
[] =
230 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmDB_DEBUG2
, 0xf00fffff, 0x00000420),
231 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmGB_GPU_ID
, 0x0000000f, 0x00000000),
232 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmPA_SC_BINNER_EVENT_CNTL_3
, 0x00000003, 0x82400024),
233 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmPA_SC_ENHANCE
, 0x3fffffff, 0x00000001),
234 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmPA_SC_LINE_STIPPLE_STATE
, 0x0000ff0f, 0x00000000),
235 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmSH_MEM_CONFIG
, 0x00001000, 0x00001000),
236 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmSPI_RESOURCE_RESERVE_CU_0
, 0x0007ffff, 0x00000800),
237 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmSPI_RESOURCE_RESERVE_CU_1
, 0x0007ffff, 0x00000800),
238 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0
, 0x01ffffff, 0x0000ff87),
239 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1
, 0x01ffffff, 0x0000ff8f),
240 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmSQC_CONFIG
, 0x03000000, 0x020a2000),
241 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmTA_CNTL_AUX
, 0xfffffeef, 0x010b0000),
242 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmTCP_CHAN_STEER_HI
, 0xffffffff, 0x4a2c0e68),
243 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmTCP_CHAN_STEER_LO
, 0xffffffff, 0xb5d3f197),
244 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmVGT_CACHE_INVALIDATION
, 0x3fff3af3, 0x19200000),
245 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmVGT_GS_MAX_WAVE_ID
, 0x00000fff, 0x000003ff)
248 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12
[] =
250 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCB_DCC_CONFIG
, 0x00000080, 0x04000080),
251 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCB_HW_CONTROL
, 0xfffdf3cf, 0x00014104),
252 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCB_HW_CONTROL_2
, 0x0f000000, 0x0a000000),
253 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmGB_ADDR_CONFIG
, 0xffff77ff, 0x24104041),
254 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmGB_ADDR_CONFIG_READ
, 0xffff77ff, 0x24104041),
255 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmPA_SC_ENHANCE_1
, 0xffffffff, 0x04040000),
256 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmSPI_CONFIG_CNTL_1
, 0xffff03ff, 0x01000107),
257 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmTCP_CHAN_STEER_HI
, 0xffffffff, 0x00000000),
258 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmTCP_CHAN_STEER_LO
, 0xffffffff, 0x76325410),
259 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmTD_CNTL
, 0x01bd9f33, 0x01000000),
260 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCP_MEC1_F32_INT_DIS
, 0x00000000, 0x00000800),
261 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCP_MEC2_F32_INT_DIS
, 0x00000000, 0x00000800),
262 SOC15_REG_GOLDEN_VALUE(GC
, 0, mmCP_DEBUG
, 0x00000000, 0x00008000)
265 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS
[] =
267 mmRLC_SRM_INDEX_CNTL_ADDR_0
- mmRLC_SRM_INDEX_CNTL_ADDR_0
,
268 mmRLC_SRM_INDEX_CNTL_ADDR_1
- mmRLC_SRM_INDEX_CNTL_ADDR_0
,
269 mmRLC_SRM_INDEX_CNTL_ADDR_2
- mmRLC_SRM_INDEX_CNTL_ADDR_0
,
270 mmRLC_SRM_INDEX_CNTL_ADDR_3
- mmRLC_SRM_INDEX_CNTL_ADDR_0
,
271 mmRLC_SRM_INDEX_CNTL_ADDR_4
- mmRLC_SRM_INDEX_CNTL_ADDR_0
,
272 mmRLC_SRM_INDEX_CNTL_ADDR_5
- mmRLC_SRM_INDEX_CNTL_ADDR_0
,
273 mmRLC_SRM_INDEX_CNTL_ADDR_6
- mmRLC_SRM_INDEX_CNTL_ADDR_0
,
274 mmRLC_SRM_INDEX_CNTL_ADDR_7
- mmRLC_SRM_INDEX_CNTL_ADDR_0
,
277 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS
[] =
279 mmRLC_SRM_INDEX_CNTL_DATA_0
- mmRLC_SRM_INDEX_CNTL_DATA_0
,
280 mmRLC_SRM_INDEX_CNTL_DATA_1
- mmRLC_SRM_INDEX_CNTL_DATA_0
,
281 mmRLC_SRM_INDEX_CNTL_DATA_2
- mmRLC_SRM_INDEX_CNTL_DATA_0
,
282 mmRLC_SRM_INDEX_CNTL_DATA_3
- mmRLC_SRM_INDEX_CNTL_DATA_0
,
283 mmRLC_SRM_INDEX_CNTL_DATA_4
- mmRLC_SRM_INDEX_CNTL_DATA_0
,
284 mmRLC_SRM_INDEX_CNTL_DATA_5
- mmRLC_SRM_INDEX_CNTL_DATA_0
,
285 mmRLC_SRM_INDEX_CNTL_DATA_6
- mmRLC_SRM_INDEX_CNTL_DATA_0
,
286 mmRLC_SRM_INDEX_CNTL_DATA_7
- mmRLC_SRM_INDEX_CNTL_DATA_0
,
/*
 * Per-ASIC GB_ADDR_CONFIG golden values; each matches the mmGB_ADDR_CONFIG
 * entry of the corresponding golden settings table in this file.
 */
#define VEGA10_GB_ADDR_CONFIG_GOLDEN	0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN	0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN	0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN	0x26013041
294 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device
*adev
);
295 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device
*adev
);
296 static void gfx_v9_0_set_gds_init(struct amdgpu_device
*adev
);
297 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device
*adev
);
298 static int gfx_v9_0_get_cu_info(struct amdgpu_device
*adev
,
299 struct amdgpu_cu_info
*cu_info
);
300 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device
*adev
);
301 static void gfx_v9_0_select_se_sh(struct amdgpu_device
*adev
, u32 se_num
, u32 sh_num
, u32 instance
);
302 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring
*ring
);
304 static void gfx_v9_0_init_golden_registers(struct amdgpu_device
*adev
)
306 switch (adev
->asic_type
) {
308 soc15_program_register_sequence(adev
,
309 golden_settings_gc_9_0
,
310 ARRAY_SIZE(golden_settings_gc_9_0
));
311 soc15_program_register_sequence(adev
,
312 golden_settings_gc_9_0_vg10
,
313 ARRAY_SIZE(golden_settings_gc_9_0_vg10
));
316 soc15_program_register_sequence(adev
,
317 golden_settings_gc_9_2_1
,
318 ARRAY_SIZE(golden_settings_gc_9_2_1
));
319 soc15_program_register_sequence(adev
,
320 golden_settings_gc_9_2_1_vg12
,
321 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12
));
324 soc15_program_register_sequence(adev
,
325 golden_settings_gc_9_0
,
326 ARRAY_SIZE(golden_settings_gc_9_0
));
327 soc15_program_register_sequence(adev
,
328 golden_settings_gc_9_0_vg20
,
329 ARRAY_SIZE(golden_settings_gc_9_0_vg20
));
332 soc15_program_register_sequence(adev
, golden_settings_gc_9_1
,
333 ARRAY_SIZE(golden_settings_gc_9_1
));
334 if (adev
->rev_id
>= 8)
335 soc15_program_register_sequence(adev
,
336 golden_settings_gc_9_1_rv2
,
337 ARRAY_SIZE(golden_settings_gc_9_1_rv2
));
339 soc15_program_register_sequence(adev
,
340 golden_settings_gc_9_1_rv1
,
341 ARRAY_SIZE(golden_settings_gc_9_1_rv1
));
347 soc15_program_register_sequence(adev
, golden_settings_gc_9_x_common
,
348 (const u32
)ARRAY_SIZE(golden_settings_gc_9_x_common
));
351 static void gfx_v9_0_scratch_init(struct amdgpu_device
*adev
)
353 adev
->gfx
.scratch
.num_reg
= 8;
354 adev
->gfx
.scratch
.reg_base
= SOC15_REG_OFFSET(GC
, 0, mmSCRATCH_REG0
);
355 adev
->gfx
.scratch
.free_mask
= (1u << adev
->gfx
.scratch
.num_reg
) - 1;
358 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring
*ring
, int eng_sel
,
359 bool wc
, uint32_t reg
, uint32_t val
)
361 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
362 amdgpu_ring_write(ring
, WRITE_DATA_ENGINE_SEL(eng_sel
) |
363 WRITE_DATA_DST_SEL(0) |
364 (wc
? WR_CONFIRM
: 0));
365 amdgpu_ring_write(ring
, reg
);
366 amdgpu_ring_write(ring
, 0);
367 amdgpu_ring_write(ring
, val
);
370 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring
*ring
, int eng_sel
,
371 int mem_space
, int opt
, uint32_t addr0
,
372 uint32_t addr1
, uint32_t ref
, uint32_t mask
,
375 amdgpu_ring_write(ring
, PACKET3(PACKET3_WAIT_REG_MEM
, 5));
376 amdgpu_ring_write(ring
,
377 /* memory (1) or register (0) */
378 (WAIT_REG_MEM_MEM_SPACE(mem_space
) |
379 WAIT_REG_MEM_OPERATION(opt
) | /* wait */
380 WAIT_REG_MEM_FUNCTION(3) | /* equal */
381 WAIT_REG_MEM_ENGINE(eng_sel
)));
384 BUG_ON(addr0
& 0x3); /* Dword align */
385 amdgpu_ring_write(ring
, addr0
);
386 amdgpu_ring_write(ring
, addr1
);
387 amdgpu_ring_write(ring
, ref
);
388 amdgpu_ring_write(ring
, mask
);
389 amdgpu_ring_write(ring
, inv
); /* poll interval */
392 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring
*ring
)
394 struct amdgpu_device
*adev
= ring
->adev
;
400 r
= amdgpu_gfx_scratch_get(adev
, &scratch
);
404 WREG32(scratch
, 0xCAFEDEAD);
405 r
= amdgpu_ring_alloc(ring
, 3);
407 goto error_free_scratch
;
409 amdgpu_ring_write(ring
, PACKET3(PACKET3_SET_UCONFIG_REG
, 1));
410 amdgpu_ring_write(ring
, (scratch
- PACKET3_SET_UCONFIG_REG_START
));
411 amdgpu_ring_write(ring
, 0xDEADBEEF);
412 amdgpu_ring_commit(ring
);
414 for (i
= 0; i
< adev
->usec_timeout
; i
++) {
415 tmp
= RREG32(scratch
);
416 if (tmp
== 0xDEADBEEF)
421 if (i
>= adev
->usec_timeout
)
425 amdgpu_gfx_scratch_free(adev
, scratch
);
429 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring
*ring
, long timeout
)
431 struct amdgpu_device
*adev
= ring
->adev
;
433 struct dma_fence
*f
= NULL
;
440 r
= amdgpu_device_wb_get(adev
, &index
);
444 gpu_addr
= adev
->wb
.gpu_addr
+ (index
* 4);
445 adev
->wb
.wb
[index
] = cpu_to_le32(0xCAFEDEAD);
446 memset(&ib
, 0, sizeof(ib
));
447 r
= amdgpu_ib_get(adev
, NULL
, 16, &ib
);
451 ib
.ptr
[0] = PACKET3(PACKET3_WRITE_DATA
, 3);
452 ib
.ptr
[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM
;
453 ib
.ptr
[2] = lower_32_bits(gpu_addr
);
454 ib
.ptr
[3] = upper_32_bits(gpu_addr
);
455 ib
.ptr
[4] = 0xDEADBEEF;
458 r
= amdgpu_ib_schedule(ring
, 1, &ib
, NULL
, &f
);
462 r
= dma_fence_wait_timeout(f
, false, timeout
);
470 tmp
= adev
->wb
.wb
[index
];
471 if (tmp
== 0xDEADBEEF)
477 amdgpu_ib_free(adev
, &ib
, NULL
);
480 amdgpu_device_wb_free(adev
, index
);
485 static void gfx_v9_0_free_microcode(struct amdgpu_device
*adev
)
487 release_firmware(adev
->gfx
.pfp_fw
);
488 adev
->gfx
.pfp_fw
= NULL
;
489 release_firmware(adev
->gfx
.me_fw
);
490 adev
->gfx
.me_fw
= NULL
;
491 release_firmware(adev
->gfx
.ce_fw
);
492 adev
->gfx
.ce_fw
= NULL
;
493 release_firmware(adev
->gfx
.rlc_fw
);
494 adev
->gfx
.rlc_fw
= NULL
;
495 release_firmware(adev
->gfx
.mec_fw
);
496 adev
->gfx
.mec_fw
= NULL
;
497 release_firmware(adev
->gfx
.mec2_fw
);
498 adev
->gfx
.mec2_fw
= NULL
;
500 kfree(adev
->gfx
.rlc
.register_list_format
);
503 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device
*adev
)
505 const struct rlc_firmware_header_v2_1
*rlc_hdr
;
507 rlc_hdr
= (const struct rlc_firmware_header_v2_1
*)adev
->gfx
.rlc_fw
->data
;
508 adev
->gfx
.rlc_srlc_fw_version
= le32_to_cpu(rlc_hdr
->save_restore_list_cntl_ucode_ver
);
509 adev
->gfx
.rlc_srlc_feature_version
= le32_to_cpu(rlc_hdr
->save_restore_list_cntl_feature_ver
);
510 adev
->gfx
.rlc
.save_restore_list_cntl_size_bytes
= le32_to_cpu(rlc_hdr
->save_restore_list_cntl_size_bytes
);
511 adev
->gfx
.rlc
.save_restore_list_cntl
= (u8
*)rlc_hdr
+ le32_to_cpu(rlc_hdr
->save_restore_list_cntl_offset_bytes
);
512 adev
->gfx
.rlc_srlg_fw_version
= le32_to_cpu(rlc_hdr
->save_restore_list_gpm_ucode_ver
);
513 adev
->gfx
.rlc_srlg_feature_version
= le32_to_cpu(rlc_hdr
->save_restore_list_gpm_feature_ver
);
514 adev
->gfx
.rlc
.save_restore_list_gpm_size_bytes
= le32_to_cpu(rlc_hdr
->save_restore_list_gpm_size_bytes
);
515 adev
->gfx
.rlc
.save_restore_list_gpm
= (u8
*)rlc_hdr
+ le32_to_cpu(rlc_hdr
->save_restore_list_gpm_offset_bytes
);
516 adev
->gfx
.rlc_srls_fw_version
= le32_to_cpu(rlc_hdr
->save_restore_list_srm_ucode_ver
);
517 adev
->gfx
.rlc_srls_feature_version
= le32_to_cpu(rlc_hdr
->save_restore_list_srm_feature_ver
);
518 adev
->gfx
.rlc
.save_restore_list_srm_size_bytes
= le32_to_cpu(rlc_hdr
->save_restore_list_srm_size_bytes
);
519 adev
->gfx
.rlc
.save_restore_list_srm
= (u8
*)rlc_hdr
+ le32_to_cpu(rlc_hdr
->save_restore_list_srm_offset_bytes
);
520 adev
->gfx
.rlc
.reg_list_format_direct_reg_list_length
=
521 le32_to_cpu(rlc_hdr
->reg_list_format_direct_reg_list_length
);
524 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device
*adev
)
526 adev
->gfx
.me_fw_write_wait
= false;
527 adev
->gfx
.mec_fw_write_wait
= false;
529 switch (adev
->asic_type
) {
531 if ((adev
->gfx
.me_fw_version
>= 0x0000009c) &&
532 (adev
->gfx
.me_feature_version
>= 42) &&
533 (adev
->gfx
.pfp_fw_version
>= 0x000000b1) &&
534 (adev
->gfx
.pfp_feature_version
>= 42))
535 adev
->gfx
.me_fw_write_wait
= true;
537 if ((adev
->gfx
.mec_fw_version
>= 0x00000193) &&
538 (adev
->gfx
.mec_feature_version
>= 42))
539 adev
->gfx
.mec_fw_write_wait
= true;
542 if ((adev
->gfx
.me_fw_version
>= 0x0000009c) &&
543 (adev
->gfx
.me_feature_version
>= 44) &&
544 (adev
->gfx
.pfp_fw_version
>= 0x000000b2) &&
545 (adev
->gfx
.pfp_feature_version
>= 44))
546 adev
->gfx
.me_fw_write_wait
= true;
548 if ((adev
->gfx
.mec_fw_version
>= 0x00000196) &&
549 (adev
->gfx
.mec_feature_version
>= 44))
550 adev
->gfx
.mec_fw_write_wait
= true;
553 if ((adev
->gfx
.me_fw_version
>= 0x0000009c) &&
554 (adev
->gfx
.me_feature_version
>= 44) &&
555 (adev
->gfx
.pfp_fw_version
>= 0x000000b2) &&
556 (adev
->gfx
.pfp_feature_version
>= 44))
557 adev
->gfx
.me_fw_write_wait
= true;
559 if ((adev
->gfx
.mec_fw_version
>= 0x00000197) &&
560 (adev
->gfx
.mec_feature_version
>= 44))
561 adev
->gfx
.mec_fw_write_wait
= true;
564 if ((adev
->gfx
.me_fw_version
>= 0x0000009c) &&
565 (adev
->gfx
.me_feature_version
>= 42) &&
566 (adev
->gfx
.pfp_fw_version
>= 0x000000b1) &&
567 (adev
->gfx
.pfp_feature_version
>= 42))
568 adev
->gfx
.me_fw_write_wait
= true;
570 if ((adev
->gfx
.mec_fw_version
>= 0x00000192) &&
571 (adev
->gfx
.mec_feature_version
>= 42))
572 adev
->gfx
.mec_fw_write_wait
= true;
579 static int gfx_v9_0_init_microcode(struct amdgpu_device
*adev
)
581 const char *chip_name
;
584 struct amdgpu_firmware_info
*info
= NULL
;
585 const struct common_firmware_header
*header
= NULL
;
586 const struct gfx_firmware_header_v1_0
*cp_hdr
;
587 const struct rlc_firmware_header_v2_0
*rlc_hdr
;
588 unsigned int *tmp
= NULL
;
590 uint16_t version_major
;
591 uint16_t version_minor
;
595 switch (adev
->asic_type
) {
597 chip_name
= "vega10";
600 chip_name
= "vega12";
603 chip_name
= "vega20";
606 if (adev
->rev_id
>= 8)
607 chip_name
= "raven2";
608 else if (adev
->pdev
->device
== 0x15d8)
609 chip_name
= "picasso";
617 snprintf(fw_name
, sizeof(fw_name
), "amdgpu/%s_pfp.bin", chip_name
);
618 err
= request_firmware(&adev
->gfx
.pfp_fw
, fw_name
, adev
->dev
);
621 err
= amdgpu_ucode_validate(adev
->gfx
.pfp_fw
);
624 cp_hdr
= (const struct gfx_firmware_header_v1_0
*)adev
->gfx
.pfp_fw
->data
;
625 adev
->gfx
.pfp_fw_version
= le32_to_cpu(cp_hdr
->header
.ucode_version
);
626 adev
->gfx
.pfp_feature_version
= le32_to_cpu(cp_hdr
->ucode_feature_version
);
628 snprintf(fw_name
, sizeof(fw_name
), "amdgpu/%s_me.bin", chip_name
);
629 err
= request_firmware(&adev
->gfx
.me_fw
, fw_name
, adev
->dev
);
632 err
= amdgpu_ucode_validate(adev
->gfx
.me_fw
);
635 cp_hdr
= (const struct gfx_firmware_header_v1_0
*)adev
->gfx
.me_fw
->data
;
636 adev
->gfx
.me_fw_version
= le32_to_cpu(cp_hdr
->header
.ucode_version
);
637 adev
->gfx
.me_feature_version
= le32_to_cpu(cp_hdr
->ucode_feature_version
);
639 snprintf(fw_name
, sizeof(fw_name
), "amdgpu/%s_ce.bin", chip_name
);
640 err
= request_firmware(&adev
->gfx
.ce_fw
, fw_name
, adev
->dev
);
643 err
= amdgpu_ucode_validate(adev
->gfx
.ce_fw
);
646 cp_hdr
= (const struct gfx_firmware_header_v1_0
*)adev
->gfx
.ce_fw
->data
;
647 adev
->gfx
.ce_fw_version
= le32_to_cpu(cp_hdr
->header
.ucode_version
);
648 adev
->gfx
.ce_feature_version
= le32_to_cpu(cp_hdr
->ucode_feature_version
);
651 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
652 * instead of picasso_rlc.bin.
654 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
655 * or revision >= 0xD8 && revision <= 0xDF
656 * otherwise is PCO FP5
658 if (!strcmp(chip_name
, "picasso") &&
659 (((adev
->pdev
->revision
>= 0xC8) && (adev
->pdev
->revision
<= 0xCF)) ||
660 ((adev
->pdev
->revision
>= 0xD8) && (adev
->pdev
->revision
<= 0xDF))))
661 snprintf(fw_name
, sizeof(fw_name
), "amdgpu/%s_rlc_am4.bin", chip_name
);
663 snprintf(fw_name
, sizeof(fw_name
), "amdgpu/%s_rlc.bin", chip_name
);
664 err
= request_firmware(&adev
->gfx
.rlc_fw
, fw_name
, adev
->dev
);
667 err
= amdgpu_ucode_validate(adev
->gfx
.rlc_fw
);
668 rlc_hdr
= (const struct rlc_firmware_header_v2_0
*)adev
->gfx
.rlc_fw
->data
;
670 version_major
= le16_to_cpu(rlc_hdr
->header
.header_version_major
);
671 version_minor
= le16_to_cpu(rlc_hdr
->header
.header_version_minor
);
672 if (version_major
== 2 && version_minor
== 1)
673 adev
->gfx
.rlc
.is_rlc_v2_1
= true;
675 adev
->gfx
.rlc_fw_version
= le32_to_cpu(rlc_hdr
->header
.ucode_version
);
676 adev
->gfx
.rlc_feature_version
= le32_to_cpu(rlc_hdr
->ucode_feature_version
);
677 adev
->gfx
.rlc
.save_and_restore_offset
=
678 le32_to_cpu(rlc_hdr
->save_and_restore_offset
);
679 adev
->gfx
.rlc
.clear_state_descriptor_offset
=
680 le32_to_cpu(rlc_hdr
->clear_state_descriptor_offset
);
681 adev
->gfx
.rlc
.avail_scratch_ram_locations
=
682 le32_to_cpu(rlc_hdr
->avail_scratch_ram_locations
);
683 adev
->gfx
.rlc
.reg_restore_list_size
=
684 le32_to_cpu(rlc_hdr
->reg_restore_list_size
);
685 adev
->gfx
.rlc
.reg_list_format_start
=
686 le32_to_cpu(rlc_hdr
->reg_list_format_start
);
687 adev
->gfx
.rlc
.reg_list_format_separate_start
=
688 le32_to_cpu(rlc_hdr
->reg_list_format_separate_start
);
689 adev
->gfx
.rlc
.starting_offsets_start
=
690 le32_to_cpu(rlc_hdr
->starting_offsets_start
);
691 adev
->gfx
.rlc
.reg_list_format_size_bytes
=
692 le32_to_cpu(rlc_hdr
->reg_list_format_size_bytes
);
693 adev
->gfx
.rlc
.reg_list_size_bytes
=
694 le32_to_cpu(rlc_hdr
->reg_list_size_bytes
);
695 adev
->gfx
.rlc
.register_list_format
=
696 kmalloc(adev
->gfx
.rlc
.reg_list_format_size_bytes
+
697 adev
->gfx
.rlc
.reg_list_size_bytes
, GFP_KERNEL
);
698 if (!adev
->gfx
.rlc
.register_list_format
) {
703 tmp
= (unsigned int *)((uintptr_t)rlc_hdr
+
704 le32_to_cpu(rlc_hdr
->reg_list_format_array_offset_bytes
));
705 for (i
= 0 ; i
< (adev
->gfx
.rlc
.reg_list_format_size_bytes
>> 2); i
++)
706 adev
->gfx
.rlc
.register_list_format
[i
] = le32_to_cpu(tmp
[i
]);
708 adev
->gfx
.rlc
.register_restore
= adev
->gfx
.rlc
.register_list_format
+ i
;
710 tmp
= (unsigned int *)((uintptr_t)rlc_hdr
+
711 le32_to_cpu(rlc_hdr
->reg_list_array_offset_bytes
));
712 for (i
= 0 ; i
< (adev
->gfx
.rlc
.reg_list_size_bytes
>> 2); i
++)
713 adev
->gfx
.rlc
.register_restore
[i
] = le32_to_cpu(tmp
[i
]);
715 if (adev
->gfx
.rlc
.is_rlc_v2_1
)
716 gfx_v9_0_init_rlc_ext_microcode(adev
);
718 snprintf(fw_name
, sizeof(fw_name
), "amdgpu/%s_mec.bin", chip_name
);
719 err
= request_firmware(&adev
->gfx
.mec_fw
, fw_name
, adev
->dev
);
722 err
= amdgpu_ucode_validate(adev
->gfx
.mec_fw
);
725 cp_hdr
= (const struct gfx_firmware_header_v1_0
*)adev
->gfx
.mec_fw
->data
;
726 adev
->gfx
.mec_fw_version
= le32_to_cpu(cp_hdr
->header
.ucode_version
);
727 adev
->gfx
.mec_feature_version
= le32_to_cpu(cp_hdr
->ucode_feature_version
);
730 snprintf(fw_name
, sizeof(fw_name
), "amdgpu/%s_mec2.bin", chip_name
);
731 err
= request_firmware(&adev
->gfx
.mec2_fw
, fw_name
, adev
->dev
);
733 err
= amdgpu_ucode_validate(adev
->gfx
.mec2_fw
);
736 cp_hdr
= (const struct gfx_firmware_header_v1_0
*)
737 adev
->gfx
.mec2_fw
->data
;
738 adev
->gfx
.mec2_fw_version
=
739 le32_to_cpu(cp_hdr
->header
.ucode_version
);
740 adev
->gfx
.mec2_feature_version
=
741 le32_to_cpu(cp_hdr
->ucode_feature_version
);
744 adev
->gfx
.mec2_fw
= NULL
;
747 if (adev
->firmware
.load_type
== AMDGPU_FW_LOAD_PSP
) {
748 info
= &adev
->firmware
.ucode
[AMDGPU_UCODE_ID_CP_PFP
];
749 info
->ucode_id
= AMDGPU_UCODE_ID_CP_PFP
;
750 info
->fw
= adev
->gfx
.pfp_fw
;
751 header
= (const struct common_firmware_header
*)info
->fw
->data
;
752 adev
->firmware
.fw_size
+=
753 ALIGN(le32_to_cpu(header
->ucode_size_bytes
), PAGE_SIZE
);
755 info
= &adev
->firmware
.ucode
[AMDGPU_UCODE_ID_CP_ME
];
756 info
->ucode_id
= AMDGPU_UCODE_ID_CP_ME
;
757 info
->fw
= adev
->gfx
.me_fw
;
758 header
= (const struct common_firmware_header
*)info
->fw
->data
;
759 adev
->firmware
.fw_size
+=
760 ALIGN(le32_to_cpu(header
->ucode_size_bytes
), PAGE_SIZE
);
762 info
= &adev
->firmware
.ucode
[AMDGPU_UCODE_ID_CP_CE
];
763 info
->ucode_id
= AMDGPU_UCODE_ID_CP_CE
;
764 info
->fw
= adev
->gfx
.ce_fw
;
765 header
= (const struct common_firmware_header
*)info
->fw
->data
;
766 adev
->firmware
.fw_size
+=
767 ALIGN(le32_to_cpu(header
->ucode_size_bytes
), PAGE_SIZE
);
769 info
= &adev
->firmware
.ucode
[AMDGPU_UCODE_ID_RLC_G
];
770 info
->ucode_id
= AMDGPU_UCODE_ID_RLC_G
;
771 info
->fw
= adev
->gfx
.rlc_fw
;
772 header
= (const struct common_firmware_header
*)info
->fw
->data
;
773 adev
->firmware
.fw_size
+=
774 ALIGN(le32_to_cpu(header
->ucode_size_bytes
), PAGE_SIZE
);
776 if (adev
->gfx
.rlc
.is_rlc_v2_1
&&
777 adev
->gfx
.rlc
.save_restore_list_cntl_size_bytes
&&
778 adev
->gfx
.rlc
.save_restore_list_gpm_size_bytes
&&
779 adev
->gfx
.rlc
.save_restore_list_srm_size_bytes
) {
780 info
= &adev
->firmware
.ucode
[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
];
781 info
->ucode_id
= AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
;
782 info
->fw
= adev
->gfx
.rlc_fw
;
783 adev
->firmware
.fw_size
+=
784 ALIGN(adev
->gfx
.rlc
.save_restore_list_cntl_size_bytes
, PAGE_SIZE
);
786 info
= &adev
->firmware
.ucode
[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
];
787 info
->ucode_id
= AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
;
788 info
->fw
= adev
->gfx
.rlc_fw
;
789 adev
->firmware
.fw_size
+=
790 ALIGN(adev
->gfx
.rlc
.save_restore_list_gpm_size_bytes
, PAGE_SIZE
);
792 info
= &adev
->firmware
.ucode
[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM
];
793 info
->ucode_id
= AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM
;
794 info
->fw
= adev
->gfx
.rlc_fw
;
795 adev
->firmware
.fw_size
+=
796 ALIGN(adev
->gfx
.rlc
.save_restore_list_srm_size_bytes
, PAGE_SIZE
);
799 info
= &adev
->firmware
.ucode
[AMDGPU_UCODE_ID_CP_MEC1
];
800 info
->ucode_id
= AMDGPU_UCODE_ID_CP_MEC1
;
801 info
->fw
= adev
->gfx
.mec_fw
;
802 header
= (const struct common_firmware_header
*)info
->fw
->data
;
803 cp_hdr
= (const struct gfx_firmware_header_v1_0
*)info
->fw
->data
;
804 adev
->firmware
.fw_size
+=
805 ALIGN(le32_to_cpu(header
->ucode_size_bytes
) - le32_to_cpu(cp_hdr
->jt_size
) * 4, PAGE_SIZE
);
807 info
= &adev
->firmware
.ucode
[AMDGPU_UCODE_ID_CP_MEC1_JT
];
808 info
->ucode_id
= AMDGPU_UCODE_ID_CP_MEC1_JT
;
809 info
->fw
= adev
->gfx
.mec_fw
;
810 adev
->firmware
.fw_size
+=
811 ALIGN(le32_to_cpu(cp_hdr
->jt_size
) * 4, PAGE_SIZE
);
813 if (adev
->gfx
.mec2_fw
) {
814 info
= &adev
->firmware
.ucode
[AMDGPU_UCODE_ID_CP_MEC2
];
815 info
->ucode_id
= AMDGPU_UCODE_ID_CP_MEC2
;
816 info
->fw
= adev
->gfx
.mec2_fw
;
817 header
= (const struct common_firmware_header
*)info
->fw
->data
;
818 cp_hdr
= (const struct gfx_firmware_header_v1_0
*)info
->fw
->data
;
819 adev
->firmware
.fw_size
+=
820 ALIGN(le32_to_cpu(header
->ucode_size_bytes
) - le32_to_cpu(cp_hdr
->jt_size
) * 4, PAGE_SIZE
);
821 info
= &adev
->firmware
.ucode
[AMDGPU_UCODE_ID_CP_MEC2_JT
];
822 info
->ucode_id
= AMDGPU_UCODE_ID_CP_MEC2_JT
;
823 info
->fw
= adev
->gfx
.mec2_fw
;
824 adev
->firmware
.fw_size
+=
825 ALIGN(le32_to_cpu(cp_hdr
->jt_size
) * 4, PAGE_SIZE
);
831 gfx_v9_0_check_fw_write_wait(adev
);
834 "gfx9: Failed to load firmware \"%s\"\n",
836 release_firmware(adev
->gfx
.pfp_fw
);
837 adev
->gfx
.pfp_fw
= NULL
;
838 release_firmware(adev
->gfx
.me_fw
);
839 adev
->gfx
.me_fw
= NULL
;
840 release_firmware(adev
->gfx
.ce_fw
);
841 adev
->gfx
.ce_fw
= NULL
;
842 release_firmware(adev
->gfx
.rlc_fw
);
843 adev
->gfx
.rlc_fw
= NULL
;
844 release_firmware(adev
->gfx
.mec_fw
);
845 adev
->gfx
.mec_fw
= NULL
;
846 release_firmware(adev
->gfx
.mec2_fw
);
847 adev
->gfx
.mec2_fw
= NULL
;
852 static u32
gfx_v9_0_get_csb_size(struct amdgpu_device
*adev
)
855 const struct cs_section_def
*sect
= NULL
;
856 const struct cs_extent_def
*ext
= NULL
;
858 /* begin clear state */
860 /* context control state */
863 for (sect
= gfx9_cs_data
; sect
->section
!= NULL
; ++sect
) {
864 for (ext
= sect
->section
; ext
->extent
!= NULL
; ++ext
) {
865 if (sect
->id
== SECT_CONTEXT
)
866 count
+= 2 + ext
->reg_count
;
872 /* end clear state */
880 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device
*adev
,
881 volatile u32
*buffer
)
884 const struct cs_section_def
*sect
= NULL
;
885 const struct cs_extent_def
*ext
= NULL
;
887 if (adev
->gfx
.rlc
.cs_data
== NULL
)
892 buffer
[count
++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL
, 0));
893 buffer
[count
++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE
);
895 buffer
[count
++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL
, 1));
896 buffer
[count
++] = cpu_to_le32(0x80000000);
897 buffer
[count
++] = cpu_to_le32(0x80000000);
899 for (sect
= adev
->gfx
.rlc
.cs_data
; sect
->section
!= NULL
; ++sect
) {
900 for (ext
= sect
->section
; ext
->extent
!= NULL
; ++ext
) {
901 if (sect
->id
== SECT_CONTEXT
) {
903 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG
, ext
->reg_count
));
904 buffer
[count
++] = cpu_to_le32(ext
->reg_index
-
905 PACKET3_SET_CONTEXT_REG_START
);
906 for (i
= 0; i
< ext
->reg_count
; i
++)
907 buffer
[count
++] = cpu_to_le32(ext
->extent
[i
]);
914 buffer
[count
++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL
, 0));
915 buffer
[count
++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE
);
917 buffer
[count
++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE
, 0));
918 buffer
[count
++] = cpu_to_le32(0);
921 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device
*adev
)
923 struct amdgpu_cu_info
*cu_info
= &adev
->gfx
.cu_info
;
924 uint32_t pg_always_on_cu_num
= 2;
925 uint32_t always_on_cu_num
;
927 uint32_t mask
, cu_bitmap
, counter
;
929 if (adev
->flags
& AMD_IS_APU
)
930 always_on_cu_num
= 4;
931 else if (adev
->asic_type
== CHIP_VEGA12
)
932 always_on_cu_num
= 8;
934 always_on_cu_num
= 12;
936 mutex_lock(&adev
->grbm_idx_mutex
);
937 for (i
= 0; i
< adev
->gfx
.config
.max_shader_engines
; i
++) {
938 for (j
= 0; j
< adev
->gfx
.config
.max_sh_per_se
; j
++) {
942 gfx_v9_0_select_se_sh(adev
, i
, j
, 0xffffffff);
944 for (k
= 0; k
< adev
->gfx
.config
.max_cu_per_sh
; k
++) {
945 if (cu_info
->bitmap
[i
][j
] & mask
) {
946 if (counter
== pg_always_on_cu_num
)
947 WREG32_SOC15(GC
, 0, mmRLC_PG_ALWAYS_ON_CU_MASK
, cu_bitmap
);
948 if (counter
< always_on_cu_num
)
957 WREG32_SOC15(GC
, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK
, cu_bitmap
);
958 cu_info
->ao_cu_bitmap
[i
][j
] = cu_bitmap
;
961 gfx_v9_0_select_se_sh(adev
, 0xffffffff, 0xffffffff, 0xffffffff);
962 mutex_unlock(&adev
->grbm_idx_mutex
);
965 static void gfx_v9_0_init_lbpw(struct amdgpu_device
*adev
)
969 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
970 WREG32_SOC15(GC
, 0, mmRLC_LB_THR_CONFIG_1
, 0x0000007F);
971 WREG32_SOC15(GC
, 0, mmRLC_LB_THR_CONFIG_2
, 0x0333A5A7);
972 WREG32_SOC15(GC
, 0, mmRLC_LB_THR_CONFIG_3
, 0x00000077);
973 WREG32_SOC15(GC
, 0, mmRLC_LB_THR_CONFIG_4
, (0x30 | 0x40 << 8 | 0x02FA << 16));
975 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
976 WREG32_SOC15(GC
, 0, mmRLC_LB_CNTR_INIT
, 0x00000000);
978 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
979 WREG32_SOC15(GC
, 0, mmRLC_LB_CNTR_MAX
, 0x00000500);
981 mutex_lock(&adev
->grbm_idx_mutex
);
982 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
983 gfx_v9_0_select_se_sh(adev
, 0xffffffff, 0xffffffff, 0xffffffff);
984 WREG32_SOC15(GC
, 0, mmRLC_LB_INIT_CU_MASK
, 0xffffffff);
986 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
987 data
= REG_SET_FIELD(0, RLC_LB_PARAMS
, FIFO_SAMPLES
, 0x0003);
988 data
|= REG_SET_FIELD(data
, RLC_LB_PARAMS
, PG_IDLE_SAMPLES
, 0x0010);
989 data
|= REG_SET_FIELD(data
, RLC_LB_PARAMS
, PG_IDLE_SAMPLE_INTERVAL
, 0x033F);
990 WREG32_SOC15(GC
, 0, mmRLC_LB_PARAMS
, data
);
992 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
993 data
= RREG32_SOC15(GC
, 0, mmRLC_GPM_GENERAL_7
);
996 WREG32_SOC15(GC
, 0, mmRLC_GPM_GENERAL_7
, data
);
999 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1000 * programmed in gfx_v9_0_init_always_on_cu_mask()
1003 /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is marked reserved,
1004 * but is still used for RLC_LB_CNTL configuration */
1005 data
= RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK
;
1006 data
|= REG_SET_FIELD(data
, RLC_LB_CNTL
, CU_MASK_USED_OFF_HYST
, 0x09);
1007 data
|= REG_SET_FIELD(data
, RLC_LB_CNTL
, RESERVED
, 0x80000);
1008 WREG32_SOC15(GC
, 0, mmRLC_LB_CNTL
, data
);
1009 mutex_unlock(&adev
->grbm_idx_mutex
);
1011 gfx_v9_0_init_always_on_cu_mask(adev
);
1014 static void gfx_v9_4_init_lbpw(struct amdgpu_device
*adev
)
1018 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1019 WREG32_SOC15(GC
, 0, mmRLC_LB_THR_CONFIG_1
, 0x0000007F);
1020 WREG32_SOC15(GC
, 0, mmRLC_LB_THR_CONFIG_2
, 0x033388F8);
1021 WREG32_SOC15(GC
, 0, mmRLC_LB_THR_CONFIG_3
, 0x00000077);
1022 WREG32_SOC15(GC
, 0, mmRLC_LB_THR_CONFIG_4
, (0x10 | 0x27 << 8 | 0x02FA << 16));
1024 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1025 WREG32_SOC15(GC
, 0, mmRLC_LB_CNTR_INIT
, 0x00000000);
1027 /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1028 WREG32_SOC15(GC
, 0, mmRLC_LB_CNTR_MAX
, 0x00000800);
1030 mutex_lock(&adev
->grbm_idx_mutex
);
1031 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1032 gfx_v9_0_select_se_sh(adev
, 0xffffffff, 0xffffffff, 0xffffffff);
1033 WREG32_SOC15(GC
, 0, mmRLC_LB_INIT_CU_MASK
, 0xffffffff);
1035 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1036 data
= REG_SET_FIELD(0, RLC_LB_PARAMS
, FIFO_SAMPLES
, 0x0003);
1037 data
|= REG_SET_FIELD(data
, RLC_LB_PARAMS
, PG_IDLE_SAMPLES
, 0x0010);
1038 data
|= REG_SET_FIELD(data
, RLC_LB_PARAMS
, PG_IDLE_SAMPLE_INTERVAL
, 0x033F);
1039 WREG32_SOC15(GC
, 0, mmRLC_LB_PARAMS
, data
);
1041 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1042 data
= RREG32_SOC15(GC
, 0, mmRLC_GPM_GENERAL_7
);
1045 WREG32_SOC15(GC
, 0, mmRLC_GPM_GENERAL_7
, data
);
1048 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1049 * programmed in gfx_v9_0_init_always_on_cu_mask()
1052 /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is marked reserved,
1053 * but is still used for RLC_LB_CNTL configuration */
1054 data
= RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK
;
1055 data
|= REG_SET_FIELD(data
, RLC_LB_CNTL
, CU_MASK_USED_OFF_HYST
, 0x09);
1056 data
|= REG_SET_FIELD(data
, RLC_LB_CNTL
, RESERVED
, 0x80000);
1057 WREG32_SOC15(GC
, 0, mmRLC_LB_CNTL
, data
);
1058 mutex_unlock(&adev
->grbm_idx_mutex
);
1060 gfx_v9_0_init_always_on_cu_mask(adev
);
1063 static void gfx_v9_0_enable_lbpw(struct amdgpu_device
*adev
, bool enable
)
1065 WREG32_FIELD15(GC
, 0, RLC_LB_CNTL
, LOAD_BALANCE_ENABLE
, enable
? 1 : 0);
1068 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device
*adev
)
1073 static int gfx_v9_0_rlc_init(struct amdgpu_device
*adev
)
1075 const struct cs_section_def
*cs_data
;
1078 adev
->gfx
.rlc
.cs_data
= gfx9_cs_data
;
1080 cs_data
= adev
->gfx
.rlc
.cs_data
;
1083 /* init clear state block */
1084 r
= amdgpu_gfx_rlc_init_csb(adev
);
1089 if (adev
->asic_type
== CHIP_RAVEN
) {
1090 /* TODO: double check the cp_table_size for RV */
1091 adev
->gfx
.rlc
.cp_table_size
= ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1092 r
= amdgpu_gfx_rlc_init_cpt(adev
);
1097 switch (adev
->asic_type
) {
1099 gfx_v9_0_init_lbpw(adev
);
1102 gfx_v9_4_init_lbpw(adev
);
1111 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device
*adev
)
1115 r
= amdgpu_bo_reserve(adev
->gfx
.rlc
.clear_state_obj
, false);
1116 if (unlikely(r
!= 0))
1119 r
= amdgpu_bo_pin(adev
->gfx
.rlc
.clear_state_obj
,
1120 AMDGPU_GEM_DOMAIN_VRAM
);
1122 adev
->gfx
.rlc
.clear_state_gpu_addr
=
1123 amdgpu_bo_gpu_offset(adev
->gfx
.rlc
.clear_state_obj
);
1125 amdgpu_bo_unreserve(adev
->gfx
.rlc
.clear_state_obj
);
1130 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device
*adev
)
1134 if (!adev
->gfx
.rlc
.clear_state_obj
)
1137 r
= amdgpu_bo_reserve(adev
->gfx
.rlc
.clear_state_obj
, true);
1138 if (likely(r
== 0)) {
1139 amdgpu_bo_unpin(adev
->gfx
.rlc
.clear_state_obj
);
1140 amdgpu_bo_unreserve(adev
->gfx
.rlc
.clear_state_obj
);
1144 static void gfx_v9_0_mec_fini(struct amdgpu_device
*adev
)
1146 amdgpu_bo_free_kernel(&adev
->gfx
.mec
.hpd_eop_obj
, NULL
, NULL
);
1147 amdgpu_bo_free_kernel(&adev
->gfx
.mec
.mec_fw_obj
, NULL
, NULL
);
1150 static int gfx_v9_0_mec_init(struct amdgpu_device
*adev
)
1154 const __le32
*fw_data
;
1157 size_t mec_hpd_size
;
1159 const struct gfx_firmware_header_v1_0
*mec_hdr
;
1161 bitmap_zero(adev
->gfx
.mec
.queue_bitmap
, AMDGPU_MAX_COMPUTE_QUEUES
);
1163 /* take ownership of the relevant compute queues */
1164 amdgpu_gfx_compute_queue_acquire(adev
);
1165 mec_hpd_size
= adev
->gfx
.num_compute_rings
* GFX9_MEC_HPD_SIZE
;
1167 r
= amdgpu_bo_create_reserved(adev
, mec_hpd_size
, PAGE_SIZE
,
1168 AMDGPU_GEM_DOMAIN_VRAM
,
1169 &adev
->gfx
.mec
.hpd_eop_obj
,
1170 &adev
->gfx
.mec
.hpd_eop_gpu_addr
,
1173 dev_warn(adev
->dev
, "(%d) create HDP EOP bo failed\n", r
);
1174 gfx_v9_0_mec_fini(adev
);
1178 memset(hpd
, 0, adev
->gfx
.mec
.hpd_eop_obj
->tbo
.mem
.size
);
1180 amdgpu_bo_kunmap(adev
->gfx
.mec
.hpd_eop_obj
);
1181 amdgpu_bo_unreserve(adev
->gfx
.mec
.hpd_eop_obj
);
1183 mec_hdr
= (const struct gfx_firmware_header_v1_0
*)adev
->gfx
.mec_fw
->data
;
1185 fw_data
= (const __le32
*)
1186 (adev
->gfx
.mec_fw
->data
+
1187 le32_to_cpu(mec_hdr
->header
.ucode_array_offset_bytes
));
1188 fw_size
= le32_to_cpu(mec_hdr
->header
.ucode_size_bytes
) / 4;
1190 r
= amdgpu_bo_create_reserved(adev
, mec_hdr
->header
.ucode_size_bytes
,
1191 PAGE_SIZE
, AMDGPU_GEM_DOMAIN_GTT
,
1192 &adev
->gfx
.mec
.mec_fw_obj
,
1193 &adev
->gfx
.mec
.mec_fw_gpu_addr
,
1196 dev_warn(adev
->dev
, "(%d) create mec firmware bo failed\n", r
);
1197 gfx_v9_0_mec_fini(adev
);
1201 memcpy(fw
, fw_data
, fw_size
);
1203 amdgpu_bo_kunmap(adev
->gfx
.mec
.mec_fw_obj
);
1204 amdgpu_bo_unreserve(adev
->gfx
.mec
.mec_fw_obj
);
1209 static uint32_t wave_read_ind(struct amdgpu_device
*adev
, uint32_t simd
, uint32_t wave
, uint32_t address
)
1211 WREG32_SOC15(GC
, 0, mmSQ_IND_INDEX
,
1212 (wave
<< SQ_IND_INDEX__WAVE_ID__SHIFT
) |
1213 (simd
<< SQ_IND_INDEX__SIMD_ID__SHIFT
) |
1214 (address
<< SQ_IND_INDEX__INDEX__SHIFT
) |
1215 (SQ_IND_INDEX__FORCE_READ_MASK
));
1216 return RREG32_SOC15(GC
, 0, mmSQ_IND_DATA
);
1219 static void wave_read_regs(struct amdgpu_device
*adev
, uint32_t simd
,
1220 uint32_t wave
, uint32_t thread
,
1221 uint32_t regno
, uint32_t num
, uint32_t *out
)
1223 WREG32_SOC15(GC
, 0, mmSQ_IND_INDEX
,
1224 (wave
<< SQ_IND_INDEX__WAVE_ID__SHIFT
) |
1225 (simd
<< SQ_IND_INDEX__SIMD_ID__SHIFT
) |
1226 (regno
<< SQ_IND_INDEX__INDEX__SHIFT
) |
1227 (thread
<< SQ_IND_INDEX__THREAD_ID__SHIFT
) |
1228 (SQ_IND_INDEX__FORCE_READ_MASK
) |
1229 (SQ_IND_INDEX__AUTO_INCR_MASK
));
1231 *(out
++) = RREG32_SOC15(GC
, 0, mmSQ_IND_DATA
);
1234 static void gfx_v9_0_read_wave_data(struct amdgpu_device
*adev
, uint32_t simd
, uint32_t wave
, uint32_t *dst
, int *no_fields
)
1236 /* type 1 wave data */
1237 dst
[(*no_fields
)++] = 1;
1238 dst
[(*no_fields
)++] = wave_read_ind(adev
, simd
, wave
, ixSQ_WAVE_STATUS
);
1239 dst
[(*no_fields
)++] = wave_read_ind(adev
, simd
, wave
, ixSQ_WAVE_PC_LO
);
1240 dst
[(*no_fields
)++] = wave_read_ind(adev
, simd
, wave
, ixSQ_WAVE_PC_HI
);
1241 dst
[(*no_fields
)++] = wave_read_ind(adev
, simd
, wave
, ixSQ_WAVE_EXEC_LO
);
1242 dst
[(*no_fields
)++] = wave_read_ind(adev
, simd
, wave
, ixSQ_WAVE_EXEC_HI
);
1243 dst
[(*no_fields
)++] = wave_read_ind(adev
, simd
, wave
, ixSQ_WAVE_HW_ID
);
1244 dst
[(*no_fields
)++] = wave_read_ind(adev
, simd
, wave
, ixSQ_WAVE_INST_DW0
);
1245 dst
[(*no_fields
)++] = wave_read_ind(adev
, simd
, wave
, ixSQ_WAVE_INST_DW1
);
1246 dst
[(*no_fields
)++] = wave_read_ind(adev
, simd
, wave
, ixSQ_WAVE_GPR_ALLOC
);
1247 dst
[(*no_fields
)++] = wave_read_ind(adev
, simd
, wave
, ixSQ_WAVE_LDS_ALLOC
);
1248 dst
[(*no_fields
)++] = wave_read_ind(adev
, simd
, wave
, ixSQ_WAVE_TRAPSTS
);
1249 dst
[(*no_fields
)++] = wave_read_ind(adev
, simd
, wave
, ixSQ_WAVE_IB_STS
);
1250 dst
[(*no_fields
)++] = wave_read_ind(adev
, simd
, wave
, ixSQ_WAVE_IB_DBG0
);
1251 dst
[(*no_fields
)++] = wave_read_ind(adev
, simd
, wave
, ixSQ_WAVE_M0
);
1254 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device
*adev
, uint32_t simd
,
1255 uint32_t wave
, uint32_t start
,
1256 uint32_t size
, uint32_t *dst
)
1259 adev
, simd
, wave
, 0,
1260 start
+ SQIND_WAVE_SGPRS_OFFSET
, size
, dst
);
1263 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device
*adev
, uint32_t simd
,
1264 uint32_t wave
, uint32_t thread
,
1265 uint32_t start
, uint32_t size
,
1269 adev
, simd
, wave
, thread
,
1270 start
+ SQIND_WAVE_VGPRS_OFFSET
, size
, dst
);
1273 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device
*adev
,
1274 u32 me
, u32 pipe
, u32 q
)
1276 soc15_grbm_select(adev
, me
, pipe
, q
, 0);
1279 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs
= {
1280 .get_gpu_clock_counter
= &gfx_v9_0_get_gpu_clock_counter
,
1281 .select_se_sh
= &gfx_v9_0_select_se_sh
,
1282 .read_wave_data
= &gfx_v9_0_read_wave_data
,
1283 .read_wave_sgprs
= &gfx_v9_0_read_wave_sgprs
,
1284 .read_wave_vgprs
= &gfx_v9_0_read_wave_vgprs
,
1285 .select_me_pipe_q
= &gfx_v9_0_select_me_pipe_q
1288 static int gfx_v9_0_gpu_early_init(struct amdgpu_device
*adev
)
1293 adev
->gfx
.funcs
= &gfx_v9_0_gfx_funcs
;
1295 switch (adev
->asic_type
) {
1297 adev
->gfx
.config
.max_hw_contexts
= 8;
1298 adev
->gfx
.config
.sc_prim_fifo_size_frontend
= 0x20;
1299 adev
->gfx
.config
.sc_prim_fifo_size_backend
= 0x100;
1300 adev
->gfx
.config
.sc_hiz_tile_fifo_size
= 0x30;
1301 adev
->gfx
.config
.sc_earlyz_tile_fifo_size
= 0x4C0;
1302 gb_addr_config
= VEGA10_GB_ADDR_CONFIG_GOLDEN
;
1305 adev
->gfx
.config
.max_hw_contexts
= 8;
1306 adev
->gfx
.config
.sc_prim_fifo_size_frontend
= 0x20;
1307 adev
->gfx
.config
.sc_prim_fifo_size_backend
= 0x100;
1308 adev
->gfx
.config
.sc_hiz_tile_fifo_size
= 0x30;
1309 adev
->gfx
.config
.sc_earlyz_tile_fifo_size
= 0x4C0;
1310 gb_addr_config
= VEGA12_GB_ADDR_CONFIG_GOLDEN
;
1311 DRM_INFO("fix gfx.config for vega12\n");
1314 adev
->gfx
.config
.max_hw_contexts
= 8;
1315 adev
->gfx
.config
.sc_prim_fifo_size_frontend
= 0x20;
1316 adev
->gfx
.config
.sc_prim_fifo_size_backend
= 0x100;
1317 adev
->gfx
.config
.sc_hiz_tile_fifo_size
= 0x30;
1318 adev
->gfx
.config
.sc_earlyz_tile_fifo_size
= 0x4C0;
1319 gb_addr_config
= RREG32_SOC15(GC
, 0, mmGB_ADDR_CONFIG
);
1320 gb_addr_config
&= ~0xf3e777ff;
1321 gb_addr_config
|= 0x22014042;
1322 /* check vbios table if gpu info is not available */
1323 err
= amdgpu_atomfirmware_get_gfx_info(adev
);
1328 adev
->gfx
.config
.max_hw_contexts
= 8;
1329 adev
->gfx
.config
.sc_prim_fifo_size_frontend
= 0x20;
1330 adev
->gfx
.config
.sc_prim_fifo_size_backend
= 0x100;
1331 adev
->gfx
.config
.sc_hiz_tile_fifo_size
= 0x30;
1332 adev
->gfx
.config
.sc_earlyz_tile_fifo_size
= 0x4C0;
1333 if (adev
->rev_id
>= 8)
1334 gb_addr_config
= RAVEN2_GB_ADDR_CONFIG_GOLDEN
;
1336 gb_addr_config
= RAVEN_GB_ADDR_CONFIG_GOLDEN
;
1343 adev
->gfx
.config
.gb_addr_config
= gb_addr_config
;
1345 adev
->gfx
.config
.gb_addr_config_fields
.num_pipes
= 1 <<
1347 adev
->gfx
.config
.gb_addr_config
,
1351 adev
->gfx
.config
.max_tile_pipes
=
1352 adev
->gfx
.config
.gb_addr_config_fields
.num_pipes
;
1354 adev
->gfx
.config
.gb_addr_config_fields
.num_banks
= 1 <<
1356 adev
->gfx
.config
.gb_addr_config
,
1359 adev
->gfx
.config
.gb_addr_config_fields
.max_compress_frags
= 1 <<
1361 adev
->gfx
.config
.gb_addr_config
,
1363 MAX_COMPRESSED_FRAGS
);
1364 adev
->gfx
.config
.gb_addr_config_fields
.num_rb_per_se
= 1 <<
1366 adev
->gfx
.config
.gb_addr_config
,
1369 adev
->gfx
.config
.gb_addr_config_fields
.num_se
= 1 <<
1371 adev
->gfx
.config
.gb_addr_config
,
1373 NUM_SHADER_ENGINES
);
1374 adev
->gfx
.config
.gb_addr_config_fields
.pipe_interleave_size
= 1 << (8 +
1376 adev
->gfx
.config
.gb_addr_config
,
1378 PIPE_INTERLEAVE_SIZE
));
1383 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device
*adev
,
1384 struct amdgpu_ngg_buf
*ngg_buf
,
1386 int default_size_se
)
1391 dev_err(adev
->dev
, "Buffer size is invalid: %d\n", size_se
);
1394 size_se
= size_se
? size_se
: default_size_se
;
1396 ngg_buf
->size
= size_se
* adev
->gfx
.config
.max_shader_engines
;
1397 r
= amdgpu_bo_create_kernel(adev
, ngg_buf
->size
,
1398 PAGE_SIZE
, AMDGPU_GEM_DOMAIN_VRAM
,
1403 dev_err(adev
->dev
, "(%d) failed to create NGG buffer\n", r
);
1406 ngg_buf
->bo_size
= amdgpu_bo_size(ngg_buf
->bo
);
1411 static int gfx_v9_0_ngg_fini(struct amdgpu_device
*adev
)
1415 for (i
= 0; i
< NGG_BUF_MAX
; i
++)
1416 amdgpu_bo_free_kernel(&adev
->gfx
.ngg
.buf
[i
].bo
,
1417 &adev
->gfx
.ngg
.buf
[i
].gpu_addr
,
1420 memset(&adev
->gfx
.ngg
.buf
[0], 0,
1421 sizeof(struct amdgpu_ngg_buf
) * NGG_BUF_MAX
);
1423 adev
->gfx
.ngg
.init
= false;
1428 static int gfx_v9_0_ngg_init(struct amdgpu_device
*adev
)
1432 if (!amdgpu_ngg
|| adev
->gfx
.ngg
.init
== true)
1435 /* GDS reserve memory: 64 bytes alignment */
1436 adev
->gfx
.ngg
.gds_reserve_size
= ALIGN(5 * 4, 0x40);
1437 adev
->gds
.mem
.total_size
-= adev
->gfx
.ngg
.gds_reserve_size
;
1438 adev
->gds
.mem
.gfx_partition_size
-= adev
->gfx
.ngg
.gds_reserve_size
;
1439 adev
->gfx
.ngg
.gds_reserve_addr
= RREG32_SOC15(GC
, 0, mmGDS_VMID0_BASE
);
1440 adev
->gfx
.ngg
.gds_reserve_addr
+= RREG32_SOC15(GC
, 0, mmGDS_VMID0_SIZE
);
1442 /* Primitive Buffer */
1443 r
= gfx_v9_0_ngg_create_buf(adev
, &adev
->gfx
.ngg
.buf
[NGG_PRIM
],
1444 amdgpu_prim_buf_per_se
,
1447 dev_err(adev
->dev
, "Failed to create Primitive Buffer\n");
1451 /* Position Buffer */
1452 r
= gfx_v9_0_ngg_create_buf(adev
, &adev
->gfx
.ngg
.buf
[NGG_POS
],
1453 amdgpu_pos_buf_per_se
,
1456 dev_err(adev
->dev
, "Failed to create Position Buffer\n");
1460 /* Control Sideband */
1461 r
= gfx_v9_0_ngg_create_buf(adev
, &adev
->gfx
.ngg
.buf
[NGG_CNTL
],
1462 amdgpu_cntl_sb_buf_per_se
,
1465 dev_err(adev
->dev
, "Failed to create Control Sideband Buffer\n");
1469 /* Parameter Cache, not created by default */
1470 if (amdgpu_param_buf_per_se
<= 0)
1473 r
= gfx_v9_0_ngg_create_buf(adev
, &adev
->gfx
.ngg
.buf
[NGG_PARAM
],
1474 amdgpu_param_buf_per_se
,
1477 dev_err(adev
->dev
, "Failed to create Parameter Cache\n");
1482 adev
->gfx
.ngg
.init
= true;
1485 gfx_v9_0_ngg_fini(adev
);
1489 static int gfx_v9_0_ngg_en(struct amdgpu_device
*adev
)
1491 struct amdgpu_ring
*ring
= &adev
->gfx
.gfx_ring
[0];
1498 /* Program buffer size */
1499 data
= REG_SET_FIELD(0, WD_BUF_RESOURCE_1
, INDEX_BUF_SIZE
,
1500 adev
->gfx
.ngg
.buf
[NGG_PRIM
].size
>> 8);
1501 data
= REG_SET_FIELD(data
, WD_BUF_RESOURCE_1
, POS_BUF_SIZE
,
1502 adev
->gfx
.ngg
.buf
[NGG_POS
].size
>> 8);
1503 WREG32_SOC15(GC
, 0, mmWD_BUF_RESOURCE_1
, data
);
1505 data
= REG_SET_FIELD(0, WD_BUF_RESOURCE_2
, CNTL_SB_BUF_SIZE
,
1506 adev
->gfx
.ngg
.buf
[NGG_CNTL
].size
>> 8);
1507 data
= REG_SET_FIELD(data
, WD_BUF_RESOURCE_2
, PARAM_BUF_SIZE
,
1508 adev
->gfx
.ngg
.buf
[NGG_PARAM
].size
>> 10);
1509 WREG32_SOC15(GC
, 0, mmWD_BUF_RESOURCE_2
, data
);
1511 /* Program buffer base address */
1512 base
= lower_32_bits(adev
->gfx
.ngg
.buf
[NGG_PRIM
].gpu_addr
);
1513 data
= REG_SET_FIELD(0, WD_INDEX_BUF_BASE
, BASE
, base
);
1514 WREG32_SOC15(GC
, 0, mmWD_INDEX_BUF_BASE
, data
);
1516 base
= upper_32_bits(adev
->gfx
.ngg
.buf
[NGG_PRIM
].gpu_addr
);
1517 data
= REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI
, BASE_HI
, base
);
1518 WREG32_SOC15(GC
, 0, mmWD_INDEX_BUF_BASE_HI
, data
);
1520 base
= lower_32_bits(adev
->gfx
.ngg
.buf
[NGG_POS
].gpu_addr
);
1521 data
= REG_SET_FIELD(0, WD_POS_BUF_BASE
, BASE
, base
);
1522 WREG32_SOC15(GC
, 0, mmWD_POS_BUF_BASE
, data
);
1524 base
= upper_32_bits(adev
->gfx
.ngg
.buf
[NGG_POS
].gpu_addr
);
1525 data
= REG_SET_FIELD(0, WD_POS_BUF_BASE_HI
, BASE_HI
, base
);
1526 WREG32_SOC15(GC
, 0, mmWD_POS_BUF_BASE_HI
, data
);
1528 base
= lower_32_bits(adev
->gfx
.ngg
.buf
[NGG_CNTL
].gpu_addr
);
1529 data
= REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE
, BASE
, base
);
1530 WREG32_SOC15(GC
, 0, mmWD_CNTL_SB_BUF_BASE
, data
);
1532 base
= upper_32_bits(adev
->gfx
.ngg
.buf
[NGG_CNTL
].gpu_addr
);
1533 data
= REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI
, BASE_HI
, base
);
1534 WREG32_SOC15(GC
, 0, mmWD_CNTL_SB_BUF_BASE_HI
, data
);
1536 /* Clear GDS reserved memory */
1537 r
= amdgpu_ring_alloc(ring
, 17);
1539 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
1544 gfx_v9_0_write_data_to_reg(ring
, 0, false,
1545 SOC15_REG_OFFSET(GC
, 0, mmGDS_VMID0_SIZE
),
1546 (adev
->gds
.mem
.total_size
+
1547 adev
->gfx
.ngg
.gds_reserve_size
));
1549 amdgpu_ring_write(ring
, PACKET3(PACKET3_DMA_DATA
, 5));
1550 amdgpu_ring_write(ring
, (PACKET3_DMA_DATA_CP_SYNC
|
1551 PACKET3_DMA_DATA_DST_SEL(1) |
1552 PACKET3_DMA_DATA_SRC_SEL(2)));
1553 amdgpu_ring_write(ring
, 0);
1554 amdgpu_ring_write(ring
, 0);
1555 amdgpu_ring_write(ring
, adev
->gfx
.ngg
.gds_reserve_addr
);
1556 amdgpu_ring_write(ring
, 0);
1557 amdgpu_ring_write(ring
, PACKET3_DMA_DATA_CMD_RAW_WAIT
|
1558 adev
->gfx
.ngg
.gds_reserve_size
);
1560 gfx_v9_0_write_data_to_reg(ring
, 0, false,
1561 SOC15_REG_OFFSET(GC
, 0, mmGDS_VMID0_SIZE
), 0);
1563 amdgpu_ring_commit(ring
);
1568 static int gfx_v9_0_compute_ring_init(struct amdgpu_device
*adev
, int ring_id
,
1569 int mec
, int pipe
, int queue
)
1573 struct amdgpu_ring
*ring
= &adev
->gfx
.compute_ring
[ring_id
];
1575 ring
= &adev
->gfx
.compute_ring
[ring_id
];
1580 ring
->queue
= queue
;
1582 ring
->ring_obj
= NULL
;
1583 ring
->use_doorbell
= true;
1584 ring
->doorbell_index
= (adev
->doorbell_index
.mec_ring0
+ ring_id
) << 1;
1585 ring
->eop_gpu_addr
= adev
->gfx
.mec
.hpd_eop_gpu_addr
1586 + (ring_id
* GFX9_MEC_HPD_SIZE
);
1587 sprintf(ring
->name
, "comp_%d.%d.%d", ring
->me
, ring
->pipe
, ring
->queue
);
1589 irq_type
= AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1590 + ((ring
->me
- 1) * adev
->gfx
.mec
.num_pipe_per_mec
)
1593 /* type-2 packets are deprecated on MEC, use type-3 instead */
1594 r
= amdgpu_ring_init(adev
, ring
, 1024,
1595 &adev
->gfx
.eop_irq
, irq_type
);
1603 static int gfx_v9_0_sw_init(void *handle
)
1605 int i
, j
, k
, r
, ring_id
;
1606 struct amdgpu_ring
*ring
;
1607 struct amdgpu_kiq
*kiq
;
1608 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
1610 switch (adev
->asic_type
) {
1615 adev
->gfx
.mec
.num_mec
= 2;
1618 adev
->gfx
.mec
.num_mec
= 1;
1622 adev
->gfx
.mec
.num_pipe_per_mec
= 4;
1623 adev
->gfx
.mec
.num_queue_per_pipe
= 8;
1626 r
= amdgpu_irq_add_id(adev
, SOC15_IH_CLIENTID_GRBM_CP
, GFX_9_0__SRCID__CP_EOP_INTERRUPT
, &adev
->gfx
.eop_irq
);
1630 /* Privileged reg */
1631 r
= amdgpu_irq_add_id(adev
, SOC15_IH_CLIENTID_GRBM_CP
, GFX_9_0__SRCID__CP_PRIV_REG_FAULT
,
1632 &adev
->gfx
.priv_reg_irq
);
1636 /* Privileged inst */
1637 r
= amdgpu_irq_add_id(adev
, SOC15_IH_CLIENTID_GRBM_CP
, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT
,
1638 &adev
->gfx
.priv_inst_irq
);
1642 adev
->gfx
.gfx_current_status
= AMDGPU_GFX_NORMAL_MODE
;
1644 gfx_v9_0_scratch_init(adev
);
1646 r
= gfx_v9_0_init_microcode(adev
);
1648 DRM_ERROR("Failed to load gfx firmware!\n");
1652 r
= adev
->gfx
.rlc
.funcs
->init(adev
);
1654 DRM_ERROR("Failed to init rlc BOs!\n");
1658 r
= gfx_v9_0_mec_init(adev
);
1660 DRM_ERROR("Failed to init MEC BOs!\n");
1664 /* set up the gfx ring */
1665 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; i
++) {
1666 ring
= &adev
->gfx
.gfx_ring
[i
];
1667 ring
->ring_obj
= NULL
;
1669 sprintf(ring
->name
, "gfx");
1671 sprintf(ring
->name
, "gfx_%d", i
);
1672 ring
->use_doorbell
= true;
1673 ring
->doorbell_index
= adev
->doorbell_index
.gfx_ring0
<< 1;
1674 r
= amdgpu_ring_init(adev
, ring
, 1024,
1675 &adev
->gfx
.eop_irq
, AMDGPU_CP_IRQ_GFX_EOP
);
1680 /* set up the compute queues - allocate horizontally across pipes */
1682 for (i
= 0; i
< adev
->gfx
.mec
.num_mec
; ++i
) {
1683 for (j
= 0; j
< adev
->gfx
.mec
.num_queue_per_pipe
; j
++) {
1684 for (k
= 0; k
< adev
->gfx
.mec
.num_pipe_per_mec
; k
++) {
1685 if (!amdgpu_gfx_is_mec_queue_enabled(adev
, i
, k
, j
))
1688 r
= gfx_v9_0_compute_ring_init(adev
,
1699 r
= amdgpu_gfx_kiq_init(adev
, GFX9_MEC_HPD_SIZE
);
1701 DRM_ERROR("Failed to init KIQ BOs!\n");
1705 kiq
= &adev
->gfx
.kiq
;
1706 r
= amdgpu_gfx_kiq_init_ring(adev
, &kiq
->ring
, &kiq
->irq
);
1710 /* create MQD for all compute queues as well as KIQ for SRIOV case */
1711 r
= amdgpu_gfx_compute_mqd_sw_init(adev
, sizeof(struct v9_mqd_allocation
));
1715 adev
->gfx
.ce_ram_size
= 0x8000;
1717 r
= gfx_v9_0_gpu_early_init(adev
);
1721 r
= gfx_v9_0_ngg_init(adev
);
1729 static int gfx_v9_0_sw_fini(void *handle
)
1732 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
1734 amdgpu_bo_free_kernel(&adev
->gds
.oa_gfx_bo
, NULL
, NULL
);
1735 amdgpu_bo_free_kernel(&adev
->gds
.gws_gfx_bo
, NULL
, NULL
);
1736 amdgpu_bo_free_kernel(&adev
->gds
.gds_gfx_bo
, NULL
, NULL
);
1738 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; i
++)
1739 amdgpu_ring_fini(&adev
->gfx
.gfx_ring
[i
]);
1740 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++)
1741 amdgpu_ring_fini(&adev
->gfx
.compute_ring
[i
]);
1743 amdgpu_gfx_compute_mqd_sw_fini(adev
);
1744 amdgpu_gfx_kiq_free_ring(&adev
->gfx
.kiq
.ring
, &adev
->gfx
.kiq
.irq
);
1745 amdgpu_gfx_kiq_fini(adev
);
1747 gfx_v9_0_mec_fini(adev
);
1748 gfx_v9_0_ngg_fini(adev
);
1749 amdgpu_bo_free_kernel(&adev
->gfx
.rlc
.clear_state_obj
,
1750 &adev
->gfx
.rlc
.clear_state_gpu_addr
,
1751 (void **)&adev
->gfx
.rlc
.cs_ptr
);
1752 if (adev
->asic_type
== CHIP_RAVEN
) {
1753 amdgpu_bo_free_kernel(&adev
->gfx
.rlc
.cp_table_obj
,
1754 &adev
->gfx
.rlc
.cp_table_gpu_addr
,
1755 (void **)&adev
->gfx
.rlc
.cp_table_ptr
);
1757 gfx_v9_0_free_microcode(adev
);
1763 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device
*adev
)
1768 static void gfx_v9_0_select_se_sh(struct amdgpu_device
*adev
, u32 se_num
, u32 sh_num
, u32 instance
)
1772 if (instance
== 0xffffffff)
1773 data
= REG_SET_FIELD(0, GRBM_GFX_INDEX
, INSTANCE_BROADCAST_WRITES
, 1);
1775 data
= REG_SET_FIELD(0, GRBM_GFX_INDEX
, INSTANCE_INDEX
, instance
);
1777 if (se_num
== 0xffffffff)
1778 data
= REG_SET_FIELD(data
, GRBM_GFX_INDEX
, SE_BROADCAST_WRITES
, 1);
1780 data
= REG_SET_FIELD(data
, GRBM_GFX_INDEX
, SE_INDEX
, se_num
);
1782 if (sh_num
== 0xffffffff)
1783 data
= REG_SET_FIELD(data
, GRBM_GFX_INDEX
, SH_BROADCAST_WRITES
, 1);
1785 data
= REG_SET_FIELD(data
, GRBM_GFX_INDEX
, SH_INDEX
, sh_num
);
1787 WREG32_SOC15(GC
, 0, mmGRBM_GFX_INDEX
, data
);
1790 static u32
gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device
*adev
)
1794 data
= RREG32_SOC15(GC
, 0, mmCC_RB_BACKEND_DISABLE
);
1795 data
|= RREG32_SOC15(GC
, 0, mmGC_USER_RB_BACKEND_DISABLE
);
1797 data
&= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK
;
1798 data
>>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT
;
1800 mask
= amdgpu_gfx_create_bitmask(adev
->gfx
.config
.max_backends_per_se
/
1801 adev
->gfx
.config
.max_sh_per_se
);
1803 return (~data
) & mask
;
1806 static void gfx_v9_0_setup_rb(struct amdgpu_device
*adev
)
1811 u32 rb_bitmap_width_per_sh
= adev
->gfx
.config
.max_backends_per_se
/
1812 adev
->gfx
.config
.max_sh_per_se
;
1814 mutex_lock(&adev
->grbm_idx_mutex
);
1815 for (i
= 0; i
< adev
->gfx
.config
.max_shader_engines
; i
++) {
1816 for (j
= 0; j
< adev
->gfx
.config
.max_sh_per_se
; j
++) {
1817 gfx_v9_0_select_se_sh(adev
, i
, j
, 0xffffffff);
1818 data
= gfx_v9_0_get_rb_active_bitmap(adev
);
1819 active_rbs
|= data
<< ((i
* adev
->gfx
.config
.max_sh_per_se
+ j
) *
1820 rb_bitmap_width_per_sh
);
1823 gfx_v9_0_select_se_sh(adev
, 0xffffffff, 0xffffffff, 0xffffffff);
1824 mutex_unlock(&adev
->grbm_idx_mutex
);
1826 adev
->gfx
.config
.backend_enable_mask
= active_rbs
;
1827 adev
->gfx
.config
.num_rbs
= hweight32(active_rbs
);
/*
 * Constants consumed by gfx_v9_0_init_compute_vmid():
 *  - DEFAULT_SH_MEM_BASES is written into both 16-bit halves of the
 *    SH_MEM_BASES value.
 *  - VMIDs in the half-open range [FIRST_COMPUTE_VMID, LAST_COMPUTE_VMID)
 *    are programmed; LAST_COMPUTE_VMID itself is excluded by the loop bound.
 */
1830 #define DEFAULT_SH_MEM_BASES (0x6000)
1831 #define FIRST_COMPUTE_VMID (8)
1832 #define LAST_COMPUTE_VMID (16)
1833 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device
*adev
)
1836 uint32_t sh_mem_config
;
1837 uint32_t sh_mem_bases
;
1840 * Configure apertures:
1841 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
1842 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
1843 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
1845 sh_mem_bases
= DEFAULT_SH_MEM_BASES
| (DEFAULT_SH_MEM_BASES
<< 16);
1847 sh_mem_config
= SH_MEM_ADDRESS_MODE_64
|
1848 SH_MEM_ALIGNMENT_MODE_UNALIGNED
<<
1849 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT
;
1851 mutex_lock(&adev
->srbm_mutex
);
1852 for (i
= FIRST_COMPUTE_VMID
; i
< LAST_COMPUTE_VMID
; i
++) {
1853 soc15_grbm_select(adev
, 0, 0, 0, i
);
1854 /* CP and shaders */
1855 WREG32_SOC15(GC
, 0, mmSH_MEM_CONFIG
, sh_mem_config
);
1856 WREG32_SOC15(GC
, 0, mmSH_MEM_BASES
, sh_mem_bases
);
1858 soc15_grbm_select(adev
, 0, 0, 0, 0);
1859 mutex_unlock(&adev
->srbm_mutex
);
1862 static void gfx_v9_0_constants_init(struct amdgpu_device
*adev
)
1867 WREG32_FIELD15(GC
, 0, GRBM_CNTL
, READ_TIMEOUT
, 0xff);
1869 gfx_v9_0_tiling_mode_table_init(adev
);
1871 gfx_v9_0_setup_rb(adev
);
1872 gfx_v9_0_get_cu_info(adev
, &adev
->gfx
.cu_info
);
1873 adev
->gfx
.config
.db_debug2
= RREG32_SOC15(GC
, 0, mmDB_DEBUG2
);
1875 /* XXX SH_MEM regs */
1876 /* where to put LDS, scratch, GPUVM in FSA64 space */
1877 mutex_lock(&adev
->srbm_mutex
);
1878 for (i
= 0; i
< adev
->vm_manager
.id_mgr
[AMDGPU_GFXHUB
].num_ids
; i
++) {
1879 soc15_grbm_select(adev
, 0, 0, 0, i
);
1880 /* CP and shaders */
1882 tmp
= REG_SET_FIELD(0, SH_MEM_CONFIG
, ALIGNMENT_MODE
,
1883 SH_MEM_ALIGNMENT_MODE_UNALIGNED
);
1884 WREG32_SOC15(GC
, 0, mmSH_MEM_CONFIG
, tmp
);
1885 WREG32_SOC15(GC
, 0, mmSH_MEM_BASES
, 0);
1887 tmp
= REG_SET_FIELD(0, SH_MEM_CONFIG
, ALIGNMENT_MODE
,
1888 SH_MEM_ALIGNMENT_MODE_UNALIGNED
);
1889 WREG32_SOC15(GC
, 0, mmSH_MEM_CONFIG
, tmp
);
1890 tmp
= REG_SET_FIELD(0, SH_MEM_BASES
, PRIVATE_BASE
,
1891 (adev
->gmc
.private_aperture_start
>> 48));
1892 tmp
= REG_SET_FIELD(tmp
, SH_MEM_BASES
, SHARED_BASE
,
1893 (adev
->gmc
.shared_aperture_start
>> 48));
1894 WREG32_SOC15(GC
, 0, mmSH_MEM_BASES
, tmp
);
1897 soc15_grbm_select(adev
, 0, 0, 0, 0);
1899 mutex_unlock(&adev
->srbm_mutex
);
1901 gfx_v9_0_init_compute_vmid(adev
);
1903 mutex_lock(&adev
->grbm_idx_mutex
);
1905 * making sure that the following register writes will be broadcasted
1906 * to all the shaders
1908 gfx_v9_0_select_se_sh(adev
, 0xffffffff, 0xffffffff, 0xffffffff);
1910 WREG32_SOC15(GC
, 0, mmPA_SC_FIFO_SIZE
,
1911 (adev
->gfx
.config
.sc_prim_fifo_size_frontend
<<
1912 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT
) |
1913 (adev
->gfx
.config
.sc_prim_fifo_size_backend
<<
1914 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT
) |
1915 (adev
->gfx
.config
.sc_hiz_tile_fifo_size
<<
1916 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT
) |
1917 (adev
->gfx
.config
.sc_earlyz_tile_fifo_size
<<
1918 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT
));
1919 mutex_unlock(&adev
->grbm_idx_mutex
);
1923 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device
*adev
)
1928 mutex_lock(&adev
->grbm_idx_mutex
);
1929 for (i
= 0; i
< adev
->gfx
.config
.max_shader_engines
; i
++) {
1930 for (j
= 0; j
< adev
->gfx
.config
.max_sh_per_se
; j
++) {
1931 gfx_v9_0_select_se_sh(adev
, i
, j
, 0xffffffff);
1932 for (k
= 0; k
< adev
->usec_timeout
; k
++) {
1933 if (RREG32_SOC15(GC
, 0, mmRLC_SERDES_CU_MASTER_BUSY
) == 0)
1937 if (k
== adev
->usec_timeout
) {
1938 gfx_v9_0_select_se_sh(adev
, 0xffffffff,
1939 0xffffffff, 0xffffffff);
1940 mutex_unlock(&adev
->grbm_idx_mutex
);
1941 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
1947 gfx_v9_0_select_se_sh(adev
, 0xffffffff, 0xffffffff, 0xffffffff);
1948 mutex_unlock(&adev
->grbm_idx_mutex
);
1950 mask
= RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK
|
1951 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK
|
1952 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK
|
1953 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK
;
1954 for (k
= 0; k
< adev
->usec_timeout
; k
++) {
1955 if ((RREG32_SOC15(GC
, 0, mmRLC_SERDES_NONCU_MASTER_BUSY
) & mask
) == 0)
1961 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device
*adev
,
1964 u32 tmp
= RREG32_SOC15(GC
, 0, mmCP_INT_CNTL_RING0
);
1966 tmp
= REG_SET_FIELD(tmp
, CP_INT_CNTL_RING0
, CNTX_BUSY_INT_ENABLE
, enable
? 1 : 0);
1967 tmp
= REG_SET_FIELD(tmp
, CP_INT_CNTL_RING0
, CNTX_EMPTY_INT_ENABLE
, enable
? 1 : 0);
1968 tmp
= REG_SET_FIELD(tmp
, CP_INT_CNTL_RING0
, CMP_BUSY_INT_ENABLE
, enable
? 1 : 0);
1969 tmp
= REG_SET_FIELD(tmp
, CP_INT_CNTL_RING0
, GFX_IDLE_INT_ENABLE
, enable
? 1 : 0);
1971 WREG32_SOC15(GC
, 0, mmCP_INT_CNTL_RING0
, tmp
);
1974 static void gfx_v9_0_init_csb(struct amdgpu_device
*adev
)
1977 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_CSIB_ADDR_HI
),
1978 adev
->gfx
.rlc
.clear_state_gpu_addr
>> 32);
1979 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_CSIB_ADDR_LO
),
1980 adev
->gfx
.rlc
.clear_state_gpu_addr
& 0xfffffffc);
1981 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_CSIB_LENGTH
),
1982 adev
->gfx
.rlc
.clear_state_size
);
/*
 * gfx_v9_1_parse_ind_reg_list - scan the indirect part of the RLC format list
 * @register_list_format: register list format table (dwords)
 * @indirect_offset: index of the first indirect entry
 * @list_size: total number of dwords in @register_list_format
 * @unique_indirect_regs: in/out table of distinct indirect registers; a zero
 *                        entry marks a free slot
 * @unique_indirect_reg_count: capacity of @unique_indirect_regs
 * @indirect_start_offsets: out table receiving the start index of each
 *                          indirect block
 * @indirect_start_offsets_count: in/out number of used entries in
 *                                @indirect_start_offsets
 * @max_start_offsets_count: capacity of @indirect_start_offsets
 *
 * Each indirect block is a run of 3-dword entries terminated by 0xFFFFFFFF.
 * The third dword of every entry is looked up in (or added to)
 * @unique_indirect_regs; running out of slots there is fatal (BUG_ON).
 *
 * If there are more blocks than @max_start_offsets_count, a warning is
 * raised and the extra start offsets are dropped instead of being written
 * past the end of the caller's array.
 */
static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
				int indirect_offset,
				int list_size,
				int *unique_indirect_regs,
				int unique_indirect_reg_count,
				int *indirect_start_offsets,
				int *indirect_start_offsets_count,
				int max_start_offsets_count)
{
	int idx;

	for (; indirect_offset < list_size; indirect_offset++) {
		/* only record the block start while the output table has room */
		if (!WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count)) {
			indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
			*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
		}

		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
			/* skip the first two dwords of the entry */
			indirect_offset += 2;

			/* look for the matching index, or the first free slot */
			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
				if (unique_indirect_regs[idx] ==
					register_list_format[indirect_offset] ||
					!unique_indirect_regs[idx])
					break;
			}

			BUG_ON(idx >= unique_indirect_reg_count);

			if (!unique_indirect_regs[idx])
				unique_indirect_regs[idx] = register_list_format[indirect_offset];

			indirect_offset++;
		}
	}
}
2022 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device
*adev
)
2024 int unique_indirect_regs
[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2025 int unique_indirect_reg_count
= 0;
2027 int indirect_start_offsets
[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2028 int indirect_start_offsets_count
= 0;
2034 u32
*register_list_format
=
2035 kmalloc(adev
->gfx
.rlc
.reg_list_format_size_bytes
, GFP_KERNEL
);
2036 if (!register_list_format
)
2038 memcpy(register_list_format
, adev
->gfx
.rlc
.register_list_format
,
2039 adev
->gfx
.rlc
.reg_list_format_size_bytes
);
2041 /* setup unique_indirect_regs array and indirect_start_offsets array */
2042 unique_indirect_reg_count
= ARRAY_SIZE(unique_indirect_regs
);
2043 gfx_v9_1_parse_ind_reg_list(register_list_format
,
2044 adev
->gfx
.rlc
.reg_list_format_direct_reg_list_length
,
2045 adev
->gfx
.rlc
.reg_list_format_size_bytes
>> 2,
2046 unique_indirect_regs
,
2047 unique_indirect_reg_count
,
2048 indirect_start_offsets
,
2049 &indirect_start_offsets_count
,
2050 ARRAY_SIZE(indirect_start_offsets
));
2052 /* enable auto inc in case it is disabled */
2053 tmp
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_SRM_CNTL
));
2054 tmp
|= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK
;
2055 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_SRM_CNTL
), tmp
);
2057 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2058 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_SRM_ARAM_ADDR
),
2059 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET
);
2060 for (i
= 0; i
< adev
->gfx
.rlc
.reg_list_size_bytes
>> 2; i
++)
2061 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_SRM_ARAM_DATA
),
2062 adev
->gfx
.rlc
.register_restore
[i
]);
2064 /* load indirect register */
2065 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_GPM_SCRATCH_ADDR
),
2066 adev
->gfx
.rlc
.reg_list_format_start
);
2068 /* direct register portion */
2069 for (i
= 0; i
< adev
->gfx
.rlc
.reg_list_format_direct_reg_list_length
; i
++)
2070 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_GPM_SCRATCH_DATA
),
2071 register_list_format
[i
]);
2073 /* indirect register portion */
2074 while (i
< (adev
->gfx
.rlc
.reg_list_format_size_bytes
>> 2)) {
2075 if (register_list_format
[i
] == 0xFFFFFFFF) {
2076 WREG32_SOC15(GC
, 0, mmRLC_GPM_SCRATCH_DATA
, register_list_format
[i
++]);
2080 WREG32_SOC15(GC
, 0, mmRLC_GPM_SCRATCH_DATA
, register_list_format
[i
++]);
2081 WREG32_SOC15(GC
, 0, mmRLC_GPM_SCRATCH_DATA
, register_list_format
[i
++]);
2083 for (j
= 0; j
< unique_indirect_reg_count
; j
++) {
2084 if (register_list_format
[i
] == unique_indirect_regs
[j
]) {
2085 WREG32_SOC15(GC
, 0, mmRLC_GPM_SCRATCH_DATA
, j
);
2090 BUG_ON(j
>= unique_indirect_reg_count
);
2095 /* set save/restore list size */
2096 list_size
= adev
->gfx
.rlc
.reg_list_size_bytes
>> 2;
2097 list_size
= list_size
>> 1;
2098 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_GPM_SCRATCH_ADDR
),
2099 adev
->gfx
.rlc
.reg_restore_list_size
);
2100 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_GPM_SCRATCH_DATA
), list_size
);
2102 /* write the starting offsets to RLC scratch ram */
2103 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_GPM_SCRATCH_ADDR
),
2104 adev
->gfx
.rlc
.starting_offsets_start
);
2105 for (i
= 0; i
< ARRAY_SIZE(indirect_start_offsets
); i
++)
2106 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_GPM_SCRATCH_DATA
),
2107 indirect_start_offsets
[i
]);
2109 /* load unique indirect regs*/
2110 for (i
= 0; i
< ARRAY_SIZE(unique_indirect_regs
); i
++) {
2111 if (unique_indirect_regs
[i
] != 0) {
2112 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0
)
2113 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS
[i
],
2114 unique_indirect_regs
[i
] & 0x3FFFF);
2116 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_SRM_INDEX_CNTL_DATA_0
)
2117 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS
[i
],
2118 unique_indirect_regs
[i
] >> 20);
2122 kfree(register_list_format
);
2126 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device
*adev
)
2128 WREG32_FIELD15(GC
, 0, RLC_SRM_CNTL
, SRM_ENABLE
, 1);
2131 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device
*adev
,
2135 uint32_t default_data
= 0;
2137 default_data
= data
= RREG32(SOC15_REG_OFFSET(PWR
, 0, mmPWR_MISC_CNTL_STATUS
));
2138 if (enable
== true) {
2139 /* enable GFXIP control over CGPG */
2140 data
|= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK
;
2141 if(default_data
!= data
)
2142 WREG32(SOC15_REG_OFFSET(PWR
, 0, mmPWR_MISC_CNTL_STATUS
), data
);
2145 data
&= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK
;
2146 data
|= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT
);
2147 if(default_data
!= data
)
2148 WREG32(SOC15_REG_OFFSET(PWR
, 0, mmPWR_MISC_CNTL_STATUS
), data
);
2150 /* restore GFXIP control over GCPG */
2151 data
&= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK
;
2152 if(default_data
!= data
)
2153 WREG32(SOC15_REG_OFFSET(PWR
, 0, mmPWR_MISC_CNTL_STATUS
), data
);
2157 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device
*adev
)
2161 if (adev
->pg_flags
& (AMD_PG_SUPPORT_GFX_PG
|
2162 AMD_PG_SUPPORT_GFX_SMG
|
2163 AMD_PG_SUPPORT_GFX_DMG
)) {
2164 /* init IDLE_POLL_COUNT = 60 */
2165 data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmCP_RB_WPTR_POLL_CNTL
));
2166 data
&= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK
;
2167 data
|= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT
);
2168 WREG32(SOC15_REG_OFFSET(GC
, 0, mmCP_RB_WPTR_POLL_CNTL
), data
);
2170 /* init RLC PG Delay */
2172 data
|= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT
);
2173 data
|= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT
);
2174 data
|= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT
);
2175 data
|= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT
);
2176 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_DELAY
), data
);
2178 data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_DELAY_2
));
2179 data
&= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK
;
2180 data
|= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT
);
2181 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_DELAY_2
), data
);
2183 data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_DELAY_3
));
2184 data
&= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK
;
2185 data
|= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT
);
2186 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_DELAY_3
), data
);
2188 data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_AUTO_PG_CTRL
));
2189 data
&= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK
;
2191 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2192 data
|= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT
);
2193 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_AUTO_PG_CTRL
), data
);
2195 pwr_10_0_gfxip_control_over_cgpg(adev
, true);
2199 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device
*adev
,
2203 uint32_t default_data
= 0;
2205 default_data
= data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
));
2206 data
= REG_SET_FIELD(data
, RLC_PG_CNTL
,
2207 SMU_CLK_SLOWDOWN_ON_PU_ENABLE
,
2209 if (default_data
!= data
)
2210 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
), data
);
2213 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device
*adev
,
2217 uint32_t default_data
= 0;
2219 default_data
= data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
));
2220 data
= REG_SET_FIELD(data
, RLC_PG_CNTL
,
2221 SMU_CLK_SLOWDOWN_ON_PD_ENABLE
,
2223 if(default_data
!= data
)
2224 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
), data
);
2227 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device
*adev
,
2231 uint32_t default_data
= 0;
2233 default_data
= data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
));
2234 data
= REG_SET_FIELD(data
, RLC_PG_CNTL
,
2237 if(default_data
!= data
)
2238 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
), data
);
2241 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device
*adev
,
2244 uint32_t data
, default_data
;
2246 default_data
= data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
));
2247 data
= REG_SET_FIELD(data
, RLC_PG_CNTL
,
2248 GFX_POWER_GATING_ENABLE
,
2250 if(default_data
!= data
)
2251 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
), data
);
2254 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device
*adev
,
2257 uint32_t data
, default_data
;
2259 default_data
= data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
));
2260 data
= REG_SET_FIELD(data
, RLC_PG_CNTL
,
2261 GFX_PIPELINE_PG_ENABLE
,
2263 if(default_data
!= data
)
2264 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
), data
);
2267 /* read any GFX register to wake up GFX */
2268 data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmDB_RENDER_CONTROL
));
2271 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device
*adev
,
2274 uint32_t data
, default_data
;
2276 default_data
= data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
));
2277 data
= REG_SET_FIELD(data
, RLC_PG_CNTL
,
2278 STATIC_PER_CU_PG_ENABLE
,
2280 if(default_data
!= data
)
2281 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
), data
);
2284 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device
*adev
,
2287 uint32_t data
, default_data
;
2289 default_data
= data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
));
2290 data
= REG_SET_FIELD(data
, RLC_PG_CNTL
,
2291 DYN_PER_CU_PG_ENABLE
,
2293 if(default_data
!= data
)
2294 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
), data
);
2297 static void gfx_v9_0_init_pg(struct amdgpu_device
*adev
)
2299 gfx_v9_0_init_csb(adev
);
2302 * Rlc save restore list is workable since v2_1.
2303 * And it's needed by gfxoff feature.
2305 if (adev
->gfx
.rlc
.is_rlc_v2_1
) {
2306 gfx_v9_1_init_rlc_save_restore_list(adev
);
2307 gfx_v9_0_enable_save_restore_machine(adev
);
2310 if (adev
->pg_flags
& (AMD_PG_SUPPORT_GFX_PG
|
2311 AMD_PG_SUPPORT_GFX_SMG
|
2312 AMD_PG_SUPPORT_GFX_DMG
|
2314 AMD_PG_SUPPORT_GDS
|
2315 AMD_PG_SUPPORT_RLC_SMU_HS
)) {
2316 WREG32(mmRLC_JUMP_TABLE_RESTORE
,
2317 adev
->gfx
.rlc
.cp_table_gpu_addr
>> 8);
2318 gfx_v9_0_init_gfx_power_gating(adev
);
2322 void gfx_v9_0_rlc_stop(struct amdgpu_device
*adev
)
2324 WREG32_FIELD15(GC
, 0, RLC_CNTL
, RLC_ENABLE_F32
, 0);
2325 gfx_v9_0_enable_gui_idle_interrupt(adev
, false);
2326 gfx_v9_0_wait_for_rlc_serdes(adev
);
2329 static void gfx_v9_0_rlc_reset(struct amdgpu_device
*adev
)
2331 WREG32_FIELD15(GC
, 0, GRBM_SOFT_RESET
, SOFT_RESET_RLC
, 1);
2333 WREG32_FIELD15(GC
, 0, GRBM_SOFT_RESET
, SOFT_RESET_RLC
, 0);
2337 static void gfx_v9_0_rlc_start(struct amdgpu_device
*adev
)
2339 #ifdef AMDGPU_RLC_DEBUG_RETRY
2343 WREG32_FIELD15(GC
, 0, RLC_CNTL
, RLC_ENABLE_F32
, 1);
2346 /* carrizo do enable cp interrupt after cp inited */
2347 if (!(adev
->flags
& AMD_IS_APU
)) {
2348 gfx_v9_0_enable_gui_idle_interrupt(adev
, true);
2352 #ifdef AMDGPU_RLC_DEBUG_RETRY
2353 /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2354 rlc_ucode_ver
= RREG32_SOC15(GC
, 0, mmRLC_GPM_GENERAL_6
);
2355 if(rlc_ucode_ver
== 0x108) {
2356 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
2357 rlc_ucode_ver
, adev
->gfx
.rlc_fw_version
);
2358 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2359 * default is 0x9C4 to create a 100us interval */
2360 WREG32_SOC15(GC
, 0, mmRLC_GPM_TIMER_INT_3
, 0x9C4);
2361 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2362 * to disable the page fault retry interrupts, default is
2364 WREG32_SOC15(GC
, 0, mmRLC_GPM_GENERAL_12
, 0x100);
2369 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device
*adev
)
2371 const struct rlc_firmware_header_v2_0
*hdr
;
2372 const __le32
*fw_data
;
2373 unsigned i
, fw_size
;
2375 if (!adev
->gfx
.rlc_fw
)
2378 hdr
= (const struct rlc_firmware_header_v2_0
*)adev
->gfx
.rlc_fw
->data
;
2379 amdgpu_ucode_print_rlc_hdr(&hdr
->header
);
2381 fw_data
= (const __le32
*)(adev
->gfx
.rlc_fw
->data
+
2382 le32_to_cpu(hdr
->header
.ucode_array_offset_bytes
));
2383 fw_size
= le32_to_cpu(hdr
->header
.ucode_size_bytes
) / 4;
2385 WREG32_SOC15(GC
, 0, mmRLC_GPM_UCODE_ADDR
,
2386 RLCG_UCODE_LOADING_START_ADDRESS
);
2387 for (i
= 0; i
< fw_size
; i
++)
2388 WREG32_SOC15(GC
, 0, mmRLC_GPM_UCODE_DATA
, le32_to_cpup(fw_data
++));
2389 WREG32_SOC15(GC
, 0, mmRLC_GPM_UCODE_ADDR
, adev
->gfx
.rlc_fw_version
);
2394 static int gfx_v9_0_rlc_resume(struct amdgpu_device
*adev
)
2398 if (amdgpu_sriov_vf(adev
)) {
2399 gfx_v9_0_init_csb(adev
);
2403 adev
->gfx
.rlc
.funcs
->stop(adev
);
2406 WREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL
, 0);
2408 adev
->gfx
.rlc
.funcs
->reset(adev
);
2410 gfx_v9_0_init_pg(adev
);
2412 if (adev
->firmware
.load_type
!= AMDGPU_FW_LOAD_PSP
) {
2413 /* legacy rlc firmware loading */
2414 r
= gfx_v9_0_rlc_load_microcode(adev
);
2419 switch (adev
->asic_type
) {
2421 if (amdgpu_lbpw
== 0)
2422 gfx_v9_0_enable_lbpw(adev
, false);
2424 gfx_v9_0_enable_lbpw(adev
, true);
2427 if (amdgpu_lbpw
> 0)
2428 gfx_v9_0_enable_lbpw(adev
, true);
2430 gfx_v9_0_enable_lbpw(adev
, false);
2436 adev
->gfx
.rlc
.funcs
->start(adev
);
2441 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device
*adev
, bool enable
)
2444 u32 tmp
= RREG32_SOC15(GC
, 0, mmCP_ME_CNTL
);
2446 tmp
= REG_SET_FIELD(tmp
, CP_ME_CNTL
, ME_HALT
, enable
? 0 : 1);
2447 tmp
= REG_SET_FIELD(tmp
, CP_ME_CNTL
, PFP_HALT
, enable
? 0 : 1);
2448 tmp
= REG_SET_FIELD(tmp
, CP_ME_CNTL
, CE_HALT
, enable
? 0 : 1);
2450 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; i
++)
2451 adev
->gfx
.gfx_ring
[i
].sched
.ready
= false;
2453 WREG32_SOC15(GC
, 0, mmCP_ME_CNTL
, tmp
);
2457 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device
*adev
)
2459 const struct gfx_firmware_header_v1_0
*pfp_hdr
;
2460 const struct gfx_firmware_header_v1_0
*ce_hdr
;
2461 const struct gfx_firmware_header_v1_0
*me_hdr
;
2462 const __le32
*fw_data
;
2463 unsigned i
, fw_size
;
2465 if (!adev
->gfx
.me_fw
|| !adev
->gfx
.pfp_fw
|| !adev
->gfx
.ce_fw
)
2468 pfp_hdr
= (const struct gfx_firmware_header_v1_0
*)
2469 adev
->gfx
.pfp_fw
->data
;
2470 ce_hdr
= (const struct gfx_firmware_header_v1_0
*)
2471 adev
->gfx
.ce_fw
->data
;
2472 me_hdr
= (const struct gfx_firmware_header_v1_0
*)
2473 adev
->gfx
.me_fw
->data
;
2475 amdgpu_ucode_print_gfx_hdr(&pfp_hdr
->header
);
2476 amdgpu_ucode_print_gfx_hdr(&ce_hdr
->header
);
2477 amdgpu_ucode_print_gfx_hdr(&me_hdr
->header
);
2479 gfx_v9_0_cp_gfx_enable(adev
, false);
2482 fw_data
= (const __le32
*)
2483 (adev
->gfx
.pfp_fw
->data
+
2484 le32_to_cpu(pfp_hdr
->header
.ucode_array_offset_bytes
));
2485 fw_size
= le32_to_cpu(pfp_hdr
->header
.ucode_size_bytes
) / 4;
2486 WREG32_SOC15(GC
, 0, mmCP_PFP_UCODE_ADDR
, 0);
2487 for (i
= 0; i
< fw_size
; i
++)
2488 WREG32_SOC15(GC
, 0, mmCP_PFP_UCODE_DATA
, le32_to_cpup(fw_data
++));
2489 WREG32_SOC15(GC
, 0, mmCP_PFP_UCODE_ADDR
, adev
->gfx
.pfp_fw_version
);
2492 fw_data
= (const __le32
*)
2493 (adev
->gfx
.ce_fw
->data
+
2494 le32_to_cpu(ce_hdr
->header
.ucode_array_offset_bytes
));
2495 fw_size
= le32_to_cpu(ce_hdr
->header
.ucode_size_bytes
) / 4;
2496 WREG32_SOC15(GC
, 0, mmCP_CE_UCODE_ADDR
, 0);
2497 for (i
= 0; i
< fw_size
; i
++)
2498 WREG32_SOC15(GC
, 0, mmCP_CE_UCODE_DATA
, le32_to_cpup(fw_data
++));
2499 WREG32_SOC15(GC
, 0, mmCP_CE_UCODE_ADDR
, adev
->gfx
.ce_fw_version
);
2502 fw_data
= (const __le32
*)
2503 (adev
->gfx
.me_fw
->data
+
2504 le32_to_cpu(me_hdr
->header
.ucode_array_offset_bytes
));
2505 fw_size
= le32_to_cpu(me_hdr
->header
.ucode_size_bytes
) / 4;
2506 WREG32_SOC15(GC
, 0, mmCP_ME_RAM_WADDR
, 0);
2507 for (i
= 0; i
< fw_size
; i
++)
2508 WREG32_SOC15(GC
, 0, mmCP_ME_RAM_DATA
, le32_to_cpup(fw_data
++));
2509 WREG32_SOC15(GC
, 0, mmCP_ME_RAM_WADDR
, adev
->gfx
.me_fw_version
);
2514 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device
*adev
)
2516 struct amdgpu_ring
*ring
= &adev
->gfx
.gfx_ring
[0];
2517 const struct cs_section_def
*sect
= NULL
;
2518 const struct cs_extent_def
*ext
= NULL
;
2522 WREG32_SOC15(GC
, 0, mmCP_MAX_CONTEXT
, adev
->gfx
.config
.max_hw_contexts
- 1);
2523 WREG32_SOC15(GC
, 0, mmCP_DEVICE_ID
, 1);
2525 gfx_v9_0_cp_gfx_enable(adev
, true);
2527 r
= amdgpu_ring_alloc(ring
, gfx_v9_0_get_csb_size(adev
) + 4 + 3);
2529 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r
);
2533 amdgpu_ring_write(ring
, PACKET3(PACKET3_PREAMBLE_CNTL
, 0));
2534 amdgpu_ring_write(ring
, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE
);
2536 amdgpu_ring_write(ring
, PACKET3(PACKET3_CONTEXT_CONTROL
, 1));
2537 amdgpu_ring_write(ring
, 0x80000000);
2538 amdgpu_ring_write(ring
, 0x80000000);
2540 for (sect
= gfx9_cs_data
; sect
->section
!= NULL
; ++sect
) {
2541 for (ext
= sect
->section
; ext
->extent
!= NULL
; ++ext
) {
2542 if (sect
->id
== SECT_CONTEXT
) {
2543 amdgpu_ring_write(ring
,
2544 PACKET3(PACKET3_SET_CONTEXT_REG
,
2546 amdgpu_ring_write(ring
,
2547 ext
->reg_index
- PACKET3_SET_CONTEXT_REG_START
);
2548 for (i
= 0; i
< ext
->reg_count
; i
++)
2549 amdgpu_ring_write(ring
, ext
->extent
[i
]);
2554 amdgpu_ring_write(ring
, PACKET3(PACKET3_PREAMBLE_CNTL
, 0));
2555 amdgpu_ring_write(ring
, PACKET3_PREAMBLE_END_CLEAR_STATE
);
2557 amdgpu_ring_write(ring
, PACKET3(PACKET3_CLEAR_STATE
, 0));
2558 amdgpu_ring_write(ring
, 0);
2560 amdgpu_ring_write(ring
, PACKET3(PACKET3_SET_BASE
, 2));
2561 amdgpu_ring_write(ring
, PACKET3_BASE_INDEX(CE_PARTITION_BASE
));
2562 amdgpu_ring_write(ring
, 0x8000);
2563 amdgpu_ring_write(ring
, 0x8000);
2565 amdgpu_ring_write(ring
, PACKET3(PACKET3_SET_UCONFIG_REG
,1));
2566 tmp
= (PACKET3_SET_UCONFIG_REG_INDEX_TYPE
|
2567 (SOC15_REG_OFFSET(GC
, 0, mmVGT_INDEX_TYPE
) - PACKET3_SET_UCONFIG_REG_START
));
2568 amdgpu_ring_write(ring
, tmp
);
2569 amdgpu_ring_write(ring
, 0);
2571 amdgpu_ring_commit(ring
);
2576 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device
*adev
)
2578 struct amdgpu_ring
*ring
;
2581 u64 rb_addr
, rptr_addr
, wptr_gpu_addr
;
2583 /* Set the write pointer delay */
2584 WREG32_SOC15(GC
, 0, mmCP_RB_WPTR_DELAY
, 0);
2586 /* set the RB to use vmid 0 */
2587 WREG32_SOC15(GC
, 0, mmCP_RB_VMID
, 0);
2589 /* Set ring buffer size */
2590 ring
= &adev
->gfx
.gfx_ring
[0];
2591 rb_bufsz
= order_base_2(ring
->ring_size
/ 8);
2592 tmp
= REG_SET_FIELD(0, CP_RB0_CNTL
, RB_BUFSZ
, rb_bufsz
);
2593 tmp
= REG_SET_FIELD(tmp
, CP_RB0_CNTL
, RB_BLKSZ
, rb_bufsz
- 2);
2595 tmp
= REG_SET_FIELD(tmp
, CP_RB0_CNTL
, BUF_SWAP
, 1);
2597 WREG32_SOC15(GC
, 0, mmCP_RB0_CNTL
, tmp
);
2599 /* Initialize the ring buffer's write pointers */
2601 WREG32_SOC15(GC
, 0, mmCP_RB0_WPTR
, lower_32_bits(ring
->wptr
));
2602 WREG32_SOC15(GC
, 0, mmCP_RB0_WPTR_HI
, upper_32_bits(ring
->wptr
));
2604 /* set the wb address wether it's enabled or not */
2605 rptr_addr
= adev
->wb
.gpu_addr
+ (ring
->rptr_offs
* 4);
2606 WREG32_SOC15(GC
, 0, mmCP_RB0_RPTR_ADDR
, lower_32_bits(rptr_addr
));
2607 WREG32_SOC15(GC
, 0, mmCP_RB0_RPTR_ADDR_HI
, upper_32_bits(rptr_addr
) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK
);
2609 wptr_gpu_addr
= adev
->wb
.gpu_addr
+ (ring
->wptr_offs
* 4);
2610 WREG32_SOC15(GC
, 0, mmCP_RB_WPTR_POLL_ADDR_LO
, lower_32_bits(wptr_gpu_addr
));
2611 WREG32_SOC15(GC
, 0, mmCP_RB_WPTR_POLL_ADDR_HI
, upper_32_bits(wptr_gpu_addr
));
2614 WREG32_SOC15(GC
, 0, mmCP_RB0_CNTL
, tmp
);
2616 rb_addr
= ring
->gpu_addr
>> 8;
2617 WREG32_SOC15(GC
, 0, mmCP_RB0_BASE
, rb_addr
);
2618 WREG32_SOC15(GC
, 0, mmCP_RB0_BASE_HI
, upper_32_bits(rb_addr
));
2620 tmp
= RREG32_SOC15(GC
, 0, mmCP_RB_DOORBELL_CONTROL
);
2621 if (ring
->use_doorbell
) {
2622 tmp
= REG_SET_FIELD(tmp
, CP_RB_DOORBELL_CONTROL
,
2623 DOORBELL_OFFSET
, ring
->doorbell_index
);
2624 tmp
= REG_SET_FIELD(tmp
, CP_RB_DOORBELL_CONTROL
,
2627 tmp
= REG_SET_FIELD(tmp
, CP_RB_DOORBELL_CONTROL
, DOORBELL_EN
, 0);
2629 WREG32_SOC15(GC
, 0, mmCP_RB_DOORBELL_CONTROL
, tmp
);
2631 tmp
= REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER
,
2632 DOORBELL_RANGE_LOWER
, ring
->doorbell_index
);
2633 WREG32_SOC15(GC
, 0, mmCP_RB_DOORBELL_RANGE_LOWER
, tmp
);
2635 WREG32_SOC15(GC
, 0, mmCP_RB_DOORBELL_RANGE_UPPER
,
2636 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK
);
2639 /* start the ring */
2640 gfx_v9_0_cp_gfx_start(adev
);
2641 ring
->sched
.ready
= true;
2646 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device
*adev
, bool enable
)
2651 WREG32_SOC15(GC
, 0, mmCP_MEC_CNTL
, 0);
2653 WREG32_SOC15(GC
, 0, mmCP_MEC_CNTL
,
2654 (CP_MEC_CNTL__MEC_ME1_HALT_MASK
| CP_MEC_CNTL__MEC_ME2_HALT_MASK
));
2655 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++)
2656 adev
->gfx
.compute_ring
[i
].sched
.ready
= false;
2657 adev
->gfx
.kiq
.ring
.sched
.ready
= false;
2662 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device
*adev
)
2664 const struct gfx_firmware_header_v1_0
*mec_hdr
;
2665 const __le32
*fw_data
;
2669 if (!adev
->gfx
.mec_fw
)
2672 gfx_v9_0_cp_compute_enable(adev
, false);
2674 mec_hdr
= (const struct gfx_firmware_header_v1_0
*)adev
->gfx
.mec_fw
->data
;
2675 amdgpu_ucode_print_gfx_hdr(&mec_hdr
->header
);
2677 fw_data
= (const __le32
*)
2678 (adev
->gfx
.mec_fw
->data
+
2679 le32_to_cpu(mec_hdr
->header
.ucode_array_offset_bytes
));
2681 tmp
= REG_SET_FIELD(tmp
, CP_CPC_IC_BASE_CNTL
, VMID
, 0);
2682 tmp
= REG_SET_FIELD(tmp
, CP_CPC_IC_BASE_CNTL
, CACHE_POLICY
, 0);
2683 WREG32_SOC15(GC
, 0, mmCP_CPC_IC_BASE_CNTL
, tmp
);
2685 WREG32_SOC15(GC
, 0, mmCP_CPC_IC_BASE_LO
,
2686 adev
->gfx
.mec
.mec_fw_gpu_addr
& 0xFFFFF000);
2687 WREG32_SOC15(GC
, 0, mmCP_CPC_IC_BASE_HI
,
2688 upper_32_bits(adev
->gfx
.mec
.mec_fw_gpu_addr
));
2691 WREG32_SOC15(GC
, 0, mmCP_MEC_ME1_UCODE_ADDR
,
2692 mec_hdr
->jt_offset
);
2693 for (i
= 0; i
< mec_hdr
->jt_size
; i
++)
2694 WREG32_SOC15(GC
, 0, mmCP_MEC_ME1_UCODE_DATA
,
2695 le32_to_cpup(fw_data
+ mec_hdr
->jt_offset
+ i
));
2697 WREG32_SOC15(GC
, 0, mmCP_MEC_ME1_UCODE_ADDR
,
2698 adev
->gfx
.mec_fw_version
);
2699 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2705 static void gfx_v9_0_kiq_setting(struct amdgpu_ring
*ring
)
2708 struct amdgpu_device
*adev
= ring
->adev
;
2710 /* tell RLC which is KIQ queue */
2711 tmp
= RREG32_SOC15(GC
, 0, mmRLC_CP_SCHEDULERS
);
2713 tmp
|= (ring
->me
<< 5) | (ring
->pipe
<< 3) | (ring
->queue
);
2714 WREG32_SOC15(GC
, 0, mmRLC_CP_SCHEDULERS
, tmp
);
2716 WREG32_SOC15(GC
, 0, mmRLC_CP_SCHEDULERS
, tmp
);
2719 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device
*adev
)
2721 struct amdgpu_ring
*kiq_ring
= &adev
->gfx
.kiq
.ring
;
2722 uint64_t queue_mask
= 0;
2725 for (i
= 0; i
< AMDGPU_MAX_COMPUTE_QUEUES
; ++i
) {
2726 if (!test_bit(i
, adev
->gfx
.mec
.queue_bitmap
))
2729 /* This situation may be hit in the future if a new HW
2730 * generation exposes more than 64 queues. If so, the
2731 * definition of queue_mask needs updating */
2732 if (WARN_ON(i
>= (sizeof(queue_mask
)*8))) {
2733 DRM_ERROR("Invalid KCQ enabled: %d\n", i
);
2737 queue_mask
|= (1ull << i
);
2740 r
= amdgpu_ring_alloc(kiq_ring
, (7 * adev
->gfx
.num_compute_rings
) + 8);
2742 DRM_ERROR("Failed to lock KIQ (%d).\n", r
);
2747 amdgpu_ring_write(kiq_ring
, PACKET3(PACKET3_SET_RESOURCES
, 6));
2748 amdgpu_ring_write(kiq_ring
, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2749 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
2750 amdgpu_ring_write(kiq_ring
, lower_32_bits(queue_mask
)); /* queue mask lo */
2751 amdgpu_ring_write(kiq_ring
, upper_32_bits(queue_mask
)); /* queue mask hi */
2752 amdgpu_ring_write(kiq_ring
, 0); /* gws mask lo */
2753 amdgpu_ring_write(kiq_ring
, 0); /* gws mask hi */
2754 amdgpu_ring_write(kiq_ring
, 0); /* oac mask */
2755 amdgpu_ring_write(kiq_ring
, 0); /* gds heap base:0, gds heap size:0 */
2756 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++) {
2757 struct amdgpu_ring
*ring
= &adev
->gfx
.compute_ring
[i
];
2758 uint64_t mqd_addr
= amdgpu_bo_gpu_offset(ring
->mqd_obj
);
2759 uint64_t wptr_addr
= adev
->wb
.gpu_addr
+ (ring
->wptr_offs
* 4);
2761 amdgpu_ring_write(kiq_ring
, PACKET3(PACKET3_MAP_QUEUES
, 5));
2762 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
2763 amdgpu_ring_write(kiq_ring
, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
2764 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2765 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2766 PACKET3_MAP_QUEUES_QUEUE(ring
->queue
) |
2767 PACKET3_MAP_QUEUES_PIPE(ring
->pipe
) |
2768 PACKET3_MAP_QUEUES_ME((ring
->me
== 1 ? 0 : 1)) |
2769 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2770 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2771 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2772 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2773 amdgpu_ring_write(kiq_ring
, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring
->doorbell_index
));
2774 amdgpu_ring_write(kiq_ring
, lower_32_bits(mqd_addr
));
2775 amdgpu_ring_write(kiq_ring
, upper_32_bits(mqd_addr
));
2776 amdgpu_ring_write(kiq_ring
, lower_32_bits(wptr_addr
));
2777 amdgpu_ring_write(kiq_ring
, upper_32_bits(wptr_addr
));
2780 r
= amdgpu_ring_test_helper(kiq_ring
);
2782 DRM_ERROR("KCQ enable failed\n");
2787 static int gfx_v9_0_mqd_init(struct amdgpu_ring
*ring
)
2789 struct amdgpu_device
*adev
= ring
->adev
;
2790 struct v9_mqd
*mqd
= ring
->mqd_ptr
;
2791 uint64_t hqd_gpu_addr
, wb_gpu_addr
, eop_base_addr
;
2794 mqd
->header
= 0xC0310800;
2795 mqd
->compute_pipelinestat_enable
= 0x00000001;
2796 mqd
->compute_static_thread_mgmt_se0
= 0xffffffff;
2797 mqd
->compute_static_thread_mgmt_se1
= 0xffffffff;
2798 mqd
->compute_static_thread_mgmt_se2
= 0xffffffff;
2799 mqd
->compute_static_thread_mgmt_se3
= 0xffffffff;
2800 mqd
->compute_misc_reserved
= 0x00000003;
2802 mqd
->dynamic_cu_mask_addr_lo
=
2803 lower_32_bits(ring
->mqd_gpu_addr
2804 + offsetof(struct v9_mqd_allocation
, dynamic_cu_mask
));
2805 mqd
->dynamic_cu_mask_addr_hi
=
2806 upper_32_bits(ring
->mqd_gpu_addr
2807 + offsetof(struct v9_mqd_allocation
, dynamic_cu_mask
));
2809 eop_base_addr
= ring
->eop_gpu_addr
>> 8;
2810 mqd
->cp_hqd_eop_base_addr_lo
= eop_base_addr
;
2811 mqd
->cp_hqd_eop_base_addr_hi
= upper_32_bits(eop_base_addr
);
2813 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2814 tmp
= RREG32_SOC15(GC
, 0, mmCP_HQD_EOP_CONTROL
);
2815 tmp
= REG_SET_FIELD(tmp
, CP_HQD_EOP_CONTROL
, EOP_SIZE
,
2816 (order_base_2(GFX9_MEC_HPD_SIZE
/ 4) - 1));
2818 mqd
->cp_hqd_eop_control
= tmp
;
2820 /* enable doorbell? */
2821 tmp
= RREG32_SOC15(GC
, 0, mmCP_HQD_PQ_DOORBELL_CONTROL
);
2823 if (ring
->use_doorbell
) {
2824 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
2825 DOORBELL_OFFSET
, ring
->doorbell_index
);
2826 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
2828 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
2829 DOORBELL_SOURCE
, 0);
2830 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
2833 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
2837 mqd
->cp_hqd_pq_doorbell_control
= tmp
;
2839 /* disable the queue if it's active */
2841 mqd
->cp_hqd_dequeue_request
= 0;
2842 mqd
->cp_hqd_pq_rptr
= 0;
2843 mqd
->cp_hqd_pq_wptr_lo
= 0;
2844 mqd
->cp_hqd_pq_wptr_hi
= 0;
2846 /* set the pointer to the MQD */
2847 mqd
->cp_mqd_base_addr_lo
= ring
->mqd_gpu_addr
& 0xfffffffc;
2848 mqd
->cp_mqd_base_addr_hi
= upper_32_bits(ring
->mqd_gpu_addr
);
2850 /* set MQD vmid to 0 */
2851 tmp
= RREG32_SOC15(GC
, 0, mmCP_MQD_CONTROL
);
2852 tmp
= REG_SET_FIELD(tmp
, CP_MQD_CONTROL
, VMID
, 0);
2853 mqd
->cp_mqd_control
= tmp
;
2855 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
2856 hqd_gpu_addr
= ring
->gpu_addr
>> 8;
2857 mqd
->cp_hqd_pq_base_lo
= hqd_gpu_addr
;
2858 mqd
->cp_hqd_pq_base_hi
= upper_32_bits(hqd_gpu_addr
);
2860 /* set up the HQD, this is similar to CP_RB0_CNTL */
2861 tmp
= RREG32_SOC15(GC
, 0, mmCP_HQD_PQ_CONTROL
);
2862 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, QUEUE_SIZE
,
2863 (order_base_2(ring
->ring_size
/ 4) - 1));
2864 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, RPTR_BLOCK_SIZE
,
2865 ((order_base_2(AMDGPU_GPU_PAGE_SIZE
/ 4) - 1) << 8));
2867 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, ENDIAN_SWAP
, 1);
2869 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, UNORD_DISPATCH
, 0);
2870 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, ROQ_PQ_IB_FLIP
, 0);
2871 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, PRIV_STATE
, 1);
2872 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, KMD_QUEUE
, 1);
2873 mqd
->cp_hqd_pq_control
= tmp
;
2875 /* set the wb address whether it's enabled or not */
2876 wb_gpu_addr
= adev
->wb
.gpu_addr
+ (ring
->rptr_offs
* 4);
2877 mqd
->cp_hqd_pq_rptr_report_addr_lo
= wb_gpu_addr
& 0xfffffffc;
2878 mqd
->cp_hqd_pq_rptr_report_addr_hi
=
2879 upper_32_bits(wb_gpu_addr
) & 0xffff;
2881 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2882 wb_gpu_addr
= adev
->wb
.gpu_addr
+ (ring
->wptr_offs
* 4);
2883 mqd
->cp_hqd_pq_wptr_poll_addr_lo
= wb_gpu_addr
& 0xfffffffc;
2884 mqd
->cp_hqd_pq_wptr_poll_addr_hi
= upper_32_bits(wb_gpu_addr
) & 0xffff;
2887 /* enable the doorbell if requested */
2888 if (ring
->use_doorbell
) {
2889 tmp
= RREG32_SOC15(GC
, 0, mmCP_HQD_PQ_DOORBELL_CONTROL
);
2890 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
2891 DOORBELL_OFFSET
, ring
->doorbell_index
);
2893 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
2895 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
2896 DOORBELL_SOURCE
, 0);
2897 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
2901 mqd
->cp_hqd_pq_doorbell_control
= tmp
;
2903 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2905 mqd
->cp_hqd_pq_rptr
= RREG32_SOC15(GC
, 0, mmCP_HQD_PQ_RPTR
);
2907 /* set the vmid for the queue */
2908 mqd
->cp_hqd_vmid
= 0;
2910 tmp
= RREG32_SOC15(GC
, 0, mmCP_HQD_PERSISTENT_STATE
);
2911 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PERSISTENT_STATE
, PRELOAD_SIZE
, 0x53);
2912 mqd
->cp_hqd_persistent_state
= tmp
;
2914 /* set MIN_IB_AVAIL_SIZE */
2915 tmp
= RREG32_SOC15(GC
, 0, mmCP_HQD_IB_CONTROL
);
2916 tmp
= REG_SET_FIELD(tmp
, CP_HQD_IB_CONTROL
, MIN_IB_AVAIL_SIZE
, 3);
2917 mqd
->cp_hqd_ib_control
= tmp
;
2919 /* activate the queue */
2920 mqd
->cp_hqd_active
= 1;
2925 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring
*ring
)
2927 struct amdgpu_device
*adev
= ring
->adev
;
2928 struct v9_mqd
*mqd
= ring
->mqd_ptr
;
2931 /* disable wptr polling */
2932 WREG32_FIELD15(GC
, 0, CP_PQ_WPTR_POLL_CNTL
, EN
, 0);
2934 WREG32_SOC15(GC
, 0, mmCP_HQD_EOP_BASE_ADDR
,
2935 mqd
->cp_hqd_eop_base_addr_lo
);
2936 WREG32_SOC15(GC
, 0, mmCP_HQD_EOP_BASE_ADDR_HI
,
2937 mqd
->cp_hqd_eop_base_addr_hi
);
2939 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2940 WREG32_SOC15(GC
, 0, mmCP_HQD_EOP_CONTROL
,
2941 mqd
->cp_hqd_eop_control
);
2943 /* enable doorbell? */
2944 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_DOORBELL_CONTROL
,
2945 mqd
->cp_hqd_pq_doorbell_control
);
2947 /* disable the queue if it's active */
2948 if (RREG32_SOC15(GC
, 0, mmCP_HQD_ACTIVE
) & 1) {
2949 WREG32_SOC15(GC
, 0, mmCP_HQD_DEQUEUE_REQUEST
, 1);
2950 for (j
= 0; j
< adev
->usec_timeout
; j
++) {
2951 if (!(RREG32_SOC15(GC
, 0, mmCP_HQD_ACTIVE
) & 1))
2955 WREG32_SOC15(GC
, 0, mmCP_HQD_DEQUEUE_REQUEST
,
2956 mqd
->cp_hqd_dequeue_request
);
2957 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_RPTR
,
2958 mqd
->cp_hqd_pq_rptr
);
2959 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_LO
,
2960 mqd
->cp_hqd_pq_wptr_lo
);
2961 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_HI
,
2962 mqd
->cp_hqd_pq_wptr_hi
);
2965 /* set the pointer to the MQD */
2966 WREG32_SOC15(GC
, 0, mmCP_MQD_BASE_ADDR
,
2967 mqd
->cp_mqd_base_addr_lo
);
2968 WREG32_SOC15(GC
, 0, mmCP_MQD_BASE_ADDR_HI
,
2969 mqd
->cp_mqd_base_addr_hi
);
2971 /* set MQD vmid to 0 */
2972 WREG32_SOC15(GC
, 0, mmCP_MQD_CONTROL
,
2973 mqd
->cp_mqd_control
);
2975 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
2976 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_BASE
,
2977 mqd
->cp_hqd_pq_base_lo
);
2978 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_BASE_HI
,
2979 mqd
->cp_hqd_pq_base_hi
);
2981 /* set up the HQD, this is similar to CP_RB0_CNTL */
2982 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_CONTROL
,
2983 mqd
->cp_hqd_pq_control
);
2985 /* set the wb address whether it's enabled or not */
2986 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR
,
2987 mqd
->cp_hqd_pq_rptr_report_addr_lo
);
2988 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI
,
2989 mqd
->cp_hqd_pq_rptr_report_addr_hi
);
2991 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2992 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR
,
2993 mqd
->cp_hqd_pq_wptr_poll_addr_lo
);
2994 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI
,
2995 mqd
->cp_hqd_pq_wptr_poll_addr_hi
);
2997 /* enable the doorbell if requested */
2998 if (ring
->use_doorbell
) {
2999 WREG32_SOC15(GC
, 0, mmCP_MEC_DOORBELL_RANGE_LOWER
,
3000 (adev
->doorbell_index
.kiq
* 2) << 2);
3001 WREG32_SOC15(GC
, 0, mmCP_MEC_DOORBELL_RANGE_UPPER
,
3002 (adev
->doorbell_index
.userqueue_end
* 2) << 2);
3005 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_DOORBELL_CONTROL
,
3006 mqd
->cp_hqd_pq_doorbell_control
);
3008 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3009 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_LO
,
3010 mqd
->cp_hqd_pq_wptr_lo
);
3011 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_HI
,
3012 mqd
->cp_hqd_pq_wptr_hi
);
3014 /* set the vmid for the queue */
3015 WREG32_SOC15(GC
, 0, mmCP_HQD_VMID
, mqd
->cp_hqd_vmid
);
3017 WREG32_SOC15(GC
, 0, mmCP_HQD_PERSISTENT_STATE
,
3018 mqd
->cp_hqd_persistent_state
);
3020 /* activate the queue */
3021 WREG32_SOC15(GC
, 0, mmCP_HQD_ACTIVE
,
3022 mqd
->cp_hqd_active
);
3024 if (ring
->use_doorbell
)
3025 WREG32_FIELD15(GC
, 0, CP_PQ_STATUS
, DOORBELL_ENABLE
, 1);
3030 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring
*ring
)
3032 struct amdgpu_device
*adev
= ring
->adev
;
3035 /* disable the queue if it's active */
3036 if (RREG32_SOC15(GC
, 0, mmCP_HQD_ACTIVE
) & 1) {
3038 WREG32_SOC15(GC
, 0, mmCP_HQD_DEQUEUE_REQUEST
, 1);
3040 for (j
= 0; j
< adev
->usec_timeout
; j
++) {
3041 if (!(RREG32_SOC15(GC
, 0, mmCP_HQD_ACTIVE
) & 1))
3046 if (j
== AMDGPU_MAX_USEC_TIMEOUT
) {
3047 DRM_DEBUG("KIQ dequeue request failed.\n");
3049 /* Manual disable if dequeue request times out */
3050 WREG32_SOC15(GC
, 0, mmCP_HQD_ACTIVE
, 0);
3053 WREG32_SOC15(GC
, 0, mmCP_HQD_DEQUEUE_REQUEST
,
3057 WREG32_SOC15(GC
, 0, mmCP_HQD_IQ_TIMER
, 0);
3058 WREG32_SOC15(GC
, 0, mmCP_HQD_IB_CONTROL
, 0);
3059 WREG32_SOC15(GC
, 0, mmCP_HQD_PERSISTENT_STATE
, 0);
3060 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_DOORBELL_CONTROL
, 0x40000000);
3061 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_DOORBELL_CONTROL
, 0);
3062 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_RPTR
, 0);
3063 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_HI
, 0);
3064 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_LO
, 0);
3069 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring
*ring
)
3071 struct amdgpu_device
*adev
= ring
->adev
;
3072 struct v9_mqd
*mqd
= ring
->mqd_ptr
;
3073 int mqd_idx
= AMDGPU_MAX_COMPUTE_RINGS
;
3075 gfx_v9_0_kiq_setting(ring
);
3077 if (adev
->in_gpu_reset
) { /* for GPU_RESET case */
3078 /* reset MQD to a clean status */
3079 if (adev
->gfx
.mec
.mqd_backup
[mqd_idx
])
3080 memcpy(mqd
, adev
->gfx
.mec
.mqd_backup
[mqd_idx
], sizeof(struct v9_mqd_allocation
));
3082 /* reset ring buffer */
3084 amdgpu_ring_clear_ring(ring
);
3086 mutex_lock(&adev
->srbm_mutex
);
3087 soc15_grbm_select(adev
, ring
->me
, ring
->pipe
, ring
->queue
, 0);
3088 gfx_v9_0_kiq_init_register(ring
);
3089 soc15_grbm_select(adev
, 0, 0, 0, 0);
3090 mutex_unlock(&adev
->srbm_mutex
);
3092 memset((void *)mqd
, 0, sizeof(struct v9_mqd_allocation
));
3093 ((struct v9_mqd_allocation
*)mqd
)->dynamic_cu_mask
= 0xFFFFFFFF;
3094 ((struct v9_mqd_allocation
*)mqd
)->dynamic_rb_mask
= 0xFFFFFFFF;
3095 mutex_lock(&adev
->srbm_mutex
);
3096 soc15_grbm_select(adev
, ring
->me
, ring
->pipe
, ring
->queue
, 0);
3097 gfx_v9_0_mqd_init(ring
);
3098 gfx_v9_0_kiq_init_register(ring
);
3099 soc15_grbm_select(adev
, 0, 0, 0, 0);
3100 mutex_unlock(&adev
->srbm_mutex
);
3102 if (adev
->gfx
.mec
.mqd_backup
[mqd_idx
])
3103 memcpy(adev
->gfx
.mec
.mqd_backup
[mqd_idx
], mqd
, sizeof(struct v9_mqd_allocation
));
3109 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring
*ring
)
3111 struct amdgpu_device
*adev
= ring
->adev
;
3112 struct v9_mqd
*mqd
= ring
->mqd_ptr
;
3113 int mqd_idx
= ring
- &adev
->gfx
.compute_ring
[0];
3115 if (!adev
->in_gpu_reset
&& !adev
->in_suspend
) {
3116 memset((void *)mqd
, 0, sizeof(struct v9_mqd_allocation
));
3117 ((struct v9_mqd_allocation
*)mqd
)->dynamic_cu_mask
= 0xFFFFFFFF;
3118 ((struct v9_mqd_allocation
*)mqd
)->dynamic_rb_mask
= 0xFFFFFFFF;
3119 mutex_lock(&adev
->srbm_mutex
);
3120 soc15_grbm_select(adev
, ring
->me
, ring
->pipe
, ring
->queue
, 0);
3121 gfx_v9_0_mqd_init(ring
);
3122 soc15_grbm_select(adev
, 0, 0, 0, 0);
3123 mutex_unlock(&adev
->srbm_mutex
);
3125 if (adev
->gfx
.mec
.mqd_backup
[mqd_idx
])
3126 memcpy(adev
->gfx
.mec
.mqd_backup
[mqd_idx
], mqd
, sizeof(struct v9_mqd_allocation
));
3127 } else if (adev
->in_gpu_reset
) { /* for GPU_RESET case */
3128 /* reset MQD to a clean status */
3129 if (adev
->gfx
.mec
.mqd_backup
[mqd_idx
])
3130 memcpy(mqd
, adev
->gfx
.mec
.mqd_backup
[mqd_idx
], sizeof(struct v9_mqd_allocation
));
3132 /* reset ring buffer */
3134 amdgpu_ring_clear_ring(ring
);
3136 amdgpu_ring_clear_ring(ring
);
3142 static int gfx_v9_0_kiq_resume(struct amdgpu_device
*adev
)
3144 struct amdgpu_ring
*ring
;
3147 ring
= &adev
->gfx
.kiq
.ring
;
3149 r
= amdgpu_bo_reserve(ring
->mqd_obj
, false);
3150 if (unlikely(r
!= 0))
3153 r
= amdgpu_bo_kmap(ring
->mqd_obj
, (void **)&ring
->mqd_ptr
);
3154 if (unlikely(r
!= 0))
3157 gfx_v9_0_kiq_init_queue(ring
);
3158 amdgpu_bo_kunmap(ring
->mqd_obj
);
3159 ring
->mqd_ptr
= NULL
;
3160 amdgpu_bo_unreserve(ring
->mqd_obj
);
3161 ring
->sched
.ready
= true;
3165 static int gfx_v9_0_kcq_resume(struct amdgpu_device
*adev
)
3167 struct amdgpu_ring
*ring
= NULL
;
3170 gfx_v9_0_cp_compute_enable(adev
, true);
3172 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++) {
3173 ring
= &adev
->gfx
.compute_ring
[i
];
3175 r
= amdgpu_bo_reserve(ring
->mqd_obj
, false);
3176 if (unlikely(r
!= 0))
3178 r
= amdgpu_bo_kmap(ring
->mqd_obj
, (void **)&ring
->mqd_ptr
);
3180 r
= gfx_v9_0_kcq_init_queue(ring
);
3181 amdgpu_bo_kunmap(ring
->mqd_obj
);
3182 ring
->mqd_ptr
= NULL
;
3184 amdgpu_bo_unreserve(ring
->mqd_obj
);
3189 r
= gfx_v9_0_kiq_kcq_enable(adev
);
3194 static int gfx_v9_0_cp_resume(struct amdgpu_device
*adev
)
3197 struct amdgpu_ring
*ring
;
3199 if (!(adev
->flags
& AMD_IS_APU
))
3200 gfx_v9_0_enable_gui_idle_interrupt(adev
, false);
3202 if (adev
->firmware
.load_type
!= AMDGPU_FW_LOAD_PSP
) {
3203 /* legacy firmware loading */
3204 r
= gfx_v9_0_cp_gfx_load_microcode(adev
);
3208 r
= gfx_v9_0_cp_compute_load_microcode(adev
);
3213 r
= gfx_v9_0_kiq_resume(adev
);
3217 r
= gfx_v9_0_cp_gfx_resume(adev
);
3221 r
= gfx_v9_0_kcq_resume(adev
);
3225 ring
= &adev
->gfx
.gfx_ring
[0];
3226 r
= amdgpu_ring_test_helper(ring
);
3230 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++) {
3231 ring
= &adev
->gfx
.compute_ring
[i
];
3232 amdgpu_ring_test_helper(ring
);
3235 gfx_v9_0_enable_gui_idle_interrupt(adev
, true);
3240 static void gfx_v9_0_cp_enable(struct amdgpu_device
*adev
, bool enable
)
3242 gfx_v9_0_cp_gfx_enable(adev
, enable
);
3243 gfx_v9_0_cp_compute_enable(adev
, enable
);
3246 static int gfx_v9_0_hw_init(void *handle
)
3249 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
3251 gfx_v9_0_init_golden_registers(adev
);
3253 gfx_v9_0_constants_init(adev
);
3255 r
= gfx_v9_0_csb_vram_pin(adev
);
3259 r
= adev
->gfx
.rlc
.funcs
->resume(adev
);
3263 r
= gfx_v9_0_cp_resume(adev
);
3267 r
= gfx_v9_0_ngg_en(adev
);
3274 static int gfx_v9_0_kcq_disable(struct amdgpu_device
*adev
)
3277 struct amdgpu_ring
*kiq_ring
= &adev
->gfx
.kiq
.ring
;
3279 r
= amdgpu_ring_alloc(kiq_ring
, 6 * adev
->gfx
.num_compute_rings
);
3281 DRM_ERROR("Failed to lock KIQ (%d).\n", r
);
3283 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++) {
3284 struct amdgpu_ring
*ring
= &adev
->gfx
.compute_ring
[i
];
3286 amdgpu_ring_write(kiq_ring
, PACKET3(PACKET3_UNMAP_QUEUES
, 4));
3287 amdgpu_ring_write(kiq_ring
, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3288 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3289 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3290 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3291 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3292 amdgpu_ring_write(kiq_ring
, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring
->doorbell_index
));
3293 amdgpu_ring_write(kiq_ring
, 0);
3294 amdgpu_ring_write(kiq_ring
, 0);
3295 amdgpu_ring_write(kiq_ring
, 0);
3297 r
= amdgpu_ring_test_helper(kiq_ring
);
3299 DRM_ERROR("KCQ disable failed\n");
3304 static int gfx_v9_0_hw_fini(void *handle
)
3306 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
3308 amdgpu_irq_put(adev
, &adev
->gfx
.priv_reg_irq
, 0);
3309 amdgpu_irq_put(adev
, &adev
->gfx
.priv_inst_irq
, 0);
3311 /* disable KCQ to avoid CPC touch memory not valid anymore */
3312 gfx_v9_0_kcq_disable(adev
);
3314 if (amdgpu_sriov_vf(adev
)) {
3315 gfx_v9_0_cp_gfx_enable(adev
, false);
3316 /* must disable polling for SRIOV when hw finished, otherwise
3317 * CPC engine may still keep fetching WB address which is already
3318 * invalid after sw finished and trigger DMAR reading error in
3321 WREG32_FIELD15(GC
, 0, CP_PQ_WPTR_POLL_CNTL
, EN
, 0);
3325 /* Use deinitialize sequence from CAIL when unbinding device from driver,
3326 * otherwise KIQ is hanging when binding back
3328 if (!adev
->in_gpu_reset
&& !adev
->in_suspend
) {
3329 mutex_lock(&adev
->srbm_mutex
);
3330 soc15_grbm_select(adev
, adev
->gfx
.kiq
.ring
.me
,
3331 adev
->gfx
.kiq
.ring
.pipe
,
3332 adev
->gfx
.kiq
.ring
.queue
, 0);
3333 gfx_v9_0_kiq_fini_register(&adev
->gfx
.kiq
.ring
);
3334 soc15_grbm_select(adev
, 0, 0, 0, 0);
3335 mutex_unlock(&adev
->srbm_mutex
);
3338 gfx_v9_0_cp_enable(adev
, false);
3339 adev
->gfx
.rlc
.funcs
->stop(adev
);
3341 gfx_v9_0_csb_vram_unpin(adev
);
3346 static int gfx_v9_0_suspend(void *handle
)
3348 return gfx_v9_0_hw_fini(handle
);
3351 static int gfx_v9_0_resume(void *handle
)
3353 return gfx_v9_0_hw_init(handle
);
3356 static bool gfx_v9_0_is_idle(void *handle
)
3358 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
3360 if (REG_GET_FIELD(RREG32_SOC15(GC
, 0, mmGRBM_STATUS
),
3361 GRBM_STATUS
, GUI_ACTIVE
))
3367 static int gfx_v9_0_wait_for_idle(void *handle
)
3370 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
3372 for (i
= 0; i
< adev
->usec_timeout
; i
++) {
3373 if (gfx_v9_0_is_idle(handle
))
3380 static int gfx_v9_0_soft_reset(void *handle
)
3382 u32 grbm_soft_reset
= 0;
3384 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
3387 tmp
= RREG32_SOC15(GC
, 0, mmGRBM_STATUS
);
3388 if (tmp
& (GRBM_STATUS__PA_BUSY_MASK
| GRBM_STATUS__SC_BUSY_MASK
|
3389 GRBM_STATUS__BCI_BUSY_MASK
| GRBM_STATUS__SX_BUSY_MASK
|
3390 GRBM_STATUS__TA_BUSY_MASK
| GRBM_STATUS__VGT_BUSY_MASK
|
3391 GRBM_STATUS__DB_BUSY_MASK
| GRBM_STATUS__CB_BUSY_MASK
|
3392 GRBM_STATUS__GDS_BUSY_MASK
| GRBM_STATUS__SPI_BUSY_MASK
|
3393 GRBM_STATUS__IA_BUSY_MASK
| GRBM_STATUS__IA_BUSY_NO_DMA_MASK
)) {
3394 grbm_soft_reset
= REG_SET_FIELD(grbm_soft_reset
,
3395 GRBM_SOFT_RESET
, SOFT_RESET_CP
, 1);
3396 grbm_soft_reset
= REG_SET_FIELD(grbm_soft_reset
,
3397 GRBM_SOFT_RESET
, SOFT_RESET_GFX
, 1);
3400 if (tmp
& (GRBM_STATUS__CP_BUSY_MASK
| GRBM_STATUS__CP_COHERENCY_BUSY_MASK
)) {
3401 grbm_soft_reset
= REG_SET_FIELD(grbm_soft_reset
,
3402 GRBM_SOFT_RESET
, SOFT_RESET_CP
, 1);
3406 tmp
= RREG32_SOC15(GC
, 0, mmGRBM_STATUS2
);
3407 if (REG_GET_FIELD(tmp
, GRBM_STATUS2
, RLC_BUSY
))
3408 grbm_soft_reset
= REG_SET_FIELD(grbm_soft_reset
,
3409 GRBM_SOFT_RESET
, SOFT_RESET_RLC
, 1);
3412 if (grbm_soft_reset
) {
3414 adev
->gfx
.rlc
.funcs
->stop(adev
);
3416 /* Disable GFX parsing/prefetching */
3417 gfx_v9_0_cp_gfx_enable(adev
, false);
3419 /* Disable MEC parsing/prefetching */
3420 gfx_v9_0_cp_compute_enable(adev
, false);
3422 if (grbm_soft_reset
) {
3423 tmp
= RREG32_SOC15(GC
, 0, mmGRBM_SOFT_RESET
);
3424 tmp
|= grbm_soft_reset
;
3425 dev_info(adev
->dev
, "GRBM_SOFT_RESET=0x%08X\n", tmp
);
3426 WREG32_SOC15(GC
, 0, mmGRBM_SOFT_RESET
, tmp
);
3427 tmp
= RREG32_SOC15(GC
, 0, mmGRBM_SOFT_RESET
);
3431 tmp
&= ~grbm_soft_reset
;
3432 WREG32_SOC15(GC
, 0, mmGRBM_SOFT_RESET
, tmp
);
3433 tmp
= RREG32_SOC15(GC
, 0, mmGRBM_SOFT_RESET
);
3436 /* Wait a little for things to settle down */
3442 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device
*adev
)
3446 mutex_lock(&adev
->gfx
.gpu_clock_mutex
);
3447 WREG32_SOC15(GC
, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT
, 1);
3448 clock
= (uint64_t)RREG32_SOC15(GC
, 0, mmRLC_GPU_CLOCK_COUNT_LSB
) |
3449 ((uint64_t)RREG32_SOC15(GC
, 0, mmRLC_GPU_CLOCK_COUNT_MSB
) << 32ULL);
3450 mutex_unlock(&adev
->gfx
.gpu_clock_mutex
);
3454 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring
*ring
,
3456 uint32_t gds_base
, uint32_t gds_size
,
3457 uint32_t gws_base
, uint32_t gws_size
,
3458 uint32_t oa_base
, uint32_t oa_size
)
3460 struct amdgpu_device
*adev
= ring
->adev
;
3463 gfx_v9_0_write_data_to_reg(ring
, 0, false,
3464 SOC15_REG_OFFSET(GC
, 0, mmGDS_VMID0_BASE
) + 2 * vmid
,
3468 gfx_v9_0_write_data_to_reg(ring
, 0, false,
3469 SOC15_REG_OFFSET(GC
, 0, mmGDS_VMID0_SIZE
) + 2 * vmid
,
3473 gfx_v9_0_write_data_to_reg(ring
, 0, false,
3474 SOC15_REG_OFFSET(GC
, 0, mmGDS_GWS_VMID0
) + vmid
,
3475 gws_size
<< GDS_GWS_VMID0__SIZE__SHIFT
| gws_base
);
3478 gfx_v9_0_write_data_to_reg(ring
, 0, false,
3479 SOC15_REG_OFFSET(GC
, 0, mmGDS_OA_VMID0
) + vmid
,
3480 (1 << (oa_size
+ oa_base
)) - (1 << oa_base
));
3483 static int gfx_v9_0_early_init(void *handle
)
3485 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
3487 adev
->gfx
.num_gfx_rings
= GFX9_NUM_GFX_RINGS
;
3488 adev
->gfx
.num_compute_rings
= AMDGPU_MAX_COMPUTE_RINGS
;
3489 gfx_v9_0_set_ring_funcs(adev
);
3490 gfx_v9_0_set_irq_funcs(adev
);
3491 gfx_v9_0_set_gds_init(adev
);
3492 gfx_v9_0_set_rlc_funcs(adev
);
3497 static int gfx_v9_0_late_init(void *handle
)
3499 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
3502 r
= amdgpu_irq_get(adev
, &adev
->gfx
.priv_reg_irq
, 0);
3506 r
= amdgpu_irq_get(adev
, &adev
->gfx
.priv_inst_irq
, 0);
3513 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device
*adev
)
3515 uint32_t rlc_setting
;
3517 /* if RLC is not enabled, do nothing */
3518 rlc_setting
= RREG32_SOC15(GC
, 0, mmRLC_CNTL
);
3519 if (!(rlc_setting
& RLC_CNTL__RLC_ENABLE_F32_MASK
))
3525 static void gfx_v9_0_set_safe_mode(struct amdgpu_device
*adev
)
3530 data
= RLC_SAFE_MODE__CMD_MASK
;
3531 data
|= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT
);
3532 WREG32_SOC15(GC
, 0, mmRLC_SAFE_MODE
, data
);
3534 /* wait for RLC_SAFE_MODE */
3535 for (i
= 0; i
< adev
->usec_timeout
; i
++) {
3536 if (!REG_GET_FIELD(RREG32_SOC15(GC
, 0, mmRLC_SAFE_MODE
), RLC_SAFE_MODE
, CMD
))
3542 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device
*adev
)
3546 data
= RLC_SAFE_MODE__CMD_MASK
;
3547 WREG32_SOC15(GC
, 0, mmRLC_SAFE_MODE
, data
);
3550 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device
*adev
,
3553 amdgpu_gfx_rlc_enter_safe_mode(adev
);
3555 if ((adev
->pg_flags
& AMD_PG_SUPPORT_GFX_PG
) && enable
) {
3556 gfx_v9_0_enable_gfx_cg_power_gating(adev
, true);
3557 if (adev
->pg_flags
& AMD_PG_SUPPORT_GFX_PIPELINE
)
3558 gfx_v9_0_enable_gfx_pipeline_powergating(adev
, true);
3560 gfx_v9_0_enable_gfx_cg_power_gating(adev
, false);
3561 gfx_v9_0_enable_gfx_pipeline_powergating(adev
, false);
3564 amdgpu_gfx_rlc_exit_safe_mode(adev
);
3567 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device
*adev
,
3570 /* TODO: double check if we need to perform under safe mode */
3571 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
3573 if ((adev
->pg_flags
& AMD_PG_SUPPORT_GFX_SMG
) && enable
)
3574 gfx_v9_0_enable_gfx_static_mg_power_gating(adev
, true);
3576 gfx_v9_0_enable_gfx_static_mg_power_gating(adev
, false);
3578 if ((adev
->pg_flags
& AMD_PG_SUPPORT_GFX_DMG
) && enable
)
3579 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev
, true);
3581 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev
, false);
3583 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
3586 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device
*adev
,
3591 amdgpu_gfx_rlc_enter_safe_mode(adev
);
3593 /* It is disabled by HW by default */
3594 if (enable
&& (adev
->cg_flags
& AMD_CG_SUPPORT_GFX_MGCG
)) {
3595 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
3596 def
= data
= RREG32_SOC15(GC
, 0, mmRLC_CGTT_MGCG_OVERRIDE
);
3598 if (adev
->asic_type
!= CHIP_VEGA12
)
3599 data
&= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK
;
3601 data
&= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK
|
3602 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK
|
3603 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK
);
3605 /* only for Vega10 & Raven1 */
3606 data
|= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK
;
3609 WREG32_SOC15(GC
, 0, mmRLC_CGTT_MGCG_OVERRIDE
, data
);
3611 /* MGLS is a global flag to control all MGLS in GFX */
3612 if (adev
->cg_flags
& AMD_CG_SUPPORT_GFX_MGLS
) {
3613 /* 2 - RLC memory Light sleep */
3614 if (adev
->cg_flags
& AMD_CG_SUPPORT_GFX_RLC_LS
) {
3615 def
= data
= RREG32_SOC15(GC
, 0, mmRLC_MEM_SLP_CNTL
);
3616 data
|= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK
;
3618 WREG32_SOC15(GC
, 0, mmRLC_MEM_SLP_CNTL
, data
);
3620 /* 3 - CP memory Light sleep */
3621 if (adev
->cg_flags
& AMD_CG_SUPPORT_GFX_CP_LS
) {
3622 def
= data
= RREG32_SOC15(GC
, 0, mmCP_MEM_SLP_CNTL
);
3623 data
|= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK
;
3625 WREG32_SOC15(GC
, 0, mmCP_MEM_SLP_CNTL
, data
);
3629 /* 1 - MGCG_OVERRIDE */
3630 def
= data
= RREG32_SOC15(GC
, 0, mmRLC_CGTT_MGCG_OVERRIDE
);
3632 if (adev
->asic_type
!= CHIP_VEGA12
)
3633 data
|= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK
;
3635 data
|= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK
|
3636 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK
|
3637 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK
|
3638 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK
);
3641 WREG32_SOC15(GC
, 0, mmRLC_CGTT_MGCG_OVERRIDE
, data
);
3643 /* 2 - disable MGLS in RLC */
3644 data
= RREG32_SOC15(GC
, 0, mmRLC_MEM_SLP_CNTL
);
3645 if (data
& RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK
) {
3646 data
&= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK
;
3647 WREG32_SOC15(GC
, 0, mmRLC_MEM_SLP_CNTL
, data
);
3650 /* 3 - disable MGLS in CP */
3651 data
= RREG32_SOC15(GC
, 0, mmCP_MEM_SLP_CNTL
);
3652 if (data
& CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK
) {
3653 data
&= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK
;
3654 WREG32_SOC15(GC
, 0, mmCP_MEM_SLP_CNTL
, data
);
3658 amdgpu_gfx_rlc_exit_safe_mode(adev
);
3661 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device
*adev
,
3666 amdgpu_gfx_rlc_enter_safe_mode(adev
);
3668 /* Enable 3D CGCG/CGLS */
3669 if (enable
&& (adev
->cg_flags
& AMD_CG_SUPPORT_GFX_3D_CGCG
)) {
3670 /* write cmd to clear cgcg/cgls ov */
3671 def
= data
= RREG32_SOC15(GC
, 0, mmRLC_CGTT_MGCG_OVERRIDE
);
3672 /* unset CGCG override */
3673 data
&= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK
;
3674 /* update CGCG and CGLS override bits */
3676 WREG32_SOC15(GC
, 0, mmRLC_CGTT_MGCG_OVERRIDE
, data
);
3678 /* enable 3Dcgcg FSM(0x0000363f) */
3679 def
= RREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL_3D
);
3681 data
= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT
) |
3682 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK
;
3683 if (adev
->cg_flags
& AMD_CG_SUPPORT_GFX_3D_CGLS
)
3684 data
|= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT
) |
3685 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK
;
3687 WREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL_3D
, data
);
3689 /* set IDLE_POLL_COUNT(0x00900100) */
3690 def
= RREG32_SOC15(GC
, 0, mmCP_RB_WPTR_POLL_CNTL
);
3691 data
= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT
) |
3692 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT
);
3694 WREG32_SOC15(GC
, 0, mmCP_RB_WPTR_POLL_CNTL
, data
);
3696 /* Disable CGCG/CGLS */
3697 def
= data
= RREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL_3D
);
3698 /* disable cgcg, cgls should be disabled */
3699 data
&= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK
|
3700 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK
);
3701 /* disable cgcg and cgls in FSM */
3703 WREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL_3D
, data
);
3706 amdgpu_gfx_rlc_exit_safe_mode(adev
);
3709 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device
*adev
,
3714 amdgpu_gfx_rlc_enter_safe_mode(adev
);
3716 if (enable
&& (adev
->cg_flags
& AMD_CG_SUPPORT_GFX_CGCG
)) {
3717 def
= data
= RREG32_SOC15(GC
, 0, mmRLC_CGTT_MGCG_OVERRIDE
);
3718 /* unset CGCG override */
3719 data
&= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK
;
3720 if (adev
->cg_flags
& AMD_CG_SUPPORT_GFX_CGLS
)
3721 data
&= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK
;
3723 data
|= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK
;
3724 /* update CGCG and CGLS override bits */
3726 WREG32_SOC15(GC
, 0, mmRLC_CGTT_MGCG_OVERRIDE
, data
);
3728 /* enable cgcg FSM(0x0000363F) */
3729 def
= RREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL
);
3731 data
= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT
) |
3732 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK
;
3733 if (adev
->cg_flags
& AMD_CG_SUPPORT_GFX_CGLS
)
3734 data
|= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT
) |
3735 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK
;
3737 WREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL
, data
);
3739 /* set IDLE_POLL_COUNT(0x00900100) */
3740 def
= RREG32_SOC15(GC
, 0, mmCP_RB_WPTR_POLL_CNTL
);
3741 data
= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT
) |
3742 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT
);
3744 WREG32_SOC15(GC
, 0, mmCP_RB_WPTR_POLL_CNTL
, data
);
3746 def
= data
= RREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL
);
3747 /* reset CGCG/CGLS bits */
3748 data
&= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK
| RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK
);
3749 /* disable cgcg and cgls in FSM */
3751 WREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL
, data
);
3754 amdgpu_gfx_rlc_exit_safe_mode(adev
);
3757 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device
*adev
,
3761 /* CGCG/CGLS should be enabled after MGCG/MGLS
3762 * === MGCG + MGLS ===
3764 gfx_v9_0_update_medium_grain_clock_gating(adev
, enable
);
3765 /* === CGCG /CGLS for GFX 3D Only === */
3766 gfx_v9_0_update_3d_clock_gating(adev
, enable
);
3767 /* === CGCG + CGLS === */
3768 gfx_v9_0_update_coarse_grain_clock_gating(adev
, enable
);
3770 /* CGCG/CGLS should be disabled before MGCG/MGLS
3771 * === CGCG + CGLS ===
3773 gfx_v9_0_update_coarse_grain_clock_gating(adev
, enable
);
3774 /* === CGCG /CGLS for GFX 3D Only === */
3775 gfx_v9_0_update_3d_clock_gating(adev
, enable
);
3776 /* === MGCG + MGLS === */
3777 gfx_v9_0_update_medium_grain_clock_gating(adev
, enable
);
3782 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs
= {
3783 .is_rlc_enabled
= gfx_v9_0_is_rlc_enabled
,
3784 .set_safe_mode
= gfx_v9_0_set_safe_mode
,
3785 .unset_safe_mode
= gfx_v9_0_unset_safe_mode
,
3786 .init
= gfx_v9_0_rlc_init
,
3787 .get_csb_size
= gfx_v9_0_get_csb_size
,
3788 .get_csb_buffer
= gfx_v9_0_get_csb_buffer
,
3789 .get_cp_table_num
= gfx_v9_0_cp_jump_table_num
,
3790 .resume
= gfx_v9_0_rlc_resume
,
3791 .stop
= gfx_v9_0_rlc_stop
,
3792 .reset
= gfx_v9_0_rlc_reset
,
3793 .start
= gfx_v9_0_rlc_start
3796 static int gfx_v9_0_set_powergating_state(void *handle
,
3797 enum amd_powergating_state state
)
3799 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
3800 bool enable
= (state
== AMD_PG_STATE_GATE
) ? true : false;
3802 switch (adev
->asic_type
) {
3805 amdgpu_gfx_off_ctrl(adev
, false);
3806 cancel_delayed_work_sync(&adev
->gfx
.gfx_off_delay_work
);
3808 if (adev
->pg_flags
& AMD_PG_SUPPORT_RLC_SMU_HS
) {
3809 gfx_v9_0_enable_sck_slow_down_on_power_up(adev
, true);
3810 gfx_v9_0_enable_sck_slow_down_on_power_down(adev
, true);
3812 gfx_v9_0_enable_sck_slow_down_on_power_up(adev
, false);
3813 gfx_v9_0_enable_sck_slow_down_on_power_down(adev
, false);
3816 if (adev
->pg_flags
& AMD_PG_SUPPORT_CP
)
3817 gfx_v9_0_enable_cp_power_gating(adev
, true);
3819 gfx_v9_0_enable_cp_power_gating(adev
, false);
3821 /* update gfx cgpg state */
3822 gfx_v9_0_update_gfx_cg_power_gating(adev
, enable
);
3824 /* update mgcg state */
3825 gfx_v9_0_update_gfx_mg_power_gating(adev
, enable
);
3828 amdgpu_gfx_off_ctrl(adev
, true);
3832 amdgpu_gfx_off_ctrl(adev
, false);
3833 cancel_delayed_work_sync(&adev
->gfx
.gfx_off_delay_work
);
3835 amdgpu_gfx_off_ctrl(adev
, true);
3845 static int gfx_v9_0_set_clockgating_state(void *handle
,
3846 enum amd_clockgating_state state
)
3848 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
3850 if (amdgpu_sriov_vf(adev
))
3853 switch (adev
->asic_type
) {
3858 gfx_v9_0_update_gfx_clock_gating(adev
,
3859 state
== AMD_CG_STATE_GATE
? true : false);
3867 static void gfx_v9_0_get_clockgating_state(void *handle
, u32
*flags
)
3869 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
3872 if (amdgpu_sriov_vf(adev
))
3875 /* AMD_CG_SUPPORT_GFX_MGCG */
3876 data
= RREG32_SOC15(GC
, 0, mmRLC_CGTT_MGCG_OVERRIDE
);
3877 if (!(data
& RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK
))
3878 *flags
|= AMD_CG_SUPPORT_GFX_MGCG
;
3880 /* AMD_CG_SUPPORT_GFX_CGCG */
3881 data
= RREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL
);
3882 if (data
& RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK
)
3883 *flags
|= AMD_CG_SUPPORT_GFX_CGCG
;
3885 /* AMD_CG_SUPPORT_GFX_CGLS */
3886 if (data
& RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK
)
3887 *flags
|= AMD_CG_SUPPORT_GFX_CGLS
;
3889 /* AMD_CG_SUPPORT_GFX_RLC_LS */
3890 data
= RREG32_SOC15(GC
, 0, mmRLC_MEM_SLP_CNTL
);
3891 if (data
& RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK
)
3892 *flags
|= AMD_CG_SUPPORT_GFX_RLC_LS
| AMD_CG_SUPPORT_GFX_MGLS
;
3894 /* AMD_CG_SUPPORT_GFX_CP_LS */
3895 data
= RREG32_SOC15(GC
, 0, mmCP_MEM_SLP_CNTL
);
3896 if (data
& CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK
)
3897 *flags
|= AMD_CG_SUPPORT_GFX_CP_LS
| AMD_CG_SUPPORT_GFX_MGLS
;
3899 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
3900 data
= RREG32_SOC15(GC
, 0, mmRLC_CGCG_CGLS_CTRL_3D
);
3901 if (data
& RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK
)
3902 *flags
|= AMD_CG_SUPPORT_GFX_3D_CGCG
;
3904 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
3905 if (data
& RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK
)
3906 *flags
|= AMD_CG_SUPPORT_GFX_3D_CGLS
;
3909 static u64
gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring
*ring
)
3911 return ring
->adev
->wb
.wb
[ring
->rptr_offs
]; /* gfx9 is 32bit rptr*/
3914 static u64
gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring
*ring
)
3916 struct amdgpu_device
*adev
= ring
->adev
;
3919 /* XXX check if swapping is necessary on BE */
3920 if (ring
->use_doorbell
) {
3921 wptr
= atomic64_read((atomic64_t
*)&adev
->wb
.wb
[ring
->wptr_offs
]);
3923 wptr
= RREG32_SOC15(GC
, 0, mmCP_RB0_WPTR
);
3924 wptr
+= (u64
)RREG32_SOC15(GC
, 0, mmCP_RB0_WPTR_HI
) << 32;
3930 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring
*ring
)
3932 struct amdgpu_device
*adev
= ring
->adev
;
3934 if (ring
->use_doorbell
) {
3935 /* XXX check if swapping is necessary on BE */
3936 atomic64_set((atomic64_t
*)&adev
->wb
.wb
[ring
->wptr_offs
], ring
->wptr
);
3937 WDOORBELL64(ring
->doorbell_index
, ring
->wptr
);
3939 WREG32_SOC15(GC
, 0, mmCP_RB0_WPTR
, lower_32_bits(ring
->wptr
));
3940 WREG32_SOC15(GC
, 0, mmCP_RB0_WPTR_HI
, upper_32_bits(ring
->wptr
));
3944 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring
*ring
)
3946 struct amdgpu_device
*adev
= ring
->adev
;
3947 u32 ref_and_mask
, reg_mem_engine
;
3948 const struct nbio_hdp_flush_reg
*nbio_hf_reg
= adev
->nbio_funcs
->hdp_flush_reg
;
3950 if (ring
->funcs
->type
== AMDGPU_RING_TYPE_COMPUTE
) {
3953 ref_and_mask
= nbio_hf_reg
->ref_and_mask_cp2
<< ring
->pipe
;
3956 ref_and_mask
= nbio_hf_reg
->ref_and_mask_cp6
<< ring
->pipe
;
3963 ref_and_mask
= nbio_hf_reg
->ref_and_mask_cp0
;
3964 reg_mem_engine
= 1; /* pfp */
3967 gfx_v9_0_wait_reg_mem(ring
, reg_mem_engine
, 0, 1,
3968 adev
->nbio_funcs
->get_hdp_flush_req_offset(adev
),
3969 adev
->nbio_funcs
->get_hdp_flush_done_offset(adev
),
3970 ref_and_mask
, ref_and_mask
, 0x20);
3973 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring
*ring
,
3974 struct amdgpu_job
*job
,
3975 struct amdgpu_ib
*ib
,
3978 unsigned vmid
= AMDGPU_JOB_GET_VMID(job
);
3979 u32 header
, control
= 0;
3981 if (ib
->flags
& AMDGPU_IB_FLAG_CE
)
3982 header
= PACKET3(PACKET3_INDIRECT_BUFFER_CONST
, 2);
3984 header
= PACKET3(PACKET3_INDIRECT_BUFFER
, 2);
3986 control
|= ib
->length_dw
| (vmid
<< 24);
3988 if (amdgpu_sriov_vf(ring
->adev
) && (ib
->flags
& AMDGPU_IB_FLAG_PREEMPT
)) {
3989 control
|= INDIRECT_BUFFER_PRE_ENB(1);
3991 if (!(ib
->flags
& AMDGPU_IB_FLAG_CE
))
3992 gfx_v9_0_ring_emit_de_meta(ring
);
3995 amdgpu_ring_write(ring
, header
);
3996 BUG_ON(ib
->gpu_addr
& 0x3); /* Dword align */
3997 amdgpu_ring_write(ring
,
4001 lower_32_bits(ib
->gpu_addr
));
4002 amdgpu_ring_write(ring
, upper_32_bits(ib
->gpu_addr
));
4003 amdgpu_ring_write(ring
, control
);
4006 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring
*ring
,
4007 struct amdgpu_job
*job
,
4008 struct amdgpu_ib
*ib
,
4011 unsigned vmid
= AMDGPU_JOB_GET_VMID(job
);
4012 u32 control
= INDIRECT_BUFFER_VALID
| ib
->length_dw
| (vmid
<< 24);
4014 /* Currently, there is a high possibility to get wave ID mismatch
4015 * between ME and GDS, leading to a hw deadlock, because ME generates
4016 * different wave IDs than the GDS expects. This situation happens
4017 * randomly when at least 5 compute pipes use GDS ordered append.
4018 * The wave IDs generated by ME are also wrong after suspend/resume.
4019 * Those are probably bugs somewhere else in the kernel driver.
4021 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4022 * GDS to 0 for this ring (me/pipe).
4024 if (ib
->flags
& AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
) {
4025 amdgpu_ring_write(ring
, PACKET3(PACKET3_SET_CONFIG_REG
, 1));
4026 amdgpu_ring_write(ring
, mmGDS_COMPUTE_MAX_WAVE_ID
);
4027 amdgpu_ring_write(ring
, ring
->adev
->gds
.gds_compute_max_wave_id
);
4030 amdgpu_ring_write(ring
, PACKET3(PACKET3_INDIRECT_BUFFER
, 2));
4031 BUG_ON(ib
->gpu_addr
& 0x3); /* Dword align */
4032 amdgpu_ring_write(ring
,
4036 lower_32_bits(ib
->gpu_addr
));
4037 amdgpu_ring_write(ring
, upper_32_bits(ib
->gpu_addr
));
4038 amdgpu_ring_write(ring
, control
);
4041 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring
*ring
, u64 addr
,
4042 u64 seq
, unsigned flags
)
4044 bool write64bit
= flags
& AMDGPU_FENCE_FLAG_64BIT
;
4045 bool int_sel
= flags
& AMDGPU_FENCE_FLAG_INT
;
4046 bool writeback
= flags
& AMDGPU_FENCE_FLAG_TC_WB_ONLY
;
4048 /* RELEASE_MEM - flush caches, send int */
4049 amdgpu_ring_write(ring
, PACKET3(PACKET3_RELEASE_MEM
, 6));
4050 amdgpu_ring_write(ring
, ((writeback
? (EOP_TC_WB_ACTION_EN
|
4051 EOP_TC_NC_ACTION_EN
) :
4052 (EOP_TCL1_ACTION_EN
|
4054 EOP_TC_WB_ACTION_EN
|
4055 EOP_TC_MD_ACTION_EN
)) |
4056 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT
) |
4058 amdgpu_ring_write(ring
, DATA_SEL(write64bit
? 2 : 1) | INT_SEL(int_sel
? 2 : 0));
4061 * the address should be Qword aligned if 64bit write, Dword
4062 * aligned if only send 32bit data low (discard data high)
4068 amdgpu_ring_write(ring
, lower_32_bits(addr
));
4069 amdgpu_ring_write(ring
, upper_32_bits(addr
));
4070 amdgpu_ring_write(ring
, lower_32_bits(seq
));
4071 amdgpu_ring_write(ring
, upper_32_bits(seq
));
4072 amdgpu_ring_write(ring
, 0);
4075 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring
*ring
)
4077 int usepfp
= (ring
->funcs
->type
== AMDGPU_RING_TYPE_GFX
);
4078 uint32_t seq
= ring
->fence_drv
.sync_seq
;
4079 uint64_t addr
= ring
->fence_drv
.gpu_addr
;
4081 gfx_v9_0_wait_reg_mem(ring
, usepfp
, 1, 0,
4082 lower_32_bits(addr
), upper_32_bits(addr
),
4083 seq
, 0xffffffff, 4);
4086 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring
*ring
,
4087 unsigned vmid
, uint64_t pd_addr
)
4089 amdgpu_gmc_emit_flush_gpu_tlb(ring
, vmid
, pd_addr
);
4091 /* compute doesn't have PFP */
4092 if (ring
->funcs
->type
== AMDGPU_RING_TYPE_GFX
) {
4093 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4094 amdgpu_ring_write(ring
, PACKET3(PACKET3_PFP_SYNC_ME
, 0));
4095 amdgpu_ring_write(ring
, 0x0);
4099 static u64
gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring
*ring
)
4101 return ring
->adev
->wb
.wb
[ring
->rptr_offs
]; /* gfx9 hardware is 32bit rptr */
4104 static u64
gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring
*ring
)
4108 /* XXX check if swapping is necessary on BE */
4109 if (ring
->use_doorbell
)
4110 wptr
= atomic64_read((atomic64_t
*)&ring
->adev
->wb
.wb
[ring
->wptr_offs
]);
4116 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring
*ring
,
4119 struct amdgpu_device
*adev
= ring
->adev
;
4120 int pipe_num
, tmp
, reg
;
4121 int pipe_percent
= acquire
? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK
: 0x1;
4123 pipe_num
= ring
->me
* adev
->gfx
.mec
.num_pipe_per_mec
+ ring
->pipe
;
4125 /* first me only has 2 entries, GFX and HP3D */
4129 reg
= SOC15_REG_OFFSET(GC
, 0, mmSPI_WCL_PIPE_PERCENT_GFX
) + pipe_num
;
4131 tmp
= REG_SET_FIELD(tmp
, SPI_WCL_PIPE_PERCENT_GFX
, VALUE
, pipe_percent
);
4135 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device
*adev
,
4136 struct amdgpu_ring
*ring
,
4141 struct amdgpu_ring
*iring
;
4143 mutex_lock(&adev
->gfx
.pipe_reserve_mutex
);
4144 pipe
= amdgpu_gfx_queue_to_bit(adev
, ring
->me
, ring
->pipe
, 0);
4146 set_bit(pipe
, adev
->gfx
.pipe_reserve_bitmap
);
4148 clear_bit(pipe
, adev
->gfx
.pipe_reserve_bitmap
);
4150 if (!bitmap_weight(adev
->gfx
.pipe_reserve_bitmap
, AMDGPU_MAX_COMPUTE_QUEUES
)) {
4151 /* Clear all reservations - everyone reacquires all resources */
4152 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; ++i
)
4153 gfx_v9_0_ring_set_pipe_percent(&adev
->gfx
.gfx_ring
[i
],
4156 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; ++i
)
4157 gfx_v9_0_ring_set_pipe_percent(&adev
->gfx
.compute_ring
[i
],
4160 /* Lower all pipes without a current reservation */
4161 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; ++i
) {
4162 iring
= &adev
->gfx
.gfx_ring
[i
];
4163 pipe
= amdgpu_gfx_queue_to_bit(adev
,
4167 reserve
= test_bit(pipe
, adev
->gfx
.pipe_reserve_bitmap
);
4168 gfx_v9_0_ring_set_pipe_percent(iring
, reserve
);
4171 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; ++i
) {
4172 iring
= &adev
->gfx
.compute_ring
[i
];
4173 pipe
= amdgpu_gfx_queue_to_bit(adev
,
4177 reserve
= test_bit(pipe
, adev
->gfx
.pipe_reserve_bitmap
);
4178 gfx_v9_0_ring_set_pipe_percent(iring
, reserve
);
4182 mutex_unlock(&adev
->gfx
.pipe_reserve_mutex
);
4185 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device
*adev
,
4186 struct amdgpu_ring
*ring
,
4189 uint32_t pipe_priority
= acquire
? 0x2 : 0x0;
4190 uint32_t queue_priority
= acquire
? 0xf : 0x0;
4192 mutex_lock(&adev
->srbm_mutex
);
4193 soc15_grbm_select(adev
, ring
->me
, ring
->pipe
, ring
->queue
, 0);
4195 WREG32_SOC15(GC
, 0, mmCP_HQD_PIPE_PRIORITY
, pipe_priority
);
4196 WREG32_SOC15(GC
, 0, mmCP_HQD_QUEUE_PRIORITY
, queue_priority
);
4198 soc15_grbm_select(adev
, 0, 0, 0, 0);
4199 mutex_unlock(&adev
->srbm_mutex
);
4202 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring
*ring
,
4203 enum drm_sched_priority priority
)
4205 struct amdgpu_device
*adev
= ring
->adev
;
4206 bool acquire
= priority
== DRM_SCHED_PRIORITY_HIGH_HW
;
4208 if (ring
->funcs
->type
!= AMDGPU_RING_TYPE_COMPUTE
)
4211 gfx_v9_0_hqd_set_priority(adev
, ring
, acquire
);
4212 gfx_v9_0_pipe_reserve_resources(adev
, ring
, acquire
);
4215 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring
*ring
)
4217 struct amdgpu_device
*adev
= ring
->adev
;
4219 /* XXX check if swapping is necessary on BE */
4220 if (ring
->use_doorbell
) {
4221 atomic64_set((atomic64_t
*)&adev
->wb
.wb
[ring
->wptr_offs
], ring
->wptr
);
4222 WDOORBELL64(ring
->doorbell_index
, ring
->wptr
);
4224 BUG(); /* only DOORBELL method supported on gfx9 now */
4228 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring
*ring
, u64 addr
,
4229 u64 seq
, unsigned int flags
)
4231 struct amdgpu_device
*adev
= ring
->adev
;
4233 /* we only allocate 32bit for each seq wb address */
4234 BUG_ON(flags
& AMDGPU_FENCE_FLAG_64BIT
);
4236 /* write fence seq to the "addr" */
4237 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
4238 amdgpu_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
4239 WRITE_DATA_DST_SEL(5) | WR_CONFIRM
));
4240 amdgpu_ring_write(ring
, lower_32_bits(addr
));
4241 amdgpu_ring_write(ring
, upper_32_bits(addr
));
4242 amdgpu_ring_write(ring
, lower_32_bits(seq
));
4244 if (flags
& AMDGPU_FENCE_FLAG_INT
) {
4245 /* set register to trigger INT */
4246 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
4247 amdgpu_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
4248 WRITE_DATA_DST_SEL(0) | WR_CONFIRM
));
4249 amdgpu_ring_write(ring
, SOC15_REG_OFFSET(GC
, 0, mmCPC_INT_STATUS
));
4250 amdgpu_ring_write(ring
, 0);
4251 amdgpu_ring_write(ring
, 0x20000000); /* src_id is 178 */
4255 static void gfx_v9_ring_emit_sb(struct amdgpu_ring
*ring
)
4257 amdgpu_ring_write(ring
, PACKET3(PACKET3_SWITCH_BUFFER
, 0));
4258 amdgpu_ring_write(ring
, 0);
4261 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring
*ring
)
4263 struct v9_ce_ib_state ce_payload
= {0};
4267 cnt
= (sizeof(ce_payload
) >> 2) + 4 - 2;
4268 csa_addr
= amdgpu_csa_vaddr(ring
->adev
);
4270 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, cnt
));
4271 amdgpu_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(2) |
4272 WRITE_DATA_DST_SEL(8) |
4274 WRITE_DATA_CACHE_POLICY(0));
4275 amdgpu_ring_write(ring
, lower_32_bits(csa_addr
+ offsetof(struct v9_gfx_meta_data
, ce_payload
)));
4276 amdgpu_ring_write(ring
, upper_32_bits(csa_addr
+ offsetof(struct v9_gfx_meta_data
, ce_payload
)));
4277 amdgpu_ring_write_multiple(ring
, (void *)&ce_payload
, sizeof(ce_payload
) >> 2);
4280 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring
*ring
)
4282 struct v9_de_ib_state de_payload
= {0};
4283 uint64_t csa_addr
, gds_addr
;
4286 csa_addr
= amdgpu_csa_vaddr(ring
->adev
);
4287 gds_addr
= csa_addr
+ 4096;
4288 de_payload
.gds_backup_addrlo
= lower_32_bits(gds_addr
);
4289 de_payload
.gds_backup_addrhi
= upper_32_bits(gds_addr
);
4291 cnt
= (sizeof(de_payload
) >> 2) + 4 - 2;
4292 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, cnt
));
4293 amdgpu_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(1) |
4294 WRITE_DATA_DST_SEL(8) |
4296 WRITE_DATA_CACHE_POLICY(0));
4297 amdgpu_ring_write(ring
, lower_32_bits(csa_addr
+ offsetof(struct v9_gfx_meta_data
, de_payload
)));
4298 amdgpu_ring_write(ring
, upper_32_bits(csa_addr
+ offsetof(struct v9_gfx_meta_data
, de_payload
)));
4299 amdgpu_ring_write_multiple(ring
, (void *)&de_payload
, sizeof(de_payload
) >> 2);
4302 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring
*ring
, bool start
)
4304 amdgpu_ring_write(ring
, PACKET3(PACKET3_FRAME_CONTROL
, 0));
4305 amdgpu_ring_write(ring
, FRAME_CMD(start
? 0 : 1)); /* frame_end */
4308 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring
*ring
, uint32_t flags
)
4312 if (amdgpu_sriov_vf(ring
->adev
))
4313 gfx_v9_0_ring_emit_ce_meta(ring
);
4315 gfx_v9_0_ring_emit_tmz(ring
, true);
4317 dw2
|= 0x80000000; /* set load_enable otherwise this package is just NOPs */
4318 if (flags
& AMDGPU_HAVE_CTX_SWITCH
) {
4319 /* set load_global_config & load_global_uconfig */
4321 /* set load_cs_sh_regs */
4323 /* set load_per_context_state & load_gfx_sh_regs for GFX */
4326 /* set load_ce_ram if preamble presented */
4327 if (AMDGPU_PREAMBLE_IB_PRESENT
& flags
)
4330 /* still load_ce_ram if this is the first time preamble presented
4331 * although there is no context switch happens.
4333 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST
& flags
)
4337 amdgpu_ring_write(ring
, PACKET3(PACKET3_CONTEXT_CONTROL
, 1));
4338 amdgpu_ring_write(ring
, dw2
);
4339 amdgpu_ring_write(ring
, 0);
4342 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring
*ring
)
4345 amdgpu_ring_write(ring
, PACKET3(PACKET3_COND_EXEC
, 3));
4346 amdgpu_ring_write(ring
, lower_32_bits(ring
->cond_exe_gpu_addr
));
4347 amdgpu_ring_write(ring
, upper_32_bits(ring
->cond_exe_gpu_addr
));
4348 amdgpu_ring_write(ring
, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
4349 ret
= ring
->wptr
& ring
->buf_mask
;
4350 amdgpu_ring_write(ring
, 0x55aa55aa); /* patch dummy value later */
4354 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring
*ring
, unsigned offset
)
4357 BUG_ON(offset
> ring
->buf_mask
);
4358 BUG_ON(ring
->ring
[offset
] != 0x55aa55aa);
4360 cur
= (ring
->wptr
& ring
->buf_mask
) - 1;
4361 if (likely(cur
> offset
))
4362 ring
->ring
[offset
] = cur
- offset
;
4364 ring
->ring
[offset
] = (ring
->ring_size
>>2) - offset
+ cur
;
4367 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring
*ring
, uint32_t reg
)
4369 struct amdgpu_device
*adev
= ring
->adev
;
4371 amdgpu_ring_write(ring
, PACKET3(PACKET3_COPY_DATA
, 4));
4372 amdgpu_ring_write(ring
, 0 | /* src: register*/
4373 (5 << 8) | /* dst: memory */
4374 (1 << 20)); /* write confirm */
4375 amdgpu_ring_write(ring
, reg
);
4376 amdgpu_ring_write(ring
, 0);
4377 amdgpu_ring_write(ring
, lower_32_bits(adev
->wb
.gpu_addr
+
4378 adev
->virt
.reg_val_offs
* 4));
4379 amdgpu_ring_write(ring
, upper_32_bits(adev
->wb
.gpu_addr
+
4380 adev
->virt
.reg_val_offs
* 4));
4383 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring
*ring
, uint32_t reg
,
4388 switch (ring
->funcs
->type
) {
4389 case AMDGPU_RING_TYPE_GFX
:
4390 cmd
= WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM
;
4392 case AMDGPU_RING_TYPE_KIQ
:
4393 cmd
= (1 << 16); /* no inc addr */
4399 amdgpu_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
4400 amdgpu_ring_write(ring
, cmd
);
4401 amdgpu_ring_write(ring
, reg
);
4402 amdgpu_ring_write(ring
, 0);
4403 amdgpu_ring_write(ring
, val
);
4406 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring
*ring
, uint32_t reg
,
4407 uint32_t val
, uint32_t mask
)
4409 gfx_v9_0_wait_reg_mem(ring
, 0, 0, 0, reg
, 0, val
, mask
, 0x20);
4412 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring
*ring
,
4413 uint32_t reg0
, uint32_t reg1
,
4414 uint32_t ref
, uint32_t mask
)
4416 int usepfp
= (ring
->funcs
->type
== AMDGPU_RING_TYPE_GFX
);
4417 struct amdgpu_device
*adev
= ring
->adev
;
4418 bool fw_version_ok
= (ring
->funcs
->type
== AMDGPU_RING_TYPE_GFX
) ?
4419 adev
->gfx
.me_fw_write_wait
: adev
->gfx
.mec_fw_write_wait
;
4422 gfx_v9_0_wait_reg_mem(ring
, usepfp
, 0, 1, reg0
, reg1
,
4425 amdgpu_ring_emit_reg_write_reg_wait_helper(ring
, reg0
, reg1
,
4429 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring
*ring
, unsigned vmid
)
4431 struct amdgpu_device
*adev
= ring
->adev
;
4434 value
= REG_SET_FIELD(value
, SQ_CMD
, CMD
, 0x03);
4435 value
= REG_SET_FIELD(value
, SQ_CMD
, MODE
, 0x01);
4436 value
= REG_SET_FIELD(value
, SQ_CMD
, CHECK_VMID
, 1);
4437 value
= REG_SET_FIELD(value
, SQ_CMD
, VM_ID
, vmid
);
4438 WREG32(mmSQ_CMD
, value
);
4441 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device
*adev
,
4442 enum amdgpu_interrupt_state state
)
4445 case AMDGPU_IRQ_STATE_DISABLE
:
4446 case AMDGPU_IRQ_STATE_ENABLE
:
4447 WREG32_FIELD15(GC
, 0, CP_INT_CNTL_RING0
,
4448 TIME_STAMP_INT_ENABLE
,
4449 state
== AMDGPU_IRQ_STATE_ENABLE
? 1 : 0);
4456 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device
*adev
,
4458 enum amdgpu_interrupt_state state
)
4460 u32 mec_int_cntl
, mec_int_cntl_reg
;
4463 * amdgpu controls only the first MEC. That's why this function only
4464 * handles the setting of interrupts for this specific MEC. All other
4465 * pipes' interrupts are set by amdkfd.
4471 mec_int_cntl_reg
= SOC15_REG_OFFSET(GC
, 0, mmCP_ME1_PIPE0_INT_CNTL
);
4474 mec_int_cntl_reg
= SOC15_REG_OFFSET(GC
, 0, mmCP_ME1_PIPE1_INT_CNTL
);
4477 mec_int_cntl_reg
= SOC15_REG_OFFSET(GC
, 0, mmCP_ME1_PIPE2_INT_CNTL
);
4480 mec_int_cntl_reg
= SOC15_REG_OFFSET(GC
, 0, mmCP_ME1_PIPE3_INT_CNTL
);
4483 DRM_DEBUG("invalid pipe %d\n", pipe
);
4487 DRM_DEBUG("invalid me %d\n", me
);
4492 case AMDGPU_IRQ_STATE_DISABLE
:
4493 mec_int_cntl
= RREG32(mec_int_cntl_reg
);
4494 mec_int_cntl
= REG_SET_FIELD(mec_int_cntl
, CP_ME1_PIPE0_INT_CNTL
,
4495 TIME_STAMP_INT_ENABLE
, 0);
4496 WREG32(mec_int_cntl_reg
, mec_int_cntl
);
4498 case AMDGPU_IRQ_STATE_ENABLE
:
4499 mec_int_cntl
= RREG32(mec_int_cntl_reg
);
4500 mec_int_cntl
= REG_SET_FIELD(mec_int_cntl
, CP_ME1_PIPE0_INT_CNTL
,
4501 TIME_STAMP_INT_ENABLE
, 1);
4502 WREG32(mec_int_cntl_reg
, mec_int_cntl
);
4509 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device
*adev
,
4510 struct amdgpu_irq_src
*source
,
4512 enum amdgpu_interrupt_state state
)
4515 case AMDGPU_IRQ_STATE_DISABLE
:
4516 case AMDGPU_IRQ_STATE_ENABLE
:
4517 WREG32_FIELD15(GC
, 0, CP_INT_CNTL_RING0
,
4518 PRIV_REG_INT_ENABLE
,
4519 state
== AMDGPU_IRQ_STATE_ENABLE
? 1 : 0);
4528 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device
*adev
,
4529 struct amdgpu_irq_src
*source
,
4531 enum amdgpu_interrupt_state state
)
4534 case AMDGPU_IRQ_STATE_DISABLE
:
4535 case AMDGPU_IRQ_STATE_ENABLE
:
4536 WREG32_FIELD15(GC
, 0, CP_INT_CNTL_RING0
,
4537 PRIV_INSTR_INT_ENABLE
,
4538 state
== AMDGPU_IRQ_STATE_ENABLE
? 1 : 0);
4546 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device
*adev
,
4547 struct amdgpu_irq_src
*src
,
4549 enum amdgpu_interrupt_state state
)
4552 case AMDGPU_CP_IRQ_GFX_EOP
:
4553 gfx_v9_0_set_gfx_eop_interrupt_state(adev
, state
);
4555 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
:
4556 gfx_v9_0_set_compute_eop_interrupt_state(adev
, 1, 0, state
);
4558 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP
:
4559 gfx_v9_0_set_compute_eop_interrupt_state(adev
, 1, 1, state
);
4561 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP
:
4562 gfx_v9_0_set_compute_eop_interrupt_state(adev
, 1, 2, state
);
4564 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP
:
4565 gfx_v9_0_set_compute_eop_interrupt_state(adev
, 1, 3, state
);
4567 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP
:
4568 gfx_v9_0_set_compute_eop_interrupt_state(adev
, 2, 0, state
);
4570 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP
:
4571 gfx_v9_0_set_compute_eop_interrupt_state(adev
, 2, 1, state
);
4573 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP
:
4574 gfx_v9_0_set_compute_eop_interrupt_state(adev
, 2, 2, state
);
4576 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP
:
4577 gfx_v9_0_set_compute_eop_interrupt_state(adev
, 2, 3, state
);
4585 static int gfx_v9_0_eop_irq(struct amdgpu_device
*adev
,
4586 struct amdgpu_irq_src
*source
,
4587 struct amdgpu_iv_entry
*entry
)
4590 u8 me_id
, pipe_id
, queue_id
;
4591 struct amdgpu_ring
*ring
;
4593 DRM_DEBUG("IH: CP EOP\n");
4594 me_id
= (entry
->ring_id
& 0x0c) >> 2;
4595 pipe_id
= (entry
->ring_id
& 0x03) >> 0;
4596 queue_id
= (entry
->ring_id
& 0x70) >> 4;
4600 amdgpu_fence_process(&adev
->gfx
.gfx_ring
[0]);
4604 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++) {
4605 ring
= &adev
->gfx
.compute_ring
[i
];
4606 /* Per-queue interrupt is supported for MEC starting from VI.
4607 * The interrupt can only be enabled/disabled per pipe instead of per queue.
4609 if ((ring
->me
== me_id
) && (ring
->pipe
== pipe_id
) && (ring
->queue
== queue_id
))
4610 amdgpu_fence_process(ring
);
4617 static void gfx_v9_0_fault(struct amdgpu_device
*adev
,
4618 struct amdgpu_iv_entry
*entry
)
4620 u8 me_id
, pipe_id
, queue_id
;
4621 struct amdgpu_ring
*ring
;
4624 me_id
= (entry
->ring_id
& 0x0c) >> 2;
4625 pipe_id
= (entry
->ring_id
& 0x03) >> 0;
4626 queue_id
= (entry
->ring_id
& 0x70) >> 4;
4630 drm_sched_fault(&adev
->gfx
.gfx_ring
[0].sched
);
4634 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++) {
4635 ring
= &adev
->gfx
.compute_ring
[i
];
4636 if (ring
->me
== me_id
&& ring
->pipe
== pipe_id
&&
4637 ring
->queue
== queue_id
)
4638 drm_sched_fault(&ring
->sched
);
/*
 * Privileged-register fault handler: log the error and forward the
 * interrupt entry to gfx_v9_0_fault(). Always returns 0.
 */
static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	gfx_v9_0_fault(adev, entry);

	return 0;
}
/*
 * Privileged-instruction fault handler: log the error and forward the
 * interrupt entry to gfx_v9_0_fault(). Always returns 0.
 */
static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	gfx_v9_0_fault(adev, entry);

	return 0;
}
4662 static const struct amd_ip_funcs gfx_v9_0_ip_funcs
= {
4664 .early_init
= gfx_v9_0_early_init
,
4665 .late_init
= gfx_v9_0_late_init
,
4666 .sw_init
= gfx_v9_0_sw_init
,
4667 .sw_fini
= gfx_v9_0_sw_fini
,
4668 .hw_init
= gfx_v9_0_hw_init
,
4669 .hw_fini
= gfx_v9_0_hw_fini
,
4670 .suspend
= gfx_v9_0_suspend
,
4671 .resume
= gfx_v9_0_resume
,
4672 .is_idle
= gfx_v9_0_is_idle
,
4673 .wait_for_idle
= gfx_v9_0_wait_for_idle
,
4674 .soft_reset
= gfx_v9_0_soft_reset
,
4675 .set_clockgating_state
= gfx_v9_0_set_clockgating_state
,
4676 .set_powergating_state
= gfx_v9_0_set_powergating_state
,
4677 .get_clockgating_state
= gfx_v9_0_get_clockgating_state
,
4680 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx
= {
4681 .type
= AMDGPU_RING_TYPE_GFX
,
4683 .nop
= PACKET3(PACKET3_NOP
, 0x3FFF),
4684 .support_64bit_ptrs
= true,
4685 .vmhub
= AMDGPU_GFXHUB
,
4686 .get_rptr
= gfx_v9_0_ring_get_rptr_gfx
,
4687 .get_wptr
= gfx_v9_0_ring_get_wptr_gfx
,
4688 .set_wptr
= gfx_v9_0_ring_set_wptr_gfx
,
4689 .emit_frame_size
= /* totally 242 maximum if 16 IBs */
4691 7 + /* PIPELINE_SYNC */
4692 SOC15_FLUSH_GPU_TLB_NUM_WREG
* 5 +
4693 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT
* 7 +
4695 8 + /* FENCE for VM_FLUSH */
4696 20 + /* GDS switch */
4697 4 + /* double SWITCH_BUFFER,
4698 the first COND_EXEC jump to the place just
4699 prior to this double SWITCH_BUFFER */
4707 8 + 8 + /* FENCE x2 */
4708 2, /* SWITCH_BUFFER */
4709 .emit_ib_size
= 4, /* gfx_v9_0_ring_emit_ib_gfx */
4710 .emit_ib
= gfx_v9_0_ring_emit_ib_gfx
,
4711 .emit_fence
= gfx_v9_0_ring_emit_fence
,
4712 .emit_pipeline_sync
= gfx_v9_0_ring_emit_pipeline_sync
,
4713 .emit_vm_flush
= gfx_v9_0_ring_emit_vm_flush
,
4714 .emit_gds_switch
= gfx_v9_0_ring_emit_gds_switch
,
4715 .emit_hdp_flush
= gfx_v9_0_ring_emit_hdp_flush
,
4716 .test_ring
= gfx_v9_0_ring_test_ring
,
4717 .test_ib
= gfx_v9_0_ring_test_ib
,
4718 .insert_nop
= amdgpu_ring_insert_nop
,
4719 .pad_ib
= amdgpu_ring_generic_pad_ib
,
4720 .emit_switch_buffer
= gfx_v9_ring_emit_sb
,
4721 .emit_cntxcntl
= gfx_v9_ring_emit_cntxcntl
,
4722 .init_cond_exec
= gfx_v9_0_ring_emit_init_cond_exec
,
4723 .patch_cond_exec
= gfx_v9_0_ring_emit_patch_cond_exec
,
4724 .emit_tmz
= gfx_v9_0_ring_emit_tmz
,
4725 .emit_wreg
= gfx_v9_0_ring_emit_wreg
,
4726 .emit_reg_wait
= gfx_v9_0_ring_emit_reg_wait
,
4727 .emit_reg_write_reg_wait
= gfx_v9_0_ring_emit_reg_write_reg_wait
,
4728 .soft_recovery
= gfx_v9_0_ring_soft_recovery
,
4731 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute
= {
4732 .type
= AMDGPU_RING_TYPE_COMPUTE
,
4734 .nop
= PACKET3(PACKET3_NOP
, 0x3FFF),
4735 .support_64bit_ptrs
= true,
4736 .vmhub
= AMDGPU_GFXHUB
,
4737 .get_rptr
= gfx_v9_0_ring_get_rptr_compute
,
4738 .get_wptr
= gfx_v9_0_ring_get_wptr_compute
,
4739 .set_wptr
= gfx_v9_0_ring_set_wptr_compute
,
4741 20 + /* gfx_v9_0_ring_emit_gds_switch */
4742 7 + /* gfx_v9_0_ring_emit_hdp_flush */
4743 5 + /* hdp invalidate */
4744 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
4745 SOC15_FLUSH_GPU_TLB_NUM_WREG
* 5 +
4746 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT
* 7 +
4747 2 + /* gfx_v9_0_ring_emit_vm_flush */
4748 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
4749 .emit_ib_size
= 7, /* gfx_v9_0_ring_emit_ib_compute */
4750 .emit_ib
= gfx_v9_0_ring_emit_ib_compute
,
4751 .emit_fence
= gfx_v9_0_ring_emit_fence
,
4752 .emit_pipeline_sync
= gfx_v9_0_ring_emit_pipeline_sync
,
4753 .emit_vm_flush
= gfx_v9_0_ring_emit_vm_flush
,
4754 .emit_gds_switch
= gfx_v9_0_ring_emit_gds_switch
,
4755 .emit_hdp_flush
= gfx_v9_0_ring_emit_hdp_flush
,
4756 .test_ring
= gfx_v9_0_ring_test_ring
,
4757 .test_ib
= gfx_v9_0_ring_test_ib
,
4758 .insert_nop
= amdgpu_ring_insert_nop
,
4759 .pad_ib
= amdgpu_ring_generic_pad_ib
,
4760 .set_priority
= gfx_v9_0_ring_set_priority_compute
,
4761 .emit_wreg
= gfx_v9_0_ring_emit_wreg
,
4762 .emit_reg_wait
= gfx_v9_0_ring_emit_reg_wait
,
4763 .emit_reg_write_reg_wait
= gfx_v9_0_ring_emit_reg_write_reg_wait
,
4766 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq
= {
4767 .type
= AMDGPU_RING_TYPE_KIQ
,
4769 .nop
= PACKET3(PACKET3_NOP
, 0x3FFF),
4770 .support_64bit_ptrs
= true,
4771 .vmhub
= AMDGPU_GFXHUB
,
4772 .get_rptr
= gfx_v9_0_ring_get_rptr_compute
,
4773 .get_wptr
= gfx_v9_0_ring_get_wptr_compute
,
4774 .set_wptr
= gfx_v9_0_ring_set_wptr_compute
,
4776 20 + /* gfx_v9_0_ring_emit_gds_switch */
4777 7 + /* gfx_v9_0_ring_emit_hdp_flush */
4778 5 + /* hdp invalidate */
4779 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
4780 SOC15_FLUSH_GPU_TLB_NUM_WREG
* 5 +
4781 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT
* 7 +
4782 2 + /* gfx_v9_0_ring_emit_vm_flush */
4783 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
4784 .emit_ib_size
= 7, /* gfx_v9_0_ring_emit_ib_compute */
4785 .emit_fence
= gfx_v9_0_ring_emit_fence_kiq
,
4786 .test_ring
= gfx_v9_0_ring_test_ring
,
4787 .insert_nop
= amdgpu_ring_insert_nop
,
4788 .pad_ib
= amdgpu_ring_generic_pad_ib
,
4789 .emit_rreg
= gfx_v9_0_ring_emit_rreg
,
4790 .emit_wreg
= gfx_v9_0_ring_emit_wreg
,
4791 .emit_reg_wait
= gfx_v9_0_ring_emit_reg_wait
,
4792 .emit_reg_write_reg_wait
= gfx_v9_0_ring_emit_reg_write_reg_wait
,
4795 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device
*adev
)
4799 adev
->gfx
.kiq
.ring
.funcs
= &gfx_v9_0_ring_funcs_kiq
;
4801 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; i
++)
4802 adev
->gfx
.gfx_ring
[i
].funcs
= &gfx_v9_0_ring_funcs_gfx
;
4804 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++)
4805 adev
->gfx
.compute_ring
[i
].funcs
= &gfx_v9_0_ring_funcs_compute
;
4808 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs
= {
4809 .set
= gfx_v9_0_set_eop_interrupt_state
,
4810 .process
= gfx_v9_0_eop_irq
,
4813 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs
= {
4814 .set
= gfx_v9_0_set_priv_reg_fault_state
,
4815 .process
= gfx_v9_0_priv_reg_irq
,
4818 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs
= {
4819 .set
= gfx_v9_0_set_priv_inst_fault_state
,
4820 .process
= gfx_v9_0_priv_inst_irq
,
4823 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device
*adev
)
4825 adev
->gfx
.eop_irq
.num_types
= AMDGPU_CP_IRQ_LAST
;
4826 adev
->gfx
.eop_irq
.funcs
= &gfx_v9_0_eop_irq_funcs
;
4828 adev
->gfx
.priv_reg_irq
.num_types
= 1;
4829 adev
->gfx
.priv_reg_irq
.funcs
= &gfx_v9_0_priv_reg_irq_funcs
;
4831 adev
->gfx
.priv_inst_irq
.num_types
= 1;
4832 adev
->gfx
.priv_inst_irq
.funcs
= &gfx_v9_0_priv_inst_irq_funcs
;
4835 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device
*adev
)
4837 switch (adev
->asic_type
) {
4842 adev
->gfx
.rlc
.funcs
= &gfx_v9_0_rlc_funcs
;
4849 static void gfx_v9_0_set_gds_init(struct amdgpu_device
*adev
)
4851 /* init asci gds info */
4852 switch (adev
->asic_type
) {
4856 adev
->gds
.mem
.total_size
= 0x10000;
4859 adev
->gds
.mem
.total_size
= 0x1000;
4862 adev
->gds
.mem
.total_size
= 0x10000;
4866 switch (adev
->asic_type
) {
4869 adev
->gds
.gds_compute_max_wave_id
= 0x7ff;
4872 adev
->gds
.gds_compute_max_wave_id
= 0x27f;
4875 if (adev
->rev_id
>= 0x8)
4876 adev
->gds
.gds_compute_max_wave_id
= 0x77; /* raven2 */
4878 adev
->gds
.gds_compute_max_wave_id
= 0x15f; /* raven1 */
4881 /* this really depends on the chip */
4882 adev
->gds
.gds_compute_max_wave_id
= 0x7ff;
4886 adev
->gds
.gws
.total_size
= 64;
4887 adev
->gds
.oa
.total_size
= 16;
4889 if (adev
->gds
.mem
.total_size
== 64 * 1024) {
4890 adev
->gds
.mem
.gfx_partition_size
= 4096;
4891 adev
->gds
.mem
.cs_partition_size
= 4096;
4893 adev
->gds
.gws
.gfx_partition_size
= 4;
4894 adev
->gds
.gws
.cs_partition_size
= 4;
4896 adev
->gds
.oa
.gfx_partition_size
= 4;
4897 adev
->gds
.oa
.cs_partition_size
= 1;
4899 adev
->gds
.mem
.gfx_partition_size
= 1024;
4900 adev
->gds
.mem
.cs_partition_size
= 1024;
4902 adev
->gds
.gws
.gfx_partition_size
= 16;
4903 adev
->gds
.gws
.cs_partition_size
= 16;
4905 adev
->gds
.oa
.gfx_partition_size
= 4;
4906 adev
->gds
.oa
.cs_partition_size
= 4;
4910 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device
*adev
,
4918 data
= bitmap
<< GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT
;
4919 data
&= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK
;
4921 WREG32_SOC15(GC
, 0, mmGC_USER_SHADER_ARRAY_CONFIG
, data
);
4924 static u32
gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device
*adev
)
4928 data
= RREG32_SOC15(GC
, 0, mmCC_GC_SHADER_ARRAY_CONFIG
);
4929 data
|= RREG32_SOC15(GC
, 0, mmGC_USER_SHADER_ARRAY_CONFIG
);
4931 data
&= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK
;
4932 data
>>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT
;
4934 mask
= amdgpu_gfx_create_bitmask(adev
->gfx
.config
.max_cu_per_sh
);
4936 return (~data
) & mask
;
4939 static int gfx_v9_0_get_cu_info(struct amdgpu_device
*adev
,
4940 struct amdgpu_cu_info
*cu_info
)
4942 int i
, j
, k
, counter
, active_cu_number
= 0;
4943 u32 mask
, bitmap
, ao_bitmap
, ao_cu_mask
= 0;
4944 unsigned disable_masks
[4 * 2];
4946 if (!adev
|| !cu_info
)
4949 amdgpu_gfx_parse_disable_cu(disable_masks
, 4, 2);
4951 mutex_lock(&adev
->grbm_idx_mutex
);
4952 for (i
= 0; i
< adev
->gfx
.config
.max_shader_engines
; i
++) {
4953 for (j
= 0; j
< adev
->gfx
.config
.max_sh_per_se
; j
++) {
4957 gfx_v9_0_select_se_sh(adev
, i
, j
, 0xffffffff);
4959 gfx_v9_0_set_user_cu_inactive_bitmap(
4960 adev
, disable_masks
[i
* 2 + j
]);
4961 bitmap
= gfx_v9_0_get_cu_active_bitmap(adev
);
4962 cu_info
->bitmap
[i
][j
] = bitmap
;
4964 for (k
= 0; k
< adev
->gfx
.config
.max_cu_per_sh
; k
++) {
4965 if (bitmap
& mask
) {
4966 if (counter
< adev
->gfx
.config
.max_cu_per_sh
)
4972 active_cu_number
+= counter
;
4974 ao_cu_mask
|= (ao_bitmap
<< (i
* 16 + j
* 8));
4975 cu_info
->ao_cu_bitmap
[i
][j
] = ao_bitmap
;
4978 gfx_v9_0_select_se_sh(adev
, 0xffffffff, 0xffffffff, 0xffffffff);
4979 mutex_unlock(&adev
->grbm_idx_mutex
);
4981 cu_info
->number
= active_cu_number
;
4982 cu_info
->ao_cu_mask
= ao_cu_mask
;
4983 cu_info
->simd_per_cu
= NUM_SIMD_PER_CU
;
4988 const struct amdgpu_ip_block_version gfx_v9_0_ip_block
=
4990 .type
= AMD_IP_BLOCK_TYPE_GFX
,
4994 .funcs
= &gfx_v9_0_ip_funcs
,