/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 2048
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS					0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
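
/*
 * Golden register settings: per-ASIC (mask, value) pairs applied once at
 * init time through soc15_program_register_sequence().
 */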
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000)
};
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rv1,
						ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	default:
		break;
	}

	soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
					(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}
static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("ib test on ring %d failed\n", ring->idx);
		r = -EINVAL;
	}

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}
static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}
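
/*
 * Fetch and validate the CP (PFP/ME/CE/MEC/MEC2) and RLC microcode for the
 * detected ASIC, cache version/feature numbers, and register the images with
 * the PSP front-door loader when that load type is in use.
 */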
static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_VEGA20:
		chip_name = "vega20";
		break;
	case CHIP_RAVEN:
		chip_name = "raven";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_0_init_rlc_ext_microcode(adev);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		err = 0;
		adev->gfx.mec2_fw = NULL;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
			info->fw = adev->gfx.mec2_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}
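
/*
 * Clear-state buffer (CSB) helpers: compute the size of, and then fill, the
 * PM4 stream the RLC replays to reset context register state, built from the
 * gfx9_cs_data section tables.
 */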
static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}
static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/* set RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF */
	WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, 0xFFF);

	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);
}
static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
}
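
/*
 * Copy the CP jump tables (CE, PFP, ME, MEC and, if present, MEC2) out of the
 * firmware images into the RLC-owned cp_table buffer (Raven path).
 */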
static void rv_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 5;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else  if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}
static void gfx_v9_0_rlc_fini(struct amdgpu_device *adev)
{
	/* clear state block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			&adev->gfx.rlc.clear_state_gpu_addr,
			(void **)&adev->gfx.rlc.cs_ptr);

	/* jump table block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
			&adev->gfx.rlc.cp_table_gpu_addr,
			(void **)&adev->gfx.rlc.cp_table_ptr);
}
static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx9_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v9_0_get_csb_size(adev);
		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to create rlc csb bo\n",
				r);
			gfx_v9_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v9_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if (adev->asic_type == CHIP_RAVEN) {
		/* TODO: double check the cp_table_size for RV */
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_err(adev->dev,
				"(%d) failed to create cp table bo\n", r);
			gfx_v9_0_rlc_fini(adev);
			return r;
		}

		rv_init_cp_jump_table(adev);
		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);

		gfx_v9_0_init_lbpw(adev);
	}

	return 0;
}
static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
	if (unlikely(r != 0))
		return r;

	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
			AMDGPU_GEM_DOMAIN_VRAM);
	if (!r)
		adev->gfx.rlc.clear_state_gpu_addr =
			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);

	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);

	return r;
}

static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
{
	int r;

	if (!adev->gfx.rlc.clear_state_obj)
		return;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
	if (likely(r == 0)) {
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}
}
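
/*
 * MEC (compute micro engine) setup: allocate the EOP/HPD buffer for the
 * enabled compute queues and a GTT BO holding a copy of the MEC firmware.
 */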
static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}

static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	const __le32 *fw_data;
	unsigned fw_size;
	u32 *fw;
	size_t mec_hpd_size;

	const struct gfx_firmware_header_v1_0 *mec_hdr;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_obj,
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw);
	if (r) {
		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memcpy(fw, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

	return 0;
}
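
/*
 * Wave debug helpers: read wave status, SGPRs and VGPRs through the
 * SQ_IND_INDEX / SQ_IND_DATA indirect register interface.
 */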
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 1 wave data */
	dst[(*no_fields)++] = 1;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}

static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}

static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t thread,
				     uint32_t start, uint32_t size,
				     uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
				      u32 me, u32 pipe, u32 q)
{
	soc15_grbm_select(adev, me, pipe, q, 0);
}
static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v9_0_select_se_sh,
	.read_wave_data = &gfx_v9_0_read_wave_data,
	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
};
static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	int err;

	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VEGA12:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
		DRM_INFO("fix gfx.config for vega12\n");
		break;
	case CHIP_VEGA20:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
		gb_addr_config &= ~0xf3e777ff;
		gb_addr_config |= 0x22014042;
		/* check vbios table if gpu info is not available */
		err = amdgpu_atomfirmware_get_gfx_info(adev);
		if (err)
			return err;
		break;
	case CHIP_RAVEN:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		BUG();
		break;
	}

	adev->gfx.config.gb_addr_config = gb_addr_config;

	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_PIPES);

	adev->gfx.config.max_tile_pipes =
		adev->gfx.config.gb_addr_config_fields.num_pipes;

	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_BANKS);
	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					MAX_COMPRESSED_FRAGS);
	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_RB_PER_SE);
	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_SHADER_ENGINES);
	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					PIPE_INTERLEAVE_SIZE));

	return 0;
}
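
/*
 * NGG (next-generation geometry) buffers: per-SE primitive, position,
 * control-sideband and parameter-cache buffers in VRAM plus a small GDS
 * reservation, only set up when the amdgpu_ngg module option is enabled.
 */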
static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
				   struct amdgpu_ngg_buf *ngg_buf,
				   int size_se,
				   int default_size_se)
{
	int r;

	if (size_se < 0) {
		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
		return -EINVAL;
	}
	size_se = size_se ? size_se : default_size_se;

	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				    &ngg_buf->bo,
				    &ngg_buf->gpu_addr,
				    NULL);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
		return r;
	}
	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);

	return r;
}

static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < NGG_BUF_MAX; i++)
		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
				      &adev->gfx.ngg.buf[i].gpu_addr,
				      NULL);

	memset(&adev->gfx.ngg.buf[0], 0,
			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);

	adev->gfx.ngg.init = false;

	return 0;
}
static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
{
	int r;

	if (!amdgpu_ngg || adev->gfx.ngg.init == true)
		return 0;

	/* GDS reserve memory: 64 bytes alignment */
	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
	adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
	adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);

	/* Primitive Buffer */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
				    amdgpu_prim_buf_per_se,
				    64 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
		goto err;
	}

	/* Position Buffer */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
				    amdgpu_pos_buf_per_se,
				    256 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Position Buffer\n");
		goto err;
	}

	/* Control Sideband */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
				    amdgpu_cntl_sb_buf_per_se,
				    256);
	if (r) {
		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
		goto err;
	}

	/* Parameter Cache, not created by default */
	if (amdgpu_param_buf_per_se <= 0)
		goto out;

	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
				    amdgpu_param_buf_per_se,
				    512 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Parameter Cache\n");
		goto err;
	}

out:
	adev->gfx.ngg.init = true;
	return 0;
err:
	gfx_v9_0_ngg_fini(adev);
	return r;
}
static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	int r;
	u32 data, base;

	if (!amdgpu_ngg)
		return 0;

	/* Program buffer size */
	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);

	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);

	/* Program buffer base address */
	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);

	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);

	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);

	/* Clear GDS reserved memory */
	r = amdgpu_ring_alloc(ring, 17);
	if (r) {
		DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}

	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
				   (adev->gds.mem.total_size +
				    adev->gfx.ngg.gds_reserve_size) >>
				   AMDGPU_GDS_SHIFT);

	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
				PACKET3_DMA_DATA_DST_SEL(1) |
				PACKET3_DMA_DATA_SRC_SEL(2)));
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
				adev->gfx.ngg.gds_reserve_size);

	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);

	amdgpu_ring_commit(ring);

	return 0;
}
static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				      int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (AMDGPU_DOORBELL_MEC_RING0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
				+ (ring_id * GFX9_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}
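
/*
 * sw_init: register the GFX interrupt sources, load microcode, create the
 * RLC/MEC objects, and set up the gfx ring, the enabled compute rings, the
 * KIQ and the GDS/GWS/OA reservations.
 */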
static int gfx_v9_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		adev->gfx.mec.num_mec = 2;
		break;
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_IB2_INTERRUPT_PKT, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v9_0_scratch_init(adev);

	r = gfx_v9_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v9_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v9_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		if (!i)
			sprintf(ring->name, "gfx");
		else
			sprintf(ring->name, "gfx_%d", i);
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1;
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v9_0_compute_ring_init(adev,
							       ring_id,
							       i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as wel as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v9_0_gpu_early_init(adev);
	if (r)
		return r;

	r = gfx_v9_0_ngg_init(adev);
	if (r)
		return r;

	return 0;
}
)
1635 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
1637 amdgpu_bo_free_kernel(&adev
->gds
.oa_gfx_bo
, NULL
, NULL
);
1638 amdgpu_bo_free_kernel(&adev
->gds
.gws_gfx_bo
, NULL
, NULL
);
1639 amdgpu_bo_free_kernel(&adev
->gds
.gds_gfx_bo
, NULL
, NULL
);
1641 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; i
++)
1642 amdgpu_ring_fini(&adev
->gfx
.gfx_ring
[i
]);
1643 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++)
1644 amdgpu_ring_fini(&adev
->gfx
.compute_ring
[i
]);
1646 amdgpu_gfx_compute_mqd_sw_fini(adev
);
1647 amdgpu_gfx_kiq_free_ring(&adev
->gfx
.kiq
.ring
, &adev
->gfx
.kiq
.irq
);
1648 amdgpu_gfx_kiq_fini(adev
);
1650 gfx_v9_0_mec_fini(adev
);
1651 gfx_v9_0_ngg_fini(adev
);
1652 amdgpu_bo_free_kernel(&adev
->gfx
.rlc
.clear_state_obj
,
1653 &adev
->gfx
.rlc
.clear_state_gpu_addr
,
1654 (void **)&adev
->gfx
.rlc
.cs_ptr
);
1655 if (adev
->asic_type
== CHIP_RAVEN
) {
1656 amdgpu_bo_free_kernel(&adev
->gfx
.rlc
.cp_table_obj
,
1657 &adev
->gfx
.rlc
.cp_table_gpu_addr
,
1658 (void **)&adev
->gfx
.rlc
.cp_table_ptr
);
1660 gfx_v9_0_free_microcode(adev
);
static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	/* TODO */
}
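/*
 * gfx_v9_0_select_se_sh() steers subsequent register accesses to a specific
 * shader engine (SE), shader array (SH) and instance via GRBM_GFX_INDEX.
 * Passing 0xffffffff for a field requests broadcast writes to all units of
 * that kind instead of indexing a single one.
 *
 * As a rough illustration of the REG_SET_FIELD() helper used below (the
 * exact macro lives in the common amdgpu headers), targeting SE 2 with SH
 * and instance broadcast comes down to clearing each field's mask bits and
 * OR-ing in the shifted value:
 *
 *	data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
 *	data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
 *	data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, 2);
 *	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
 *
 * Callers hold grbm_idx_mutex and restore full broadcast mode when done,
 * as gfx_v9_0_setup_rb() below does.
 */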
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
}
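/*
 * gfx_v9_0_get_rb_active_bitmap() returns a bitmap of the render backends
 * (RBs) usable on the currently selected SE/SH: it ORs the hardware and
 * user (harvest) disable registers, extracts the BACKEND_DISABLE field, and
 * inverts it against a mask sized to max_backends_per_se / max_sh_per_se.
 */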
static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);

	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
					 adev->gfx.config.max_sh_per_se);

	return (~data) & mask;
}
static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v9_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);
}
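/*
 * Compute VMIDs 8..15 get a fixed SH_MEM_BASES so that their LDS, scratch
 * and GPUVM apertures land in the ranges listed in the comment inside
 * gfx_v9_0_init_compute_vmid() below.  Worked out from the define:
 *
 *	sh_mem_bases = 0x6000 | (0x6000 << 16) = 0x60006000
 *
 * i.e. both the PRIVATE_BASE (low 16 bits) and SHARED_BASE (high 16 bits)
 * fields select segment 0x6000, which corresponds to address bits 63:48 of
 * the 0x6000'0000'0000'0000 apertures (compare the >> 48 shifts used for
 * SH_MEM_BASES in gfx_v9_0_gpu_init()).
 */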
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		soc15_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
		WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
	}
	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);

	gfx_v9_0_tiling_mode_table_init(adev);

	gfx_v9_0_setup_rb(adev);
	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
		soc15_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
				(adev->gmc.private_aperture_start >> 48));
			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
				(adev->gmc.shared_aperture_start >> 48));
			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
		}
	}
	soc15_grbm_select(adev, 0, 0, 0, 0);

	mutex_unlock(&adev->srbm_mutex);

	gfx_v9_0_init_compute_vmid(adev);
}
1807 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device
*adev
)
1812 mutex_lock(&adev
->grbm_idx_mutex
);
1813 for (i
= 0; i
< adev
->gfx
.config
.max_shader_engines
; i
++) {
1814 for (j
= 0; j
< adev
->gfx
.config
.max_sh_per_se
; j
++) {
1815 gfx_v9_0_select_se_sh(adev
, i
, j
, 0xffffffff);
1816 for (k
= 0; k
< adev
->usec_timeout
; k
++) {
1817 if (RREG32_SOC15(GC
, 0, mmRLC_SERDES_CU_MASTER_BUSY
) == 0)
1821 if (k
== adev
->usec_timeout
) {
1822 gfx_v9_0_select_se_sh(adev
, 0xffffffff,
1823 0xffffffff, 0xffffffff);
1824 mutex_unlock(&adev
->grbm_idx_mutex
);
1825 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
1831 gfx_v9_0_select_se_sh(adev
, 0xffffffff, 0xffffffff, 0xffffffff);
1832 mutex_unlock(&adev
->grbm_idx_mutex
);
1834 mask
= RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK
|
1835 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK
|
1836 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK
|
1837 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK
;
1838 for (k
= 0; k
< adev
->usec_timeout
; k
++) {
1839 if ((RREG32_SOC15(GC
, 0, mmRLC_SERDES_NONCU_MASTER_BUSY
) & mask
) == 0)
static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
}
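/*
 * gfx_v9_0_init_csb() points the RLC at the clear state indirect buffer
 * (CSIB) set up by gfx_v9_0_rlc_init(): address high/low plus its length,
 * so the hardware can restore a known default context state.
 */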
static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
{
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
			adev->gfx.rlc.clear_state_size);
}
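/*
 * The next two helpers build and upload the RLC save/restore list used by
 * the save/restore machine (SRM): gfx_v9_1_parse_ind_reg_list() splits the
 * firmware-provided register_list_format into unique indirect registers and
 * per-block start offsets, and gfx_v9_1_init_rlc_save_restore_list() writes
 * the direct portion to SRM ARAM and the indirect portion to RLC GPM
 * scratch.  As the comment in gfx_v9_0_init_pg() notes, this is only done
 * for RLC v2_1 firmware and is needed by the gfxoff feature.
 */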
1869 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format
,
1870 int indirect_offset
,
1872 int *unique_indirect_regs
,
1873 int unique_indirect_reg_count
,
1874 int *indirect_start_offsets
,
1875 int *indirect_start_offsets_count
,
1876 int max_start_offsets_count
)
1880 for (; indirect_offset
< list_size
; indirect_offset
++) {
1881 WARN_ON(*indirect_start_offsets_count
>= max_start_offsets_count
);
1882 indirect_start_offsets
[*indirect_start_offsets_count
] = indirect_offset
;
1883 *indirect_start_offsets_count
= *indirect_start_offsets_count
+ 1;
1885 while (register_list_format
[indirect_offset
] != 0xFFFFFFFF) {
1886 indirect_offset
+= 2;
1888 /* look for the matching indice */
1889 for (idx
= 0; idx
< unique_indirect_reg_count
; idx
++) {
1890 if (unique_indirect_regs
[idx
] ==
1891 register_list_format
[indirect_offset
] ||
1892 !unique_indirect_regs
[idx
])
1896 BUG_ON(idx
>= unique_indirect_reg_count
);
1898 if (!unique_indirect_regs
[idx
])
1899 unique_indirect_regs
[idx
] = register_list_format
[indirect_offset
];
1906 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device
*adev
)
1908 int unique_indirect_regs
[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
1909 int unique_indirect_reg_count
= 0;
1911 int indirect_start_offsets
[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
1912 int indirect_start_offsets_count
= 0;
1918 u32
*register_list_format
=
1919 kmalloc(adev
->gfx
.rlc
.reg_list_format_size_bytes
, GFP_KERNEL
);
1920 if (!register_list_format
)
1922 memcpy(register_list_format
, adev
->gfx
.rlc
.register_list_format
,
1923 adev
->gfx
.rlc
.reg_list_format_size_bytes
);
1925 /* setup unique_indirect_regs array and indirect_start_offsets array */
1926 unique_indirect_reg_count
= ARRAY_SIZE(unique_indirect_regs
);
1927 gfx_v9_1_parse_ind_reg_list(register_list_format
,
1928 adev
->gfx
.rlc
.reg_list_format_direct_reg_list_length
,
1929 adev
->gfx
.rlc
.reg_list_format_size_bytes
>> 2,
1930 unique_indirect_regs
,
1931 unique_indirect_reg_count
,
1932 indirect_start_offsets
,
1933 &indirect_start_offsets_count
,
1934 ARRAY_SIZE(indirect_start_offsets
));
1936 /* enable auto inc in case it is disabled */
1937 tmp
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_SRM_CNTL
));
1938 tmp
|= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK
;
1939 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_SRM_CNTL
), tmp
);
1941 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
1942 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_SRM_ARAM_ADDR
),
1943 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET
);
1944 for (i
= 0; i
< adev
->gfx
.rlc
.reg_list_size_bytes
>> 2; i
++)
1945 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_SRM_ARAM_DATA
),
1946 adev
->gfx
.rlc
.register_restore
[i
]);
1948 /* load indirect register */
1949 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_GPM_SCRATCH_ADDR
),
1950 adev
->gfx
.rlc
.reg_list_format_start
);
1952 /* direct register portion */
1953 for (i
= 0; i
< adev
->gfx
.rlc
.reg_list_format_direct_reg_list_length
; i
++)
1954 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_GPM_SCRATCH_DATA
),
1955 register_list_format
[i
]);
1957 /* indirect register portion */
1958 while (i
< (adev
->gfx
.rlc
.reg_list_format_size_bytes
>> 2)) {
1959 if (register_list_format
[i
] == 0xFFFFFFFF) {
1960 WREG32_SOC15(GC
, 0, mmRLC_GPM_SCRATCH_DATA
, register_list_format
[i
++]);
1964 WREG32_SOC15(GC
, 0, mmRLC_GPM_SCRATCH_DATA
, register_list_format
[i
++]);
1965 WREG32_SOC15(GC
, 0, mmRLC_GPM_SCRATCH_DATA
, register_list_format
[i
++]);
1967 for (j
= 0; j
< unique_indirect_reg_count
; j
++) {
1968 if (register_list_format
[i
] == unique_indirect_regs
[j
]) {
1969 WREG32_SOC15(GC
, 0, mmRLC_GPM_SCRATCH_DATA
, j
);
1974 BUG_ON(j
>= unique_indirect_reg_count
);
1979 /* set save/restore list size */
1980 list_size
= adev
->gfx
.rlc
.reg_list_size_bytes
>> 2;
1981 list_size
= list_size
>> 1;
1982 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_GPM_SCRATCH_ADDR
),
1983 adev
->gfx
.rlc
.reg_restore_list_size
);
1984 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_GPM_SCRATCH_DATA
), list_size
);
1986 /* write the starting offsets to RLC scratch ram */
1987 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_GPM_SCRATCH_ADDR
),
1988 adev
->gfx
.rlc
.starting_offsets_start
);
1989 for (i
= 0; i
< ARRAY_SIZE(indirect_start_offsets
); i
++)
1990 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_GPM_SCRATCH_DATA
),
1991 indirect_start_offsets
[i
]);
1993 /* load unique indirect regs*/
1994 for (i
= 0; i
< ARRAY_SIZE(unique_indirect_regs
); i
++) {
1995 if (unique_indirect_regs
[i
] != 0) {
1996 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0
)
1997 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS
[i
],
1998 unique_indirect_regs
[i
] & 0x3FFFF);
2000 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_SRM_INDEX_CNTL_DATA_0
)
2001 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS
[i
],
2002 unique_indirect_regs
[i
] >> 20);
2006 kfree(register_list_format
);
static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
}
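/*
 * pwr_10_0_gfxip_control_over_cgpg() hands control of coarse grain power
 * gating (CGPG) to the GFX IP: on enable it sets PWR_GFX_RLC_CGPG_EN and
 * writes 2 into the PWR_GFXOFF_STATUS field, on disable it clears the
 * enable bit again.  Each write is skipped when the register already holds
 * the desired value.
 */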
static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
					     bool enable)
{
	uint32_t data = 0;
	uint32_t default_data = 0;

	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
	if (enable == true) {
		/* enable GFXIP control over CGPG */
		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
		if (default_data != data)
			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);

		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
		if (default_data != data)
			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
	} else {
		/* restore GFXIP control over CGPG */
		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
		if (default_data != data)
			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
	}
}
2041 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device
*adev
)
2045 if (adev
->pg_flags
& (AMD_PG_SUPPORT_GFX_PG
|
2046 AMD_PG_SUPPORT_GFX_SMG
|
2047 AMD_PG_SUPPORT_GFX_DMG
)) {
2048 /* init IDLE_POLL_COUNT = 60 */
2049 data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmCP_RB_WPTR_POLL_CNTL
));
2050 data
&= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK
;
2051 data
|= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT
);
2052 WREG32(SOC15_REG_OFFSET(GC
, 0, mmCP_RB_WPTR_POLL_CNTL
), data
);
2054 /* init RLC PG Delay */
2056 data
|= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT
);
2057 data
|= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT
);
2058 data
|= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT
);
2059 data
|= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT
);
2060 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_DELAY
), data
);
2062 data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_DELAY_2
));
2063 data
&= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK
;
2064 data
|= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT
);
2065 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_DELAY_2
), data
);
2067 data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_DELAY_3
));
2068 data
&= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK
;
2069 data
|= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT
);
2070 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_DELAY_3
), data
);
2072 data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_AUTO_PG_CTRL
));
2073 data
&= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK
;
2075 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2076 data
|= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT
);
2077 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_AUTO_PG_CTRL
), data
);
2079 pwr_10_0_gfxip_control_over_cgpg(adev
, true);
2083 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device
*adev
,
2087 uint32_t default_data
= 0;
2089 default_data
= data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
));
2090 data
= REG_SET_FIELD(data
, RLC_PG_CNTL
,
2091 SMU_CLK_SLOWDOWN_ON_PU_ENABLE
,
2093 if (default_data
!= data
)
2094 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
), data
);
2097 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device
*adev
,
2101 uint32_t default_data
= 0;
2103 default_data
= data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
));
2104 data
= REG_SET_FIELD(data
, RLC_PG_CNTL
,
2105 SMU_CLK_SLOWDOWN_ON_PD_ENABLE
,
2107 if(default_data
!= data
)
2108 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
), data
);
2111 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device
*adev
,
2115 uint32_t default_data
= 0;
2117 default_data
= data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
));
2118 data
= REG_SET_FIELD(data
, RLC_PG_CNTL
,
2121 if(default_data
!= data
)
2122 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
), data
);
2125 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device
*adev
,
2128 uint32_t data
, default_data
;
2130 default_data
= data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
));
2131 data
= REG_SET_FIELD(data
, RLC_PG_CNTL
,
2132 GFX_POWER_GATING_ENABLE
,
2134 if(default_data
!= data
)
2135 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
), data
);
2138 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device
*adev
,
2141 uint32_t data
, default_data
;
2143 default_data
= data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
));
2144 data
= REG_SET_FIELD(data
, RLC_PG_CNTL
,
2145 GFX_PIPELINE_PG_ENABLE
,
2147 if(default_data
!= data
)
2148 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
), data
);
2151 /* read any GFX register to wake up GFX */
2152 data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmDB_RENDER_CONTROL
));
2155 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device
*adev
,
2158 uint32_t data
, default_data
;
2160 default_data
= data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
));
2161 data
= REG_SET_FIELD(data
, RLC_PG_CNTL
,
2162 STATIC_PER_CU_PG_ENABLE
,
2164 if(default_data
!= data
)
2165 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
), data
);
2168 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device
*adev
,
2171 uint32_t data
, default_data
;
2173 default_data
= data
= RREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
));
2174 data
= REG_SET_FIELD(data
, RLC_PG_CNTL
,
2175 DYN_PER_CU_PG_ENABLE
,
2177 if(default_data
!= data
)
2178 WREG32(SOC15_REG_OFFSET(GC
, 0, mmRLC_PG_CNTL
), data
);
static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
{
	gfx_v9_0_init_csb(adev);

	/*
	 * Rlc save restore list is workable since v2_1.
	 * And it's needed by gfxoff feature.
	 */
	if (adev->gfx.rlc.is_rlc_v2_1) {
		if (adev->asic_type == CHIP_VEGA12)
			gfx_v9_1_init_rlc_save_restore_list(adev);
		gfx_v9_0_enable_save_restore_machine(adev);
	}

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG |
			      AMD_PG_SUPPORT_CP |
			      AMD_PG_SUPPORT_GDS |
			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
		WREG32(mmRLC_JUMP_TABLE_RESTORE,
		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v9_0_init_gfx_power_gating(adev);
	}
}
void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
	gfx_v9_0_wait_for_rlc_serdes(adev);
}

static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
{
#ifdef AMDGPU_RLC_DEBUG_RETRY
	u32 rlc_ucode_ver;
#endif

	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
	udelay(50);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU)) {
		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
		udelay(50);
	}

#ifdef AMDGPU_RLC_DEBUG_RETRY
	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
	if (rlc_ucode_ver == 0x108) {
		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
				rlc_ucode_ver, adev->gfx.rlc_fw_version);
		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
		 * default is 0x9C4 to create a 100us interval */
		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
		 * to disable the page fault retry interrupts, default is
		 * 0x100 (256) */
		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
	}
#endif
}
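/*
 * Legacy (non-PSP) RLC microcode load: select the start offset through
 * RLC_GPM_UCODE_ADDR, stream the firmware image one dword at a time into
 * RLC_GPM_UCODE_DATA, then write the firmware version back to the ADDR
 * register.
 */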
static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
			RLCG_UCODE_LOADING_START_ADDRESS);
	for (i = 0; i < fw_size; i++)
		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
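/*
 * gfx_v9_0_rlc_resume() restarts the RLC from scratch: stop, disable CGCG,
 * soft reset, re-init the power gating state, (re)load the microcode when
 * the PSP is not doing firmware loading, tune LBPW on Raven, then start the
 * RLC again.  Under SR-IOV only the CSIB is reprogrammed.
 */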
static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	if (amdgpu_sriov_vf(adev)) {
		gfx_v9_0_init_csb(adev);
		return 0;
	}

	gfx_v9_0_rlc_stop(adev);

	/* disable CG */
	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);

	gfx_v9_0_rlc_reset(adev);

	gfx_v9_0_init_pg(adev);

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
		/* legacy rlc firmware loading */
		r = gfx_v9_0_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	if (adev->asic_type == CHIP_RAVEN) {
		if (amdgpu_lbpw != 0)
			gfx_v9_0_enable_lbpw(adev, true);
		else
			gfx_v9_0_enable_lbpw(adev, false);
	}

	gfx_v9_0_rlc_start(adev);

	return 0;
}
static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);

	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
	if (!enable) {
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
	udelay(50);
}
2332 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device
*adev
)
2334 const struct gfx_firmware_header_v1_0
*pfp_hdr
;
2335 const struct gfx_firmware_header_v1_0
*ce_hdr
;
2336 const struct gfx_firmware_header_v1_0
*me_hdr
;
2337 const __le32
*fw_data
;
2338 unsigned i
, fw_size
;
2340 if (!adev
->gfx
.me_fw
|| !adev
->gfx
.pfp_fw
|| !adev
->gfx
.ce_fw
)
2343 pfp_hdr
= (const struct gfx_firmware_header_v1_0
*)
2344 adev
->gfx
.pfp_fw
->data
;
2345 ce_hdr
= (const struct gfx_firmware_header_v1_0
*)
2346 adev
->gfx
.ce_fw
->data
;
2347 me_hdr
= (const struct gfx_firmware_header_v1_0
*)
2348 adev
->gfx
.me_fw
->data
;
2350 amdgpu_ucode_print_gfx_hdr(&pfp_hdr
->header
);
2351 amdgpu_ucode_print_gfx_hdr(&ce_hdr
->header
);
2352 amdgpu_ucode_print_gfx_hdr(&me_hdr
->header
);
2354 gfx_v9_0_cp_gfx_enable(adev
, false);
2357 fw_data
= (const __le32
*)
2358 (adev
->gfx
.pfp_fw
->data
+
2359 le32_to_cpu(pfp_hdr
->header
.ucode_array_offset_bytes
));
2360 fw_size
= le32_to_cpu(pfp_hdr
->header
.ucode_size_bytes
) / 4;
2361 WREG32_SOC15(GC
, 0, mmCP_PFP_UCODE_ADDR
, 0);
2362 for (i
= 0; i
< fw_size
; i
++)
2363 WREG32_SOC15(GC
, 0, mmCP_PFP_UCODE_DATA
, le32_to_cpup(fw_data
++));
2364 WREG32_SOC15(GC
, 0, mmCP_PFP_UCODE_ADDR
, adev
->gfx
.pfp_fw_version
);
2367 fw_data
= (const __le32
*)
2368 (adev
->gfx
.ce_fw
->data
+
2369 le32_to_cpu(ce_hdr
->header
.ucode_array_offset_bytes
));
2370 fw_size
= le32_to_cpu(ce_hdr
->header
.ucode_size_bytes
) / 4;
2371 WREG32_SOC15(GC
, 0, mmCP_CE_UCODE_ADDR
, 0);
2372 for (i
= 0; i
< fw_size
; i
++)
2373 WREG32_SOC15(GC
, 0, mmCP_CE_UCODE_DATA
, le32_to_cpup(fw_data
++));
2374 WREG32_SOC15(GC
, 0, mmCP_CE_UCODE_ADDR
, adev
->gfx
.ce_fw_version
);
2377 fw_data
= (const __le32
*)
2378 (adev
->gfx
.me_fw
->data
+
2379 le32_to_cpu(me_hdr
->header
.ucode_array_offset_bytes
));
2380 fw_size
= le32_to_cpu(me_hdr
->header
.ucode_size_bytes
) / 4;
2381 WREG32_SOC15(GC
, 0, mmCP_ME_RAM_WADDR
, 0);
2382 for (i
= 0; i
< fw_size
; i
++)
2383 WREG32_SOC15(GC
, 0, mmCP_ME_RAM_DATA
, le32_to_cpup(fw_data
++));
2384 WREG32_SOC15(GC
, 0, mmCP_ME_RAM_WADDR
, adev
->gfx
.me_fw_version
);
2389 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device
*adev
)
2391 struct amdgpu_ring
*ring
= &adev
->gfx
.gfx_ring
[0];
2392 const struct cs_section_def
*sect
= NULL
;
2393 const struct cs_extent_def
*ext
= NULL
;
2397 WREG32_SOC15(GC
, 0, mmCP_MAX_CONTEXT
, adev
->gfx
.config
.max_hw_contexts
- 1);
2398 WREG32_SOC15(GC
, 0, mmCP_DEVICE_ID
, 1);
2400 gfx_v9_0_cp_gfx_enable(adev
, true);
2402 r
= amdgpu_ring_alloc(ring
, gfx_v9_0_get_csb_size(adev
) + 4 + 3);
2404 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r
);
2408 amdgpu_ring_write(ring
, PACKET3(PACKET3_PREAMBLE_CNTL
, 0));
2409 amdgpu_ring_write(ring
, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE
);
2411 amdgpu_ring_write(ring
, PACKET3(PACKET3_CONTEXT_CONTROL
, 1));
2412 amdgpu_ring_write(ring
, 0x80000000);
2413 amdgpu_ring_write(ring
, 0x80000000);
2415 for (sect
= gfx9_cs_data
; sect
->section
!= NULL
; ++sect
) {
2416 for (ext
= sect
->section
; ext
->extent
!= NULL
; ++ext
) {
2417 if (sect
->id
== SECT_CONTEXT
) {
2418 amdgpu_ring_write(ring
,
2419 PACKET3(PACKET3_SET_CONTEXT_REG
,
2421 amdgpu_ring_write(ring
,
2422 ext
->reg_index
- PACKET3_SET_CONTEXT_REG_START
);
2423 for (i
= 0; i
< ext
->reg_count
; i
++)
2424 amdgpu_ring_write(ring
, ext
->extent
[i
]);
2429 amdgpu_ring_write(ring
, PACKET3(PACKET3_PREAMBLE_CNTL
, 0));
2430 amdgpu_ring_write(ring
, PACKET3_PREAMBLE_END_CLEAR_STATE
);
2432 amdgpu_ring_write(ring
, PACKET3(PACKET3_CLEAR_STATE
, 0));
2433 amdgpu_ring_write(ring
, 0);
2435 amdgpu_ring_write(ring
, PACKET3(PACKET3_SET_BASE
, 2));
2436 amdgpu_ring_write(ring
, PACKET3_BASE_INDEX(CE_PARTITION_BASE
));
2437 amdgpu_ring_write(ring
, 0x8000);
2438 amdgpu_ring_write(ring
, 0x8000);
2440 amdgpu_ring_write(ring
, PACKET3(PACKET3_SET_UCONFIG_REG
,1));
2441 tmp
= (PACKET3_SET_UCONFIG_REG_INDEX_TYPE
|
2442 (SOC15_REG_OFFSET(GC
, 0, mmVGT_INDEX_TYPE
) - PACKET3_SET_UCONFIG_REG_START
));
2443 amdgpu_ring_write(ring
, tmp
);
2444 amdgpu_ring_write(ring
, 0);
2446 amdgpu_ring_commit(ring
);
2451 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device
*adev
)
2453 struct amdgpu_ring
*ring
;
2456 u64 rb_addr
, rptr_addr
, wptr_gpu_addr
;
2458 /* Set the write pointer delay */
2459 WREG32_SOC15(GC
, 0, mmCP_RB_WPTR_DELAY
, 0);
2461 /* set the RB to use vmid 0 */
2462 WREG32_SOC15(GC
, 0, mmCP_RB_VMID
, 0);
2464 /* Set ring buffer size */
2465 ring
= &adev
->gfx
.gfx_ring
[0];
2466 rb_bufsz
= order_base_2(ring
->ring_size
/ 8);
2467 tmp
= REG_SET_FIELD(0, CP_RB0_CNTL
, RB_BUFSZ
, rb_bufsz
);
2468 tmp
= REG_SET_FIELD(tmp
, CP_RB0_CNTL
, RB_BLKSZ
, rb_bufsz
- 2);
2470 tmp
= REG_SET_FIELD(tmp
, CP_RB0_CNTL
, BUF_SWAP
, 1);
2472 WREG32_SOC15(GC
, 0, mmCP_RB0_CNTL
, tmp
);
2474 /* Initialize the ring buffer's write pointers */
2476 WREG32_SOC15(GC
, 0, mmCP_RB0_WPTR
, lower_32_bits(ring
->wptr
));
2477 WREG32_SOC15(GC
, 0, mmCP_RB0_WPTR_HI
, upper_32_bits(ring
->wptr
));
2479 /* set the wb address wether it's enabled or not */
2480 rptr_addr
= adev
->wb
.gpu_addr
+ (ring
->rptr_offs
* 4);
2481 WREG32_SOC15(GC
, 0, mmCP_RB0_RPTR_ADDR
, lower_32_bits(rptr_addr
));
2482 WREG32_SOC15(GC
, 0, mmCP_RB0_RPTR_ADDR_HI
, upper_32_bits(rptr_addr
) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK
);
2484 wptr_gpu_addr
= adev
->wb
.gpu_addr
+ (ring
->wptr_offs
* 4);
2485 WREG32_SOC15(GC
, 0, mmCP_RB_WPTR_POLL_ADDR_LO
, lower_32_bits(wptr_gpu_addr
));
2486 WREG32_SOC15(GC
, 0, mmCP_RB_WPTR_POLL_ADDR_HI
, upper_32_bits(wptr_gpu_addr
));
2489 WREG32_SOC15(GC
, 0, mmCP_RB0_CNTL
, tmp
);
2491 rb_addr
= ring
->gpu_addr
>> 8;
2492 WREG32_SOC15(GC
, 0, mmCP_RB0_BASE
, rb_addr
);
2493 WREG32_SOC15(GC
, 0, mmCP_RB0_BASE_HI
, upper_32_bits(rb_addr
));
2495 tmp
= RREG32_SOC15(GC
, 0, mmCP_RB_DOORBELL_CONTROL
);
2496 if (ring
->use_doorbell
) {
2497 tmp
= REG_SET_FIELD(tmp
, CP_RB_DOORBELL_CONTROL
,
2498 DOORBELL_OFFSET
, ring
->doorbell_index
);
2499 tmp
= REG_SET_FIELD(tmp
, CP_RB_DOORBELL_CONTROL
,
2502 tmp
= REG_SET_FIELD(tmp
, CP_RB_DOORBELL_CONTROL
, DOORBELL_EN
, 0);
2504 WREG32_SOC15(GC
, 0, mmCP_RB_DOORBELL_CONTROL
, tmp
);
2506 tmp
= REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER
,
2507 DOORBELL_RANGE_LOWER
, ring
->doorbell_index
);
2508 WREG32_SOC15(GC
, 0, mmCP_RB_DOORBELL_RANGE_LOWER
, tmp
);
2510 WREG32_SOC15(GC
, 0, mmCP_RB_DOORBELL_RANGE_UPPER
,
2511 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK
);
2514 /* start the ring */
2515 gfx_v9_0_cp_gfx_start(adev
);
static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
	} else {
		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
		adev->gfx.kiq.ring.ready = false;
	}
	udelay(50);
}
2537 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device
*adev
)
2539 const struct gfx_firmware_header_v1_0
*mec_hdr
;
2540 const __le32
*fw_data
;
2544 if (!adev
->gfx
.mec_fw
)
2547 gfx_v9_0_cp_compute_enable(adev
, false);
2549 mec_hdr
= (const struct gfx_firmware_header_v1_0
*)adev
->gfx
.mec_fw
->data
;
2550 amdgpu_ucode_print_gfx_hdr(&mec_hdr
->header
);
2552 fw_data
= (const __le32
*)
2553 (adev
->gfx
.mec_fw
->data
+
2554 le32_to_cpu(mec_hdr
->header
.ucode_array_offset_bytes
));
2556 tmp
= REG_SET_FIELD(tmp
, CP_CPC_IC_BASE_CNTL
, VMID
, 0);
2557 tmp
= REG_SET_FIELD(tmp
, CP_CPC_IC_BASE_CNTL
, CACHE_POLICY
, 0);
2558 WREG32_SOC15(GC
, 0, mmCP_CPC_IC_BASE_CNTL
, tmp
);
2560 WREG32_SOC15(GC
, 0, mmCP_CPC_IC_BASE_LO
,
2561 adev
->gfx
.mec
.mec_fw_gpu_addr
& 0xFFFFF000);
2562 WREG32_SOC15(GC
, 0, mmCP_CPC_IC_BASE_HI
,
2563 upper_32_bits(adev
->gfx
.mec
.mec_fw_gpu_addr
));
2566 WREG32_SOC15(GC
, 0, mmCP_MEC_ME1_UCODE_ADDR
,
2567 mec_hdr
->jt_offset
);
2568 for (i
= 0; i
< mec_hdr
->jt_size
; i
++)
2569 WREG32_SOC15(GC
, 0, mmCP_MEC_ME1_UCODE_DATA
,
2570 le32_to_cpup(fw_data
+ mec_hdr
->jt_offset
+ i
));
2572 WREG32_SOC15(GC
, 0, mmCP_MEC_ME1_UCODE_ADDR
,
2573 adev
->gfx
.mec_fw_version
);
2574 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
}
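/*
 * RLC_CP_SCHEDULERS encodes which hardware queue acts as the KIQ:
 * gfx_v9_0_kiq_setting() packs (me << 5) | (pipe << 3) | queue into the low
 * byte and then sets bit 7 with a second write, which appears to latch the
 * selection for the RLC scheduler.
 */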
2594 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device
*adev
)
2596 struct amdgpu_ring
*kiq_ring
= &adev
->gfx
.kiq
.ring
;
2597 uint32_t scratch
, tmp
= 0;
2598 uint64_t queue_mask
= 0;
2601 for (i
= 0; i
< AMDGPU_MAX_COMPUTE_QUEUES
; ++i
) {
2602 if (!test_bit(i
, adev
->gfx
.mec
.queue_bitmap
))
2605 /* This situation may be hit in the future if a new HW
2606 * generation exposes more than 64 queues. If so, the
2607 * definition of queue_mask needs updating */
2608 if (WARN_ON(i
>= (sizeof(queue_mask
)*8))) {
2609 DRM_ERROR("Invalid KCQ enabled: %d\n", i
);
2613 queue_mask
|= (1ull << i
);
2616 r
= amdgpu_gfx_scratch_get(adev
, &scratch
);
2618 DRM_ERROR("Failed to get scratch reg (%d).\n", r
);
2621 WREG32(scratch
, 0xCAFEDEAD);
2623 r
= amdgpu_ring_alloc(kiq_ring
, (7 * adev
->gfx
.num_compute_rings
) + 11);
2625 DRM_ERROR("Failed to lock KIQ (%d).\n", r
);
2626 amdgpu_gfx_scratch_free(adev
, scratch
);
2631 amdgpu_ring_write(kiq_ring
, PACKET3(PACKET3_SET_RESOURCES
, 6));
2632 amdgpu_ring_write(kiq_ring
, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2633 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
2634 amdgpu_ring_write(kiq_ring
, lower_32_bits(queue_mask
)); /* queue mask lo */
2635 amdgpu_ring_write(kiq_ring
, upper_32_bits(queue_mask
)); /* queue mask hi */
2636 amdgpu_ring_write(kiq_ring
, 0); /* gws mask lo */
2637 amdgpu_ring_write(kiq_ring
, 0); /* gws mask hi */
2638 amdgpu_ring_write(kiq_ring
, 0); /* oac mask */
2639 amdgpu_ring_write(kiq_ring
, 0); /* gds heap base:0, gds heap size:0 */
2640 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++) {
2641 struct amdgpu_ring
*ring
= &adev
->gfx
.compute_ring
[i
];
2642 uint64_t mqd_addr
= amdgpu_bo_gpu_offset(ring
->mqd_obj
);
2643 uint64_t wptr_addr
= adev
->wb
.gpu_addr
+ (ring
->wptr_offs
* 4);
2645 amdgpu_ring_write(kiq_ring
, PACKET3(PACKET3_MAP_QUEUES
, 5));
2646 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
2647 amdgpu_ring_write(kiq_ring
, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
2648 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2649 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2650 PACKET3_MAP_QUEUES_QUEUE(ring
->queue
) |
2651 PACKET3_MAP_QUEUES_PIPE(ring
->pipe
) |
2652 PACKET3_MAP_QUEUES_ME((ring
->me
== 1 ? 0 : 1)) |
2653 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2654 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2655 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2656 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2657 amdgpu_ring_write(kiq_ring
, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring
->doorbell_index
));
2658 amdgpu_ring_write(kiq_ring
, lower_32_bits(mqd_addr
));
2659 amdgpu_ring_write(kiq_ring
, upper_32_bits(mqd_addr
));
2660 amdgpu_ring_write(kiq_ring
, lower_32_bits(wptr_addr
));
2661 amdgpu_ring_write(kiq_ring
, upper_32_bits(wptr_addr
));
2663 /* write to scratch for completion */
2664 amdgpu_ring_write(kiq_ring
, PACKET3(PACKET3_SET_UCONFIG_REG
, 1));
2665 amdgpu_ring_write(kiq_ring
, (scratch
- PACKET3_SET_UCONFIG_REG_START
));
2666 amdgpu_ring_write(kiq_ring
, 0xDEADBEEF);
2667 amdgpu_ring_commit(kiq_ring
);
2669 for (i
= 0; i
< adev
->usec_timeout
; i
++) {
2670 tmp
= RREG32(scratch
);
2671 if (tmp
== 0xDEADBEEF)
2675 if (i
>= adev
->usec_timeout
) {
2676 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
2680 amdgpu_gfx_scratch_free(adev
, scratch
);
2685 static int gfx_v9_0_mqd_init(struct amdgpu_ring
*ring
)
2687 struct amdgpu_device
*adev
= ring
->adev
;
2688 struct v9_mqd
*mqd
= ring
->mqd_ptr
;
2689 uint64_t hqd_gpu_addr
, wb_gpu_addr
, eop_base_addr
;
2692 mqd
->header
= 0xC0310800;
2693 mqd
->compute_pipelinestat_enable
= 0x00000001;
2694 mqd
->compute_static_thread_mgmt_se0
= 0xffffffff;
2695 mqd
->compute_static_thread_mgmt_se1
= 0xffffffff;
2696 mqd
->compute_static_thread_mgmt_se2
= 0xffffffff;
2697 mqd
->compute_static_thread_mgmt_se3
= 0xffffffff;
2698 mqd
->compute_misc_reserved
= 0x00000003;
2700 mqd
->dynamic_cu_mask_addr_lo
=
2701 lower_32_bits(ring
->mqd_gpu_addr
2702 + offsetof(struct v9_mqd_allocation
, dynamic_cu_mask
));
2703 mqd
->dynamic_cu_mask_addr_hi
=
2704 upper_32_bits(ring
->mqd_gpu_addr
2705 + offsetof(struct v9_mqd_allocation
, dynamic_cu_mask
));
2707 eop_base_addr
= ring
->eop_gpu_addr
>> 8;
2708 mqd
->cp_hqd_eop_base_addr_lo
= eop_base_addr
;
2709 mqd
->cp_hqd_eop_base_addr_hi
= upper_32_bits(eop_base_addr
);
2711 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2712 tmp
= RREG32_SOC15(GC
, 0, mmCP_HQD_EOP_CONTROL
);
2713 tmp
= REG_SET_FIELD(tmp
, CP_HQD_EOP_CONTROL
, EOP_SIZE
,
2714 (order_base_2(GFX9_MEC_HPD_SIZE
/ 4) - 1));
2716 mqd
->cp_hqd_eop_control
= tmp
;
2718 /* enable doorbell? */
2719 tmp
= RREG32_SOC15(GC
, 0, mmCP_HQD_PQ_DOORBELL_CONTROL
);
2721 if (ring
->use_doorbell
) {
2722 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
2723 DOORBELL_OFFSET
, ring
->doorbell_index
);
2724 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
2726 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
2727 DOORBELL_SOURCE
, 0);
2728 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
2731 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
2735 mqd
->cp_hqd_pq_doorbell_control
= tmp
;
2737 /* disable the queue if it's active */
2739 mqd
->cp_hqd_dequeue_request
= 0;
2740 mqd
->cp_hqd_pq_rptr
= 0;
2741 mqd
->cp_hqd_pq_wptr_lo
= 0;
2742 mqd
->cp_hqd_pq_wptr_hi
= 0;
2744 /* set the pointer to the MQD */
2745 mqd
->cp_mqd_base_addr_lo
= ring
->mqd_gpu_addr
& 0xfffffffc;
2746 mqd
->cp_mqd_base_addr_hi
= upper_32_bits(ring
->mqd_gpu_addr
);
2748 /* set MQD vmid to 0 */
2749 tmp
= RREG32_SOC15(GC
, 0, mmCP_MQD_CONTROL
);
2750 tmp
= REG_SET_FIELD(tmp
, CP_MQD_CONTROL
, VMID
, 0);
2751 mqd
->cp_mqd_control
= tmp
;
2753 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
2754 hqd_gpu_addr
= ring
->gpu_addr
>> 8;
2755 mqd
->cp_hqd_pq_base_lo
= hqd_gpu_addr
;
2756 mqd
->cp_hqd_pq_base_hi
= upper_32_bits(hqd_gpu_addr
);
2758 /* set up the HQD, this is similar to CP_RB0_CNTL */
2759 tmp
= RREG32_SOC15(GC
, 0, mmCP_HQD_PQ_CONTROL
);
2760 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, QUEUE_SIZE
,
2761 (order_base_2(ring
->ring_size
/ 4) - 1));
2762 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, RPTR_BLOCK_SIZE
,
2763 ((order_base_2(AMDGPU_GPU_PAGE_SIZE
/ 4) - 1) << 8));
2765 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, ENDIAN_SWAP
, 1);
2767 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, UNORD_DISPATCH
, 0);
2768 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, ROQ_PQ_IB_FLIP
, 0);
2769 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, PRIV_STATE
, 1);
2770 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, KMD_QUEUE
, 1);
2771 mqd
->cp_hqd_pq_control
= tmp
;
2773 /* set the wb address whether it's enabled or not */
2774 wb_gpu_addr
= adev
->wb
.gpu_addr
+ (ring
->rptr_offs
* 4);
2775 mqd
->cp_hqd_pq_rptr_report_addr_lo
= wb_gpu_addr
& 0xfffffffc;
2776 mqd
->cp_hqd_pq_rptr_report_addr_hi
=
2777 upper_32_bits(wb_gpu_addr
) & 0xffff;
2779 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2780 wb_gpu_addr
= adev
->wb
.gpu_addr
+ (ring
->wptr_offs
* 4);
2781 mqd
->cp_hqd_pq_wptr_poll_addr_lo
= wb_gpu_addr
& 0xfffffffc;
2782 mqd
->cp_hqd_pq_wptr_poll_addr_hi
= upper_32_bits(wb_gpu_addr
) & 0xffff;
2785 /* enable the doorbell if requested */
2786 if (ring
->use_doorbell
) {
2787 tmp
= RREG32_SOC15(GC
, 0, mmCP_HQD_PQ_DOORBELL_CONTROL
);
2788 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
2789 DOORBELL_OFFSET
, ring
->doorbell_index
);
2791 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
2793 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
2794 DOORBELL_SOURCE
, 0);
2795 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
2799 mqd
->cp_hqd_pq_doorbell_control
= tmp
;
2801 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2803 mqd
->cp_hqd_pq_rptr
= RREG32_SOC15(GC
, 0, mmCP_HQD_PQ_RPTR
);
2805 /* set the vmid for the queue */
2806 mqd
->cp_hqd_vmid
= 0;
2808 tmp
= RREG32_SOC15(GC
, 0, mmCP_HQD_PERSISTENT_STATE
);
2809 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PERSISTENT_STATE
, PRELOAD_SIZE
, 0x53);
2810 mqd
->cp_hqd_persistent_state
= tmp
;
2812 /* set MIN_IB_AVAIL_SIZE */
2813 tmp
= RREG32_SOC15(GC
, 0, mmCP_HQD_IB_CONTROL
);
2814 tmp
= REG_SET_FIELD(tmp
, CP_HQD_IB_CONTROL
, MIN_IB_AVAIL_SIZE
, 3);
2815 mqd
->cp_hqd_ib_control
= tmp
;
2817 /* activate the queue */
2818 mqd
->cp_hqd_active
= 1;
2823 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring
*ring
)
2825 struct amdgpu_device
*adev
= ring
->adev
;
2826 struct v9_mqd
*mqd
= ring
->mqd_ptr
;
2829 /* disable wptr polling */
2830 WREG32_FIELD15(GC
, 0, CP_PQ_WPTR_POLL_CNTL
, EN
, 0);
2832 WREG32_SOC15(GC
, 0, mmCP_HQD_EOP_BASE_ADDR
,
2833 mqd
->cp_hqd_eop_base_addr_lo
);
2834 WREG32_SOC15(GC
, 0, mmCP_HQD_EOP_BASE_ADDR_HI
,
2835 mqd
->cp_hqd_eop_base_addr_hi
);
2837 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2838 WREG32_SOC15(GC
, 0, mmCP_HQD_EOP_CONTROL
,
2839 mqd
->cp_hqd_eop_control
);
2841 /* enable doorbell? */
2842 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_DOORBELL_CONTROL
,
2843 mqd
->cp_hqd_pq_doorbell_control
);
2845 /* disable the queue if it's active */
2846 if (RREG32_SOC15(GC
, 0, mmCP_HQD_ACTIVE
) & 1) {
2847 WREG32_SOC15(GC
, 0, mmCP_HQD_DEQUEUE_REQUEST
, 1);
2848 for (j
= 0; j
< adev
->usec_timeout
; j
++) {
2849 if (!(RREG32_SOC15(GC
, 0, mmCP_HQD_ACTIVE
) & 1))
2853 WREG32_SOC15(GC
, 0, mmCP_HQD_DEQUEUE_REQUEST
,
2854 mqd
->cp_hqd_dequeue_request
);
2855 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_RPTR
,
2856 mqd
->cp_hqd_pq_rptr
);
2857 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_LO
,
2858 mqd
->cp_hqd_pq_wptr_lo
);
2859 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_HI
,
2860 mqd
->cp_hqd_pq_wptr_hi
);
2863 /* set the pointer to the MQD */
2864 WREG32_SOC15(GC
, 0, mmCP_MQD_BASE_ADDR
,
2865 mqd
->cp_mqd_base_addr_lo
);
2866 WREG32_SOC15(GC
, 0, mmCP_MQD_BASE_ADDR_HI
,
2867 mqd
->cp_mqd_base_addr_hi
);
2869 /* set MQD vmid to 0 */
2870 WREG32_SOC15(GC
, 0, mmCP_MQD_CONTROL
,
2871 mqd
->cp_mqd_control
);
2873 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
2874 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_BASE
,
2875 mqd
->cp_hqd_pq_base_lo
);
2876 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_BASE_HI
,
2877 mqd
->cp_hqd_pq_base_hi
);
2879 /* set up the HQD, this is similar to CP_RB0_CNTL */
2880 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_CONTROL
,
2881 mqd
->cp_hqd_pq_control
);
2883 /* set the wb address whether it's enabled or not */
2884 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR
,
2885 mqd
->cp_hqd_pq_rptr_report_addr_lo
);
2886 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI
,
2887 mqd
->cp_hqd_pq_rptr_report_addr_hi
);
2889 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2890 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR
,
2891 mqd
->cp_hqd_pq_wptr_poll_addr_lo
);
2892 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI
,
2893 mqd
->cp_hqd_pq_wptr_poll_addr_hi
);
2895 /* enable the doorbell if requested */
2896 if (ring
->use_doorbell
) {
2897 WREG32_SOC15(GC
, 0, mmCP_MEC_DOORBELL_RANGE_LOWER
,
2898 (AMDGPU_DOORBELL64_KIQ
*2) << 2);
2899 WREG32_SOC15(GC
, 0, mmCP_MEC_DOORBELL_RANGE_UPPER
,
2900 (AMDGPU_DOORBELL64_USERQUEUE_END
* 2) << 2);
2903 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_DOORBELL_CONTROL
,
2904 mqd
->cp_hqd_pq_doorbell_control
);
2906 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2907 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_LO
,
2908 mqd
->cp_hqd_pq_wptr_lo
);
2909 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_HI
,
2910 mqd
->cp_hqd_pq_wptr_hi
);
2912 /* set the vmid for the queue */
2913 WREG32_SOC15(GC
, 0, mmCP_HQD_VMID
, mqd
->cp_hqd_vmid
);
2915 WREG32_SOC15(GC
, 0, mmCP_HQD_PERSISTENT_STATE
,
2916 mqd
->cp_hqd_persistent_state
);
2918 /* activate the queue */
2919 WREG32_SOC15(GC
, 0, mmCP_HQD_ACTIVE
,
2920 mqd
->cp_hqd_active
);
2922 if (ring
->use_doorbell
)
2923 WREG32_FIELD15(GC
, 0, CP_PQ_STATUS
, DOORBELL_ENABLE
, 1);
2928 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring
*ring
)
2930 struct amdgpu_device
*adev
= ring
->adev
;
2933 /* disable the queue if it's active */
2934 if (RREG32_SOC15(GC
, 0, mmCP_HQD_ACTIVE
) & 1) {
2936 WREG32_SOC15(GC
, 0, mmCP_HQD_DEQUEUE_REQUEST
, 1);
2938 for (j
= 0; j
< adev
->usec_timeout
; j
++) {
2939 if (!(RREG32_SOC15(GC
, 0, mmCP_HQD_ACTIVE
) & 1))
2944 if (j
== AMDGPU_MAX_USEC_TIMEOUT
) {
2945 DRM_DEBUG("KIQ dequeue request failed.\n");
2947 /* Manual disable if dequeue request times out */
2948 WREG32_SOC15(GC
, 0, mmCP_HQD_ACTIVE
, 0);
2951 WREG32_SOC15(GC
, 0, mmCP_HQD_DEQUEUE_REQUEST
,
2955 WREG32_SOC15(GC
, 0, mmCP_HQD_IQ_TIMER
, 0);
2956 WREG32_SOC15(GC
, 0, mmCP_HQD_IB_CONTROL
, 0);
2957 WREG32_SOC15(GC
, 0, mmCP_HQD_PERSISTENT_STATE
, 0);
2958 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_DOORBELL_CONTROL
, 0x40000000);
2959 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_DOORBELL_CONTROL
, 0);
2960 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_RPTR
, 0);
2961 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_HI
, 0);
2962 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_LO
, 0);
2967 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring
*ring
)
2969 struct amdgpu_device
*adev
= ring
->adev
;
2970 struct v9_mqd
*mqd
= ring
->mqd_ptr
;
2971 int mqd_idx
= AMDGPU_MAX_COMPUTE_RINGS
;
2973 gfx_v9_0_kiq_setting(ring
);
2975 if (adev
->in_gpu_reset
) { /* for GPU_RESET case */
2976 /* reset MQD to a clean status */
2977 if (adev
->gfx
.mec
.mqd_backup
[mqd_idx
])
2978 memcpy(mqd
, adev
->gfx
.mec
.mqd_backup
[mqd_idx
], sizeof(struct v9_mqd_allocation
));
2980 /* reset ring buffer */
2982 amdgpu_ring_clear_ring(ring
);
2984 mutex_lock(&adev
->srbm_mutex
);
2985 soc15_grbm_select(adev
, ring
->me
, ring
->pipe
, ring
->queue
, 0);
2986 gfx_v9_0_kiq_init_register(ring
);
2987 soc15_grbm_select(adev
, 0, 0, 0, 0);
2988 mutex_unlock(&adev
->srbm_mutex
);
2990 memset((void *)mqd
, 0, sizeof(struct v9_mqd_allocation
));
2991 ((struct v9_mqd_allocation
*)mqd
)->dynamic_cu_mask
= 0xFFFFFFFF;
2992 ((struct v9_mqd_allocation
*)mqd
)->dynamic_rb_mask
= 0xFFFFFFFF;
2993 mutex_lock(&adev
->srbm_mutex
);
2994 soc15_grbm_select(adev
, ring
->me
, ring
->pipe
, ring
->queue
, 0);
2995 gfx_v9_0_mqd_init(ring
);
2996 gfx_v9_0_kiq_init_register(ring
);
2997 soc15_grbm_select(adev
, 0, 0, 0, 0);
2998 mutex_unlock(&adev
->srbm_mutex
);
3000 if (adev
->gfx
.mec
.mqd_backup
[mqd_idx
])
3001 memcpy(adev
->gfx
.mec
.mqd_backup
[mqd_idx
], mqd
, sizeof(struct v9_mqd_allocation
));
3007 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring
*ring
)
3009 struct amdgpu_device
*adev
= ring
->adev
;
3010 struct v9_mqd
*mqd
= ring
->mqd_ptr
;
3011 int mqd_idx
= ring
- &adev
->gfx
.compute_ring
[0];
3013 if (!adev
->in_gpu_reset
&& !adev
->gfx
.in_suspend
) {
3014 memset((void *)mqd
, 0, sizeof(struct v9_mqd_allocation
));
3015 ((struct v9_mqd_allocation
*)mqd
)->dynamic_cu_mask
= 0xFFFFFFFF;
3016 ((struct v9_mqd_allocation
*)mqd
)->dynamic_rb_mask
= 0xFFFFFFFF;
3017 mutex_lock(&adev
->srbm_mutex
);
3018 soc15_grbm_select(adev
, ring
->me
, ring
->pipe
, ring
->queue
, 0);
3019 gfx_v9_0_mqd_init(ring
);
3020 soc15_grbm_select(adev
, 0, 0, 0, 0);
3021 mutex_unlock(&adev
->srbm_mutex
);
3023 if (adev
->gfx
.mec
.mqd_backup
[mqd_idx
])
3024 memcpy(adev
->gfx
.mec
.mqd_backup
[mqd_idx
], mqd
, sizeof(struct v9_mqd_allocation
));
3025 } else if (adev
->in_gpu_reset
) { /* for GPU_RESET case */
3026 /* reset MQD to a clean status */
3027 if (adev
->gfx
.mec
.mqd_backup
[mqd_idx
])
3028 memcpy(mqd
, adev
->gfx
.mec
.mqd_backup
[mqd_idx
], sizeof(struct v9_mqd_allocation
));
3030 /* reset ring buffer */
3032 amdgpu_ring_clear_ring(ring
);
3034 amdgpu_ring_clear_ring(ring
);
3040 static int gfx_v9_0_kiq_resume(struct amdgpu_device
*adev
)
3042 struct amdgpu_ring
*ring
= NULL
;
3045 gfx_v9_0_cp_compute_enable(adev
, true);
3047 ring
= &adev
->gfx
.kiq
.ring
;
3049 r
= amdgpu_bo_reserve(ring
->mqd_obj
, false);
3050 if (unlikely(r
!= 0))
3053 r
= amdgpu_bo_kmap(ring
->mqd_obj
, (void **)&ring
->mqd_ptr
);
3055 r
= gfx_v9_0_kiq_init_queue(ring
);
3056 amdgpu_bo_kunmap(ring
->mqd_obj
);
3057 ring
->mqd_ptr
= NULL
;
3059 amdgpu_bo_unreserve(ring
->mqd_obj
);
3063 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++) {
3064 ring
= &adev
->gfx
.compute_ring
[i
];
3066 r
= amdgpu_bo_reserve(ring
->mqd_obj
, false);
3067 if (unlikely(r
!= 0))
3069 r
= amdgpu_bo_kmap(ring
->mqd_obj
, (void **)&ring
->mqd_ptr
);
3071 r
= gfx_v9_0_kcq_init_queue(ring
);
3072 amdgpu_bo_kunmap(ring
->mqd_obj
);
3073 ring
->mqd_ptr
= NULL
;
3075 amdgpu_bo_unreserve(ring
->mqd_obj
);
3080 r
= gfx_v9_0_kiq_kcq_enable(adev
);
3085 static int gfx_v9_0_cp_resume(struct amdgpu_device
*adev
)
3088 struct amdgpu_ring
*ring
;
3090 if (!(adev
->flags
& AMD_IS_APU
))
3091 gfx_v9_0_enable_gui_idle_interrupt(adev
, false);
3093 if (adev
->firmware
.load_type
!= AMDGPU_FW_LOAD_PSP
) {
3094 /* legacy firmware loading */
3095 r
= gfx_v9_0_cp_gfx_load_microcode(adev
);
3099 r
= gfx_v9_0_cp_compute_load_microcode(adev
);
3104 r
= gfx_v9_0_cp_gfx_resume(adev
);
3108 r
= gfx_v9_0_kiq_resume(adev
);
3112 ring
= &adev
->gfx
.gfx_ring
[0];
3113 r
= amdgpu_ring_test_ring(ring
);
3115 ring
->ready
= false;
3119 ring
= &adev
->gfx
.kiq
.ring
;
3121 r
= amdgpu_ring_test_ring(ring
);
3123 ring
->ready
= false;
3125 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++) {
3126 ring
= &adev
->gfx
.compute_ring
[i
];
3129 r
= amdgpu_ring_test_ring(ring
);
3131 ring
->ready
= false;
3134 gfx_v9_0_enable_gui_idle_interrupt(adev
, true);
static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v9_0_cp_gfx_enable(adev, enable);
	gfx_v9_0_cp_compute_enable(adev, enable);
}

static int gfx_v9_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v9_0_init_golden_registers(adev);

	gfx_v9_0_gpu_init(adev);

	r = gfx_v9_0_csb_vram_pin(adev);
	if (r)
		return r;

	r = gfx_v9_0_rlc_resume(adev);
	if (r)
		return r;

	r = gfx_v9_0_cp_resume(adev);
	if (r)
		return r;

	r = gfx_v9_0_ngg_en(adev);
	if (r)
		return r;

	return 0;
}
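/*
 * gfx_v9_0_hw_init() above brings the block up in a fixed order: golden
 * register settings, gpu_init (RB and SH_MEM setup), pinning the clear
 * state buffer in VRAM, resuming the RLC, resuming the CP (gfx, KIQ and
 * compute queues) and finally enabling NGG.
 */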
3173 static int gfx_v9_0_kcq_disable(struct amdgpu_ring
*kiq_ring
,struct amdgpu_ring
*ring
)
3175 struct amdgpu_device
*adev
= kiq_ring
->adev
;
3176 uint32_t scratch
, tmp
= 0;
3179 r
= amdgpu_gfx_scratch_get(adev
, &scratch
);
3181 DRM_ERROR("Failed to get scratch reg (%d).\n", r
);
3184 WREG32(scratch
, 0xCAFEDEAD);
3186 r
= amdgpu_ring_alloc(kiq_ring
, 10);
3188 DRM_ERROR("Failed to lock KIQ (%d).\n", r
);
3189 amdgpu_gfx_scratch_free(adev
, scratch
);
3194 amdgpu_ring_write(kiq_ring
, PACKET3(PACKET3_UNMAP_QUEUES
, 4));
3195 amdgpu_ring_write(kiq_ring
, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3196 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3197 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3198 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3199 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3200 amdgpu_ring_write(kiq_ring
, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring
->doorbell_index
));
3201 amdgpu_ring_write(kiq_ring
, 0);
3202 amdgpu_ring_write(kiq_ring
, 0);
3203 amdgpu_ring_write(kiq_ring
, 0);
3204 /* write to scratch for completion */
3205 amdgpu_ring_write(kiq_ring
, PACKET3(PACKET3_SET_UCONFIG_REG
, 1));
3206 amdgpu_ring_write(kiq_ring
, (scratch
- PACKET3_SET_UCONFIG_REG_START
));
3207 amdgpu_ring_write(kiq_ring
, 0xDEADBEEF);
3208 amdgpu_ring_commit(kiq_ring
);
3210 for (i
= 0; i
< adev
->usec_timeout
; i
++) {
3211 tmp
= RREG32(scratch
);
3212 if (tmp
== 0xDEADBEEF)
3216 if (i
>= adev
->usec_timeout
) {
3217 DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch
, tmp
);
3220 amdgpu_gfx_scratch_free(adev
, scratch
);
3224 static int gfx_v9_0_hw_fini(void *handle
)
3226 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
3229 amdgpu_device_ip_set_powergating_state(adev
, AMD_IP_BLOCK_TYPE_GFX
,
3230 AMD_PG_STATE_UNGATE
);
3232 amdgpu_irq_put(adev
, &adev
->gfx
.priv_reg_irq
, 0);
3233 amdgpu_irq_put(adev
, &adev
->gfx
.priv_inst_irq
, 0);
3235 /* disable KCQ to avoid CPC touch memory not valid anymore */
3236 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++)
3237 gfx_v9_0_kcq_disable(&adev
->gfx
.kiq
.ring
, &adev
->gfx
.compute_ring
[i
]);
3239 if (amdgpu_sriov_vf(adev
)) {
3240 gfx_v9_0_cp_gfx_enable(adev
, false);
3241 /* must disable polling for SRIOV when hw finished, otherwise
3242 * CPC engine may still keep fetching WB address which is already
3243 * invalid after sw finished and trigger DMAR reading error in
3246 WREG32_FIELD15(GC
, 0, CP_PQ_WPTR_POLL_CNTL
, EN
, 0);
3250 /* Use deinitialize sequence from CAIL when unbinding device from driver,
3251 * otherwise KIQ is hanging when binding back
3253 if (!adev
->in_gpu_reset
&& !adev
->gfx
.in_suspend
) {
3254 mutex_lock(&adev
->srbm_mutex
);
3255 soc15_grbm_select(adev
, adev
->gfx
.kiq
.ring
.me
,
3256 adev
->gfx
.kiq
.ring
.pipe
,
3257 adev
->gfx
.kiq
.ring
.queue
, 0);
3258 gfx_v9_0_kiq_fini_register(&adev
->gfx
.kiq
.ring
);
3259 soc15_grbm_select(adev
, 0, 0, 0, 0);
3260 mutex_unlock(&adev
->srbm_mutex
);
3263 gfx_v9_0_cp_enable(adev
, false);
3264 gfx_v9_0_rlc_stop(adev
);
3266 gfx_v9_0_csb_vram_unpin(adev
);
static int gfx_v9_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.in_suspend = true;
	return gfx_v9_0_hw_fini(adev);
}

static int gfx_v9_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = gfx_v9_0_hw_init(adev);
	adev->gfx.in_suspend = false;
	return r;
}

static bool gfx_v9_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
				GRBM_STATUS, GUI_ACTIVE))
		return false;
	else
		return true;
}

static int gfx_v9_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v9_0_is_idle(handle))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}
3313 static int gfx_v9_0_soft_reset(void *handle
)
3315 u32 grbm_soft_reset
= 0;
3317 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
3320 tmp
= RREG32_SOC15(GC
, 0, mmGRBM_STATUS
);
3321 if (tmp
& (GRBM_STATUS__PA_BUSY_MASK
| GRBM_STATUS__SC_BUSY_MASK
|
3322 GRBM_STATUS__BCI_BUSY_MASK
| GRBM_STATUS__SX_BUSY_MASK
|
3323 GRBM_STATUS__TA_BUSY_MASK
| GRBM_STATUS__VGT_BUSY_MASK
|
3324 GRBM_STATUS__DB_BUSY_MASK
| GRBM_STATUS__CB_BUSY_MASK
|
3325 GRBM_STATUS__GDS_BUSY_MASK
| GRBM_STATUS__SPI_BUSY_MASK
|
3326 GRBM_STATUS__IA_BUSY_MASK
| GRBM_STATUS__IA_BUSY_NO_DMA_MASK
)) {
3327 grbm_soft_reset
= REG_SET_FIELD(grbm_soft_reset
,
3328 GRBM_SOFT_RESET
, SOFT_RESET_CP
, 1);
3329 grbm_soft_reset
= REG_SET_FIELD(grbm_soft_reset
,
3330 GRBM_SOFT_RESET
, SOFT_RESET_GFX
, 1);
3333 if (tmp
& (GRBM_STATUS__CP_BUSY_MASK
| GRBM_STATUS__CP_COHERENCY_BUSY_MASK
)) {
3334 grbm_soft_reset
= REG_SET_FIELD(grbm_soft_reset
,
3335 GRBM_SOFT_RESET
, SOFT_RESET_CP
, 1);
3339 tmp
= RREG32_SOC15(GC
, 0, mmGRBM_STATUS2
);
3340 if (REG_GET_FIELD(tmp
, GRBM_STATUS2
, RLC_BUSY
))
3341 grbm_soft_reset
= REG_SET_FIELD(grbm_soft_reset
,
3342 GRBM_SOFT_RESET
, SOFT_RESET_RLC
, 1);
3345 if (grbm_soft_reset
) {
3347 gfx_v9_0_rlc_stop(adev
);
3349 /* Disable GFX parsing/prefetching */
3350 gfx_v9_0_cp_gfx_enable(adev
, false);
3352 /* Disable MEC parsing/prefetching */
3353 gfx_v9_0_cp_compute_enable(adev
, false);
3355 if (grbm_soft_reset
) {
3356 tmp
= RREG32_SOC15(GC
, 0, mmGRBM_SOFT_RESET
);
3357 tmp
|= grbm_soft_reset
;
3358 dev_info(adev
->dev
, "GRBM_SOFT_RESET=0x%08X\n", tmp
);
3359 WREG32_SOC15(GC
, 0, mmGRBM_SOFT_RESET
, tmp
);
3360 tmp
= RREG32_SOC15(GC
, 0, mmGRBM_SOFT_RESET
);
3364 tmp
&= ~grbm_soft_reset
;
3365 WREG32_SOC15(GC
, 0, mmGRBM_SOFT_RESET
, tmp
);
3366 tmp
= RREG32_SOC15(GC
, 0, mmGRBM_SOFT_RESET
);
3369 /* Wait a little for things to settle down */
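/*
 * gfx_v9_0_get_gpu_clock_counter() latches the free-running GPU clock by
 * writing RLC_CAPTURE_GPU_CLOCK_COUNT and then assembles the 64-bit value
 * from the LSB/MSB halves, all under gpu_clock_mutex so concurrent readers
 * cannot interleave the capture and the reads.
 */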
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
3387 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring
*ring
,
3389 uint32_t gds_base
, uint32_t gds_size
,
3390 uint32_t gws_base
, uint32_t gws_size
,
3391 uint32_t oa_base
, uint32_t oa_size
)
3393 struct amdgpu_device
*adev
= ring
->adev
;
3395 gds_base
= gds_base
>> AMDGPU_GDS_SHIFT
;
3396 gds_size
= gds_size
>> AMDGPU_GDS_SHIFT
;
3398 gws_base
= gws_base
>> AMDGPU_GWS_SHIFT
;
3399 gws_size
= gws_size
>> AMDGPU_GWS_SHIFT
;
3401 oa_base
= oa_base
>> AMDGPU_OA_SHIFT
;
3402 oa_size
= oa_size
>> AMDGPU_OA_SHIFT
;
3405 gfx_v9_0_write_data_to_reg(ring
, 0, false,
3406 SOC15_REG_OFFSET(GC
, 0, mmGDS_VMID0_BASE
) + 2 * vmid
,
3410 gfx_v9_0_write_data_to_reg(ring
, 0, false,
3411 SOC15_REG_OFFSET(GC
, 0, mmGDS_VMID0_SIZE
) + 2 * vmid
,
3415 gfx_v9_0_write_data_to_reg(ring
, 0, false,
3416 SOC15_REG_OFFSET(GC
, 0, mmGDS_GWS_VMID0
) + vmid
,
3417 gws_size
<< GDS_GWS_VMID0__SIZE__SHIFT
| gws_base
);
3420 gfx_v9_0_write_data_to_reg(ring
, 0, false,
3421 SOC15_REG_OFFSET(GC
, 0, mmGDS_OA_VMID0
) + vmid
,
3422 (1 << (oa_size
+ oa_base
)) - (1 << oa_base
));
static int gfx_v9_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	gfx_v9_0_set_ring_funcs(adev);
	gfx_v9_0_set_irq_funcs(adev);
	gfx_v9_0_set_gds_init(adev);
	gfx_v9_0_set_rlc_funcs(adev);

	return 0;
}

static int gfx_v9_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	return 0;
}
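/*
 * RLC safe mode handshake: gfx_v9_0_enter_rlc_safe_mode() below writes
 * RLC_SAFE_MODE with the CMD bit plus MESSAGE = 1 and polls until the CMD
 * field clears; gfx_v9_0_exit_rlc_safe_mode() writes CMD alone to leave it.
 * Both bail out when the RLC is not running or the relevant clockgating
 * flags are not set, and in_safe_mode tracks the current state.
 */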
3455 static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device
*adev
)
3457 uint32_t rlc_setting
, data
;
3460 if (adev
->gfx
.rlc
.in_safe_mode
)
3463 /* if RLC is not enabled, do nothing */
3464 rlc_setting
= RREG32_SOC15(GC
, 0, mmRLC_CNTL
);
3465 if (!(rlc_setting
& RLC_CNTL__RLC_ENABLE_F32_MASK
))
3468 if (adev
->cg_flags
&
3469 (AMD_CG_SUPPORT_GFX_CGCG
| AMD_CG_SUPPORT_GFX_MGCG
|
3470 AMD_CG_SUPPORT_GFX_3D_CGCG
)) {
3471 data
= RLC_SAFE_MODE__CMD_MASK
;
3472 data
|= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT
);
3473 WREG32_SOC15(GC
, 0, mmRLC_SAFE_MODE
, data
);
3475 /* wait for RLC_SAFE_MODE */
3476 for (i
= 0; i
< adev
->usec_timeout
; i
++) {
3477 if (!REG_GET_FIELD(RREG32_SOC15(GC
, 0, mmRLC_SAFE_MODE
), RLC_SAFE_MODE
, CMD
))
3481 adev
->gfx
.rlc
.in_safe_mode
= true;
static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
    uint32_t rlc_setting, data;

    if (!adev->gfx.rlc.in_safe_mode)
        return;

    /* if RLC is not enabled, do nothing */
    rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
    if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
        return;

    if (adev->cg_flags &
        (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
        /*
         * Try to exit safe mode only if it is already in safe
         * mode.
         */
        data = RLC_SAFE_MODE__CMD_MASK;
        WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
        adev->gfx.rlc.in_safe_mode = false;
    }
}
static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
                                                bool enable)
{
    gfx_v9_0_enter_rlc_safe_mode(adev);

    if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
        gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
        if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
            gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
    } else {
        gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
        gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
    }

    gfx_v9_0_exit_rlc_safe_mode(adev);
}

static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
                                                bool enable)
{
    /* TODO: double check if we need to perform under safe mode */
    /* gfx_v9_0_enter_rlc_safe_mode(adev); */

    if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
        gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
    else
        gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);

    if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
        gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
    else
        gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);

    /* gfx_v9_0_exit_rlc_safe_mode(adev); */
}
static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
    uint32_t data, def;

    /* It is disabled by HW by default */
    if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
        /* 1 - RLC_CGTT_MGCG_OVERRIDE */
        def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);

        if (adev->asic_type != CHIP_VEGA12)
            data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;

        data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
                  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
                  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);

        /* only for Vega10 & Raven1 */
        data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;

        if (def != data)
            WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);

        /* MGLS is a global flag to control all MGLS in GFX */
        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
            /* 2 - RLC memory Light sleep */
            if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
                def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
                data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
                if (def != data)
                    WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
            }
            /* 3 - CP memory Light sleep */
            if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
                def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
                data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
                if (def != data)
                    WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
            }
        }
    } else {
        /* 1 - MGCG_OVERRIDE */
        def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);

        if (adev->asic_type != CHIP_VEGA12)
            data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;

        data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
                 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
                 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
                 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);

        if (def != data)
            WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);

        /* 2 - disable MGLS in RLC */
        data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
        if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
            data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
            WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
        }

        /* 3 - disable MGLS in CP */
        data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
        if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
            data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
            WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
        }
    }
}
static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
                                            bool enable)
{
    uint32_t data, def;

    adev->gfx.rlc.funcs->enter_safe_mode(adev);

    /* Enable 3D CGCG/CGLS */
    if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
        /* write cmd to clear cgcg/cgls ov */
        def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
        /* unset CGCG override */
        data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
        /* update CGCG and CGLS override bits */
        if (def != data)
            WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);

        /* enable 3Dcgcg FSM(0x0000363f) */
        def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);

        data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
            RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
            data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
                RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
        if (def != data)
            WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);

        /* set IDLE_POLL_COUNT(0x00900100) */
        def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
        data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
            (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
        if (def != data)
            WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
    } else {
        /* Disable CGCG/CGLS */
        def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
        /* disable cgcg, cgls should be disabled */
        data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
                  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
        /* disable cgcg and cgls in FSM */
        if (def != data)
            WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
    }

    adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
    uint32_t def, data;

    adev->gfx.rlc.funcs->enter_safe_mode(adev);

    if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
        def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
        /* unset CGCG override */
        data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
            data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
        else
            data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
        /* update CGCG and CGLS override bits */
        if (def != data)
            WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);

        /* enable cgcg FSM(0x0000363F) */
        def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);

        data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
            RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
            data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
                RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
        if (def != data)
            WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);

        /* set IDLE_POLL_COUNT(0x00900100) */
        def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
        data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
            (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
        if (def != data)
            WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
    } else {
        def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
        /* reset CGCG/CGLS bits */
        data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
        /* disable cgcg and cgls in FSM */
        if (def != data)
            WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
    }

    adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
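/* Ordering matters for the top level toggle below: when gating is enabled,
 * MGCG/MGLS is programmed before the 3D and GFX CGCG/CGLS FSMs; when gating
 * is disabled, the same steps are undone in the reverse order.
 */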
static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
                                            bool enable)
{
    if (enable) {
        /* CGCG/CGLS should be enabled after MGCG/MGLS
         * ===  MGCG + MGLS ===
         */
        gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
        /* ===  CGCG /CGLS for GFX 3D Only === */
        gfx_v9_0_update_3d_clock_gating(adev, enable);
        /* ===  CGCG + CGLS === */
        gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
    } else {
        /* CGCG/CGLS should be disabled before MGCG/MGLS
         * ===  CGCG + CGLS ===
         */
        gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
        /* ===  CGCG /CGLS for GFX 3D Only === */
        gfx_v9_0_update_3d_clock_gating(adev, enable);
        /* ===  MGCG + MGLS === */
        gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
    }
    return 0;
}

static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
    .enter_safe_mode = gfx_v9_0_enter_rlc_safe_mode,
    .exit_safe_mode = gfx_v9_0_exit_rlc_safe_mode
};
static int gfx_v9_0_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    bool enable = (state == AMD_PG_STATE_GATE) ? true : false;

    switch (adev->asic_type) {
    case CHIP_RAVEN:
        if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
            gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
            gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
        } else {
            gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
            gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
        }

        if (adev->pg_flags & AMD_PG_SUPPORT_CP)
            gfx_v9_0_enable_cp_power_gating(adev, true);
        else
            gfx_v9_0_enable_cp_power_gating(adev, false);

        /* update gfx cgpg state */
        gfx_v9_0_update_gfx_cg_power_gating(adev, enable);

        /* update mgcg state */
        gfx_v9_0_update_gfx_mg_power_gating(adev, enable);

        /* set gfx off through smu */
        if (enable && adev->powerplay.pp_funcs->set_powergating_by_smu)
            amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true);
        break;
    case CHIP_VEGA12:
        /* set gfx off through smu */
        if (enable && adev->powerplay.pp_funcs->set_powergating_by_smu)
            amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true);
        break;
    default:
        break;
    }

    return 0;
}
static int gfx_v9_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;

    if (amdgpu_sriov_vf(adev))
        return 0;

    switch (adev->asic_type) {
    case CHIP_VEGA10:
    case CHIP_VEGA12:
    case CHIP_VEGA20:
    case CHIP_RAVEN:
        gfx_v9_0_update_gfx_clock_gating(adev,
                                         state == AMD_CG_STATE_GATE ? true : false);
        break;
    default:
        break;
    }
    return 0;
}
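/* Report the clockgating state from the current register contents rather
 * than from the cached cg_flags, so the query reflects what the hardware is
 * actually doing at the time of the call.
 */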
static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
{
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    int data;

    if (amdgpu_sriov_vf(adev))
        *flags = 0;

    /* AMD_CG_SUPPORT_GFX_MGCG */
    data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
    if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
        *flags |= AMD_CG_SUPPORT_GFX_MGCG;

    /* AMD_CG_SUPPORT_GFX_CGCG */
    data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
    if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
        *flags |= AMD_CG_SUPPORT_GFX_CGCG;

    /* AMD_CG_SUPPORT_GFX_CGLS */
    if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
        *flags |= AMD_CG_SUPPORT_GFX_CGLS;

    /* AMD_CG_SUPPORT_GFX_RLC_LS */
    data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
    if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
        *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

    /* AMD_CG_SUPPORT_GFX_CP_LS */
    data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
    if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
        *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;

    /* AMD_CG_SUPPORT_GFX_3D_CGCG */
    data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
    if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
        *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;

    /* AMD_CG_SUPPORT_GFX_3D_CGLS */
    if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
        *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
}
static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
{
    return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
}

static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
    struct amdgpu_device *adev = ring->adev;
    u64 wptr;

    /* XXX check if swapping is necessary on BE */
    if (ring->use_doorbell) {
        wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
    } else {
        wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
        wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
    }

    return wptr;
}

static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
    struct amdgpu_device *adev = ring->adev;

    if (ring->use_doorbell) {
        /* XXX check if swapping is necessary on BE */
        atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
        WDOORBELL64(ring->doorbell_index, ring->wptr);
    } else {
        WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
        WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
    }
}
static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
    struct amdgpu_device *adev = ring->adev;
    u32 ref_and_mask, reg_mem_engine;
    const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;

    if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
        switch (ring->me) {
        case 1:
            ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
            break;
        case 2:
            ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
            break;
        default:
            return;
        }
        reg_mem_engine = 0;
    } else {
        ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
        reg_mem_engine = 1; /* pfp */
    }

    gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
                          adev->nbio_funcs->get_hdp_flush_req_offset(adev),
                          adev->nbio_funcs->get_hdp_flush_done_offset(adev),
                          ref_and_mask, ref_and_mask, 0x20);
}
static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
                                      struct amdgpu_ib *ib,
                                      unsigned vmid, bool ctx_switch)
{
    u32 header, control = 0;

    if (ib->flags & AMDGPU_IB_FLAG_CE)
        header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
    else
        header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

    control |= ib->length_dw | (vmid << 24);

    if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
        control |= INDIRECT_BUFFER_PRE_ENB(1);

        if (!(ib->flags & AMDGPU_IB_FLAG_CE))
            gfx_v9_0_ring_emit_de_meta(ring);
    }

    amdgpu_ring_write(ring, header);
    BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
    amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
        (2 << 0) |
#endif
        lower_32_bits(ib->gpu_addr));
    amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
    amdgpu_ring_write(ring, control);
}

static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
                                          struct amdgpu_ib *ib,
                                          unsigned vmid, bool ctx_switch)
{
    u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

    amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
    BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
    amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
        (2 << 0) |
#endif
        lower_32_bits(ib->gpu_addr));
    amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
    amdgpu_ring_write(ring, control);
}
static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
                                     u64 seq, unsigned flags)
{
    bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
    bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
    bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;

    /* RELEASE_MEM - flush caches, send int */
    amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
    amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
                                           EOP_TC_NC_ACTION_EN) :
                                          (EOP_TCL1_ACTION_EN |
                                           EOP_TC_ACTION_EN |
                                           EOP_TC_WB_ACTION_EN |
                                           EOP_TC_MD_ACTION_EN)) |
                             EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                             EVENT_INDEX(5)));
    amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));

    /*
     * the address should be Qword aligned if 64bit write, Dword
     * aligned if only send 32bit data low (discard data high)
     */
    if (write64bit)
        BUG_ON(addr & 0x7);
    else
        BUG_ON(addr & 0x3);
    amdgpu_ring_write(ring, lower_32_bits(addr));
    amdgpu_ring_write(ring, upper_32_bits(addr));
    amdgpu_ring_write(ring, lower_32_bits(seq));
    amdgpu_ring_write(ring, upper_32_bits(seq));
    amdgpu_ring_write(ring, 0);
}
static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
    int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
    uint32_t seq = ring->fence_drv.sync_seq;
    uint64_t addr = ring->fence_drv.gpu_addr;

    gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
                          lower_32_bits(addr), upper_32_bits(addr),
                          seq, 0xffffffff, 4);
}

static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                        unsigned vmid, uint64_t pd_addr)
{
    amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

    /* compute doesn't have PFP */
    if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
        /* sync PFP to ME, otherwise we might get invalid PFP reads */
        amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
        amdgpu_ring_write(ring, 0x0);
    }
}
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
{
    return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
}

static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
    u64 wptr;

    /* XXX check if swapping is necessary on BE */
    if (ring->use_doorbell)
        wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
    else
        BUG();
    return wptr;
}
static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
                                           bool acquire)
{
    struct amdgpu_device *adev = ring->adev;
    int pipe_num, tmp, reg;
    int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;

    pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;

    /* first me only has 2 entries, GFX and HP3D */
    if (ring->me > 0)
        pipe_num -= 2;

    reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
    tmp = RREG32(reg);
    tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
    WREG32(reg, tmp);
}
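/* High priority (acquire) rings reserve their pipe in pipe_reserve_bitmap.
 * While any reservation is held, unreserved pipes are throttled by dropping
 * their SPI_WCL_PIPE_PERCENT budget to the minimum; once the last
 * reservation is released every pipe gets its full budget back.
 */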
static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
                                            struct amdgpu_ring *ring,
                                            bool acquire)
{
    int i, pipe;
    bool reserve;
    struct amdgpu_ring *iring;

    mutex_lock(&adev->gfx.pipe_reserve_mutex);
    pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
    if (acquire)
        set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
    else
        clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

    if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
        /* Clear all reservations - everyone reacquires all resources */
        for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
            gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
                                           true);

        for (i = 0; i < adev->gfx.num_compute_rings; ++i)
            gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
                                           true);
    } else {
        /* Lower all pipes without a current reservation */
        for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
            iring = &adev->gfx.gfx_ring[i];
            pipe = amdgpu_gfx_queue_to_bit(adev,
                                           iring->me,
                                           iring->pipe,
                                           0);
            reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
            gfx_v9_0_ring_set_pipe_percent(iring, reserve);
        }

        for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
            iring = &adev->gfx.compute_ring[i];
            pipe = amdgpu_gfx_queue_to_bit(adev,
                                           iring->me,
                                           iring->pipe,
                                           0);
            reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
            gfx_v9_0_ring_set_pipe_percent(iring, reserve);
        }
    }

    mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
                                      struct amdgpu_ring *ring,
                                      bool acquire)
{
    uint32_t pipe_priority = acquire ? 0x2 : 0x0;
    uint32_t queue_priority = acquire ? 0xf : 0x0;

    mutex_lock(&adev->srbm_mutex);
    soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

    WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
    WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);

    soc15_grbm_select(adev, 0, 0, 0, 0);
    mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
                                               enum drm_sched_priority priority)
{
    struct amdgpu_device *adev = ring->adev;
    bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;

    if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
        return;

    gfx_v9_0_hqd_set_priority(adev, ring, acquire);
    gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
}
static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
    struct amdgpu_device *adev = ring->adev;

    /* XXX check if swapping is necessary on BE */
    if (ring->use_doorbell) {
        atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
        WDOORBELL64(ring->doorbell_index, ring->wptr);
    } else {
        BUG(); /* only DOORBELL method supported on gfx9 now */
    }
}
static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned int flags)
{
    struct amdgpu_device *adev = ring->adev;

    /* we only allocate 32bit for each seq wb address */
    BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

    /* write fence seq to the "addr" */
    amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
    amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                             WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
    amdgpu_ring_write(ring, lower_32_bits(addr));
    amdgpu_ring_write(ring, upper_32_bits(addr));
    amdgpu_ring_write(ring, lower_32_bits(seq));

    if (flags & AMDGPU_FENCE_FLAG_INT) {
        /* set register to trigger INT */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
        amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
    }
}

static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
{
    amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
    amdgpu_ring_write(ring, 0);
}
static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
    struct v9_ce_ib_state ce_payload = {0};
    uint64_t csa_addr;
    int cnt;

    cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
    csa_addr = amdgpu_csa_vaddr(ring->adev);

    amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
    amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
                             WRITE_DATA_DST_SEL(8) |
                             WR_CONFIRM) |
                             WRITE_DATA_CACHE_POLICY(0));
    amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
    amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
    amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
}

static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
    struct v9_de_ib_state de_payload = {0};
    uint64_t csa_addr, gds_addr;
    int cnt;

    csa_addr = amdgpu_csa_vaddr(ring->adev);
    gds_addr = csa_addr + 4096;
    de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
    de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);

    cnt = (sizeof(de_payload) >> 2) + 4 - 2;
    amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
    amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
                             WRITE_DATA_DST_SEL(8) |
                             WR_CONFIRM) |
                             WRITE_DATA_CACHE_POLICY(0));
    amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
    amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
    amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
}
static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
{
    amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
    amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
}

static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
    uint32_t dw2 = 0;

    if (amdgpu_sriov_vf(ring->adev))
        gfx_v9_0_ring_emit_ce_meta(ring);

    gfx_v9_0_ring_emit_tmz(ring, true);

    dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
    if (flags & AMDGPU_HAVE_CTX_SWITCH) {
        /* set load_global_config & load_global_uconfig */
        dw2 |= 0x8001;
        /* set load_cs_sh_regs */
        dw2 |= 0x01000000;
        /* set load_per_context_state & load_gfx_sh_regs for GFX */
        dw2 |= 0x10002;

        /* set load_ce_ram if preamble presented */
        if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
            dw2 |= 0x10000000;
    } else {
        /* still load_ce_ram if this is the first time preamble presented
         * although there is no context switch happens.
         */
        if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
            dw2 |= 0x10000000;
    }

    amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
    amdgpu_ring_write(ring, dw2);
    amdgpu_ring_write(ring, 0);
}
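/* COND_EXEC patching: init_cond_exec emits the packet with a dummy DW count
 * (0x55aa55aa) and returns its offset in the ring; patch_cond_exec later
 * rewrites that slot with the real number of DWs to skip, accounting for
 * ring wrap-around.
 */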
static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
    unsigned ret;

    amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
    amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
    amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
    amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
    ret = ring->wptr & ring->buf_mask;
    amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
    return ret;
}

static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
    unsigned cur;

    BUG_ON(offset > ring->buf_mask);
    BUG_ON(ring->ring[offset] != 0x55aa55aa);

    cur = (ring->wptr & ring->buf_mask) - 1;
    if (likely(cur > offset))
        ring->ring[offset] = cur - offset;
    else
        ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
    struct amdgpu_device *adev = ring->adev;

    amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
    amdgpu_ring_write(ring, 0 |     /* src: register*/
                            (5 << 8) |      /* dst: memory */
                            (1 << 20));     /* write confirm */
    amdgpu_ring_write(ring, reg);
    amdgpu_ring_write(ring, 0);
    amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
                            adev->virt.reg_val_offs * 4));
    amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
                            adev->virt.reg_val_offs * 4));
}
static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
                                    uint32_t val)
{
    uint32_t cmd = 0;

    switch (ring->funcs->type) {
    case AMDGPU_RING_TYPE_GFX:
        cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
        break;
    case AMDGPU_RING_TYPE_KIQ:
        cmd = (1 << 16); /* no inc addr */
        break;
    default:
        cmd = WR_CONFIRM;
        break;
    }
    amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
    amdgpu_ring_write(ring, cmd);
    amdgpu_ring_write(ring, reg);
    amdgpu_ring_write(ring, 0);
    amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
                                        uint32_t val, uint32_t mask)
{
    gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}

static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
                                                  uint32_t reg0, uint32_t reg1,
                                                  uint32_t ref, uint32_t mask)
{
    int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

    if (amdgpu_sriov_vf(ring->adev))
        gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
                              ref, mask, 0x20);
    else
        amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
                                                   ref, mask);
}
static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
                                                 enum amdgpu_interrupt_state state)
{
    switch (state) {
    case AMDGPU_IRQ_STATE_DISABLE:
    case AMDGPU_IRQ_STATE_ENABLE:
        WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
                       TIME_STAMP_INT_ENABLE,
                       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
        break;
    default:
        break;
    }
}

static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
                                                     int me, int pipe,
                                                     enum amdgpu_interrupt_state state)
{
    u32 mec_int_cntl, mec_int_cntl_reg;

    /*
     * amdgpu controls only the first MEC. That's why this function only
     * handles the setting of interrupts for this specific MEC. All other
     * pipes' interrupts are set by amdkfd.
     */

    if (me == 1) {
        switch (pipe) {
        case 0:
            mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
            break;
        case 1:
            mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
            break;
        case 2:
            mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
            break;
        case 3:
            mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
            break;
        default:
            DRM_DEBUG("invalid pipe %d\n", pipe);
            return;
        }
    } else {
        DRM_DEBUG("invalid me %d\n", me);
        return;
    }

    switch (state) {
    case AMDGPU_IRQ_STATE_DISABLE:
        mec_int_cntl = RREG32(mec_int_cntl_reg);
        mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
                                     TIME_STAMP_INT_ENABLE, 0);
        WREG32(mec_int_cntl_reg, mec_int_cntl);
        break;
    case AMDGPU_IRQ_STATE_ENABLE:
        mec_int_cntl = RREG32(mec_int_cntl_reg);
        mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
                                     TIME_STAMP_INT_ENABLE, 1);
        WREG32(mec_int_cntl_reg, mec_int_cntl);
        break;
    default:
        break;
    }
}
static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
                                             struct amdgpu_irq_src *source,
                                             unsigned type,
                                             enum amdgpu_interrupt_state state)
{
    switch (state) {
    case AMDGPU_IRQ_STATE_DISABLE:
    case AMDGPU_IRQ_STATE_ENABLE:
        WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
                       PRIV_REG_INT_ENABLE,
                       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
        break;
    default:
        break;
    }

    return 0;
}

static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
                                              struct amdgpu_irq_src *source,
                                              unsigned type,
                                              enum amdgpu_interrupt_state state)
{
    switch (state) {
    case AMDGPU_IRQ_STATE_DISABLE:
    case AMDGPU_IRQ_STATE_ENABLE:
        WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
                       PRIV_INSTR_INT_ENABLE,
                       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
        break;
    default:
        break;
    }

    return 0;
}
static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
                                            struct amdgpu_irq_src *src,
                                            unsigned type,
                                            enum amdgpu_interrupt_state state)
{
    switch (type) {
    case AMDGPU_CP_IRQ_GFX_EOP:
        gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
        break;
    case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
        gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
        break;
    case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
        gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
        break;
    case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
        gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
        break;
    case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
        gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
        break;
    case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
        gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
        break;
    case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
        gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
        break;
    case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
        gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
        break;
    case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
        gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
        break;
    default:
        break;
    }
    return 0;
}
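/* EOP interrupt routing: the IV ring_id encodes the source queue with me in
 * bits [3:2], pipe in bits [1:0] and queue in bits [6:4]; me 0 is the gfx
 * ring, me 1/2 are matched against the compute rings.
 */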
static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
                            struct amdgpu_irq_src *source,
                            struct amdgpu_iv_entry *entry)
{
    int i;
    u8 me_id, pipe_id, queue_id;
    struct amdgpu_ring *ring;

    DRM_DEBUG("IH: CP EOP\n");
    me_id = (entry->ring_id & 0x0c) >> 2;
    pipe_id = (entry->ring_id & 0x03) >> 0;
    queue_id = (entry->ring_id & 0x70) >> 4;

    switch (me_id) {
    case 0:
        amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
        break;
    case 1:
    case 2:
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
            ring = &adev->gfx.compute_ring[i];
            /* Per-queue interrupt is supported for MEC starting from VI.
             * The interrupt can only be enabled/disabled per pipe instead of per queue.
             */
            if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
                amdgpu_fence_process(ring);
        }
        break;
    }
    return 0;
}

static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
                                 struct amdgpu_irq_src *source,
                                 struct amdgpu_iv_entry *entry)
{
    DRM_ERROR("Illegal register access in command stream\n");
    schedule_work(&adev->reset_work);
    return 0;
}

static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
                                  struct amdgpu_irq_src *source,
                                  struct amdgpu_iv_entry *entry)
{
    DRM_ERROR("Illegal instruction in command stream\n");
    schedule_work(&adev->reset_work);
    return 0;
}
static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
                                            struct amdgpu_irq_src *src,
                                            unsigned int type,
                                            enum amdgpu_interrupt_state state)
{
    uint32_t tmp, target;
    struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

    if (ring->me == 1)
        target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
    else
        target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL);
    target += ring->pipe;

    switch (type) {
    case AMDGPU_CP_KIQ_IRQ_DRIVER0:
        if (state == AMDGPU_IRQ_STATE_DISABLE) {
            tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
            tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
                                GENERIC2_INT_ENABLE, 0);
            WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);

            tmp = RREG32(target);
            tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
                                GENERIC2_INT_ENABLE, 0);
            WREG32(target, tmp);
        } else {
            tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
            tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
                                GENERIC2_INT_ENABLE, 1);
            WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);

            tmp = RREG32(target);
            tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
                                GENERIC2_INT_ENABLE, 1);
            WREG32(target, tmp);
        }
        break;
    default:
        BUG(); /* kiq only support GENERIC2_INT now */
        break;
    }
    return 0;
}
static int gfx_v9_0_kiq_irq(struct amdgpu_device *adev,
                            struct amdgpu_irq_src *source,
                            struct amdgpu_iv_entry *entry)
{
    u8 me_id, pipe_id, queue_id;
    struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

    me_id = (entry->ring_id & 0x0c) >> 2;
    pipe_id = (entry->ring_id & 0x03) >> 0;
    queue_id = (entry->ring_id & 0x70) >> 4;
    DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
              me_id, pipe_id, queue_id);

    amdgpu_fence_process(ring);
    return 0;
}

static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
    .name = "gfx_v9_0",
    .early_init = gfx_v9_0_early_init,
    .late_init = gfx_v9_0_late_init,
    .sw_init = gfx_v9_0_sw_init,
    .sw_fini = gfx_v9_0_sw_fini,
    .hw_init = gfx_v9_0_hw_init,
    .hw_fini = gfx_v9_0_hw_fini,
    .suspend = gfx_v9_0_suspend,
    .resume = gfx_v9_0_resume,
    .is_idle = gfx_v9_0_is_idle,
    .wait_for_idle = gfx_v9_0_wait_for_idle,
    .soft_reset = gfx_v9_0_soft_reset,
    .set_clockgating_state = gfx_v9_0_set_clockgating_state,
    .set_powergating_state = gfx_v9_0_set_powergating_state,
    .get_clockgating_state = gfx_v9_0_get_clockgating_state,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
    .type = AMDGPU_RING_TYPE_GFX,
    .align_mask = 0xff,
    .nop = PACKET3(PACKET3_NOP, 0x3FFF),
    .support_64bit_ptrs = true,
    .vmhub = AMDGPU_GFXHUB,
    .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
    .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
    .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
    .emit_frame_size = /* totally 242 maximum if 16 IBs */
        5 +  /* COND_EXEC */
        7 +  /* PIPELINE_SYNC */
        SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
        SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
        2 +  /* VM_FLUSH */
        8 +  /* FENCE for VM_FLUSH */
        20 + /* GDS switch */
        4 +  /* double SWITCH_BUFFER,
              * the first COND_EXEC jump to the place just
              * prior to this double SWITCH_BUFFER
              */
        5 +  /* COND_EXEC */
        7 +  /* HDP_flush */
        4 +  /* VGT_flush */
        14 + /* CE_META */
        31 + /* DE_META */
        3 +  /* CNTX_CTRL */
        5 +  /* HDP_INVL */
        8 + 8 + /* FENCE x2 */
        2, /* SWITCH_BUFFER */
    .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
    .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
    .emit_fence = gfx_v9_0_ring_emit_fence,
    .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
    .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
    .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
    .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
    .test_ring = gfx_v9_0_ring_test_ring,
    .test_ib = gfx_v9_0_ring_test_ib,
    .insert_nop = amdgpu_ring_insert_nop,
    .pad_ib = amdgpu_ring_generic_pad_ib,
    .emit_switch_buffer = gfx_v9_ring_emit_sb,
    .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
    .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
    .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
    .emit_tmz = gfx_v9_0_ring_emit_tmz,
    .emit_wreg = gfx_v9_0_ring_emit_wreg,
    .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
    .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
    .type = AMDGPU_RING_TYPE_COMPUTE,
    .align_mask = 0xff,
    .nop = PACKET3(PACKET3_NOP, 0x3FFF),
    .support_64bit_ptrs = true,
    .vmhub = AMDGPU_GFXHUB,
    .get_rptr = gfx_v9_0_ring_get_rptr_compute,
    .get_wptr = gfx_v9_0_ring_get_wptr_compute,
    .set_wptr = gfx_v9_0_ring_set_wptr_compute,
    .emit_frame_size =
        20 + /* gfx_v9_0_ring_emit_gds_switch */
        7 + /* gfx_v9_0_ring_emit_hdp_flush */
        5 + /* hdp invalidate */
        7 + /* gfx_v9_0_ring_emit_pipeline_sync */
        SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
        SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
        2 + /* gfx_v9_0_ring_emit_vm_flush */
        8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
    .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
    .emit_ib = gfx_v9_0_ring_emit_ib_compute,
    .emit_fence = gfx_v9_0_ring_emit_fence,
    .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
    .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
    .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
    .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
    .test_ring = gfx_v9_0_ring_test_ring,
    .test_ib = gfx_v9_0_ring_test_ib,
    .insert_nop = amdgpu_ring_insert_nop,
    .pad_ib = amdgpu_ring_generic_pad_ib,
    .set_priority = gfx_v9_0_ring_set_priority_compute,
    .emit_wreg = gfx_v9_0_ring_emit_wreg,
    .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
    .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
    .type = AMDGPU_RING_TYPE_KIQ,
    .align_mask = 0xff,
    .nop = PACKET3(PACKET3_NOP, 0x3FFF),
    .support_64bit_ptrs = true,
    .vmhub = AMDGPU_GFXHUB,
    .get_rptr = gfx_v9_0_ring_get_rptr_compute,
    .get_wptr = gfx_v9_0_ring_get_wptr_compute,
    .set_wptr = gfx_v9_0_ring_set_wptr_compute,
    .emit_frame_size =
        20 + /* gfx_v9_0_ring_emit_gds_switch */
        7 + /* gfx_v9_0_ring_emit_hdp_flush */
        5 + /* hdp invalidate */
        7 + /* gfx_v9_0_ring_emit_pipeline_sync */
        SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
        SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
        2 + /* gfx_v9_0_ring_emit_vm_flush */
        8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
    .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
    .emit_ib = gfx_v9_0_ring_emit_ib_compute,
    .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
    .test_ring = gfx_v9_0_ring_test_ring,
    .test_ib = gfx_v9_0_ring_test_ib,
    .insert_nop = amdgpu_ring_insert_nop,
    .pad_ib = amdgpu_ring_generic_pad_ib,
    .emit_rreg = gfx_v9_0_ring_emit_rreg,
    .emit_wreg = gfx_v9_0_ring_emit_wreg,
    .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
    .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
    int i;

    adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

    for (i = 0; i < adev->gfx.num_gfx_rings; i++)
        adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

    for (i = 0; i < adev->gfx.num_compute_rings; i++)
        adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_kiq_irq_funcs = {
    .set = gfx_v9_0_kiq_set_interrupt_state,
    .process = gfx_v9_0_kiq_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
    .set = gfx_v9_0_set_eop_interrupt_state,
    .process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
    .set = gfx_v9_0_set_priv_reg_fault_state,
    .process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
    .set = gfx_v9_0_set_priv_inst_fault_state,
    .process = gfx_v9_0_priv_inst_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
    adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
    adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

    adev->gfx.priv_reg_irq.num_types = 1;
    adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

    adev->gfx.priv_inst_irq.num_types = 1;
    adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

    adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
    adev->gfx.kiq.irq.funcs = &gfx_v9_0_kiq_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
    switch (adev->asic_type) {
    case CHIP_VEGA10:
    case CHIP_VEGA12:
    case CHIP_VEGA20:
    case CHIP_RAVEN:
        adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
        break;
    default:
        break;
    }
}
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
    /* init asci gds info */
    adev->gds.mem.total_size = RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
    adev->gds.gws.total_size = 64;
    adev->gds.oa.total_size = 16;

    if (adev->gds.mem.total_size == 64 * 1024) {
        adev->gds.mem.gfx_partition_size = 4096;
        adev->gds.mem.cs_partition_size = 4096;

        adev->gds.gws.gfx_partition_size = 4;
        adev->gds.gws.cs_partition_size = 4;

        adev->gds.oa.gfx_partition_size = 4;
        adev->gds.oa.cs_partition_size = 1;
    } else {
        adev->gds.mem.gfx_partition_size = 1024;
        adev->gds.mem.cs_partition_size = 1024;

        adev->gds.gws.gfx_partition_size = 16;
        adev->gds.gws.cs_partition_size = 16;

        adev->gds.oa.gfx_partition_size = 4;
        adev->gds.oa.cs_partition_size = 4;
    }
}
static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
                                                 u32 bitmap)
{
    u32 data;

    if (!bitmap)
        return;

    data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
    data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

    WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
    u32 data, mask;

    data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
    data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

    data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
    data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

    mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

    return (~data) & mask;
}
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                struct amdgpu_cu_info *cu_info)
{
    int i, j, k, counter, active_cu_number = 0;
    u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
    unsigned disable_masks[4 * 2];

    if (!adev || !cu_info)
        return -EINVAL;

    amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

    mutex_lock(&adev->grbm_idx_mutex);
    for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
        for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
            mask = 1;
            ao_bitmap = 0;
            counter = 0;
            gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
            if (i < 4 && j < 2)
                gfx_v9_0_set_user_cu_inactive_bitmap(
                    adev, disable_masks[i * 2 + j]);
            bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
            cu_info->bitmap[i][j] = bitmap;

            for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
                if (bitmap & mask) {
                    if (counter < adev->gfx.config.max_cu_per_sh)
                        ao_bitmap |= mask;
                    counter++;
                }
                mask <<= 1;
            }
            active_cu_number += counter;
            if (i < 2 && j < 2)
                ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
            cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
        }
    }
    gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
    mutex_unlock(&adev->grbm_idx_mutex);

    cu_info->number = active_cu_number;
    cu_info->ao_cu_mask = ao_cu_mask;
    cu_info->simd_per_cu = NUM_SIMD_PER_CU;

    return 0;
}
const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
    .type = AMD_IP_BLOCK_TYPE_GFX,
    .major = 9,
    .minor = 0,
    .rev = 0,
    .funcs = &gfx_v9_0_ip_funcs,
};