/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"

#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "navi10_enum.h"
#include "hdp/hdp_5_0_0_offset.h"
#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"

#include "soc15_common.h"
#include "clearstate_gfx10.h"
#include "v10_structs.h"
#include "gfx_v10_0.h"
#include "nbio_v2_3.h"
/*
 * Navi10 has two graphics rings that share each graphics pipe.
 */
#define GFX10_NUM_GFX_RINGS	2
#define GFX10_MEC_HPD_SIZE	2048

#define F32_CE_PROGRAM_RAM_SIZE		65536
#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L

#define mmCGTT_GS_NGG_CLK_CTRL		0x5087
#define mmCGTT_GS_NGG_CLK_CTRL_BASE_IDX	1
MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
MODULE_FIRMWARE("amdgpu/navi10_me.bin");
MODULE_FIRMWARE("amdgpu/navi10_mec.bin");
MODULE_FIRMWARE("amdgpu/navi10_mec2.bin");
MODULE_FIRMWARE("amdgpu/navi10_rlc.bin");

MODULE_FIRMWARE("amdgpu/navi14_ce_wks.bin");
MODULE_FIRMWARE("amdgpu/navi14_pfp_wks.bin");
MODULE_FIRMWARE("amdgpu/navi14_me_wks.bin");
MODULE_FIRMWARE("amdgpu/navi14_mec_wks.bin");
MODULE_FIRMWARE("amdgpu/navi14_mec2_wks.bin");
MODULE_FIRMWARE("amdgpu/navi14_ce.bin");
MODULE_FIRMWARE("amdgpu/navi14_pfp.bin");
MODULE_FIRMWARE("amdgpu/navi14_me.bin");
MODULE_FIRMWARE("amdgpu/navi14_mec.bin");
MODULE_FIRMWARE("amdgpu/navi14_mec2.bin");
MODULE_FIRMWARE("amdgpu/navi14_rlc.bin");

MODULE_FIRMWARE("amdgpu/navi12_ce.bin");
MODULE_FIRMWARE("amdgpu/navi12_pfp.bin");
MODULE_FIRMWARE("amdgpu/navi12_me.bin");
MODULE_FIRMWARE("amdgpu/navi12_mec.bin");
MODULE_FIRMWARE("amdgpu/navi12_mec2.bin");
MODULE_FIRMWARE("amdgpu/navi12_rlc.bin");
static const struct soc15_reg_golden golden_settings_gc_10_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0x60000ff0, 0x60000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000000, 0x40000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xfeff8fff, 0xfeff8100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x000007ff, 0x000005ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0x20000000, 0x20000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0x07900000, 0x04900000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffff9fff, 0x00001188),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CGTT_CLK_CTRL, 0xfeff0fff, 0x40000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000)
};
static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] =
{
	/* Pending on emulation bring up */
};
static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070105),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000),
};
static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0x0d000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xffffcfff, 0x60000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0xffff0fff, 0x40000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000003, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x00007fff, 0x000001fe),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_0, 0xffffffff, 0x842a4c02),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04440000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000820, 0x00000820),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0xffdf80ff, 0x479c0010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000)
};
static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =
{
	/* Pending on emulation bring up */
};

static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] =
{
	/* Pending on emulation bring up */
};
#define DEFAULT_SH_MEM_CONFIG \
	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
	 (SH_MEM_RETRY_MODE_ALL << SH_MEM_CONFIG__RETRY_MODE__SHIFT) | \
	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance);
static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);

static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev);
static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev);
static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
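
/*
 * KIQ (kernel interface queue) PM4 helpers: these build the packets the
 * driver submits on the KIQ ring to register queue resources and to
 * map, unmap and query compute queues on its behalf.
 */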
static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}
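
/*
 * Emit a MAP_QUEUES packet that hands a ring's MQD address, wptr address
 * and doorbell offset to the CP so the hardware can schedule the queue.
 */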
static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}
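
/*
 * Emit an UNMAP_QUEUES packet for the given ring; for PREEMPT_QUEUES_NO_UNMAP
 * the trailing dwords carry the fence address and sequence used to signal
 * completion of the preemption.
 */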
static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}
static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}
static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				      uint16_t pasid, uint32_t flush_type,
				      bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}
static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx10_kiq_set_resources,
	.kiq_map_queues = gfx10_kiq_map_queues,
	.kiq_unmap_queues = gfx10_kiq_unmap_queues,
	.kiq_query_status = gfx10_kiq_query_status,
	.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 12,
};

static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq.pmf = &gfx_v10_0_kiq_pm4_funcs;
}
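
/* Program the per-ASIC golden register settings defined above. */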
static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_NAVI10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_10_1,
						(const u32)ARRAY_SIZE(golden_settings_gc_10_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_10_0_nv10,
						(const u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10));
		break;
	case CHIP_NAVI14:
		soc15_program_register_sequence(adev,
						golden_settings_gc_10_1_1,
						(const u32)ARRAY_SIZE(golden_settings_gc_10_1_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_10_1_nv14,
						(const u32)ARRAY_SIZE(golden_settings_gc_10_1_nv14));
		break;
	case CHIP_NAVI12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_10_1_2,
						(const u32)ARRAY_SIZE(golden_settings_gc_10_1_2));
		soc15_program_register_sequence(adev,
						golden_settings_gc_10_1_2_nv12,
						(const u32)ARRAY_SIZE(golden_settings_gc_10_1_2_nv12));
		break;
	default:
		break;
	}
}
static void gfx_v10_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
static void gfx_v10_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
					bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				   int mem_space, int opt, uint32_t addr0,
				   uint32_t addr1, uint32_t ref, uint32_t mask,
				   uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}
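
/*
 * Basic ring test: write a magic value to a scratch register through the
 * ring and poll the register until the CP has executed the packet.
 */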
static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned int i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}

	WREG32(scratch, 0xCAFEDEAD);

	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}

	WREG32(scratch, 0xCAFEDEAD);

	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}

	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}

	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.cp_fw_write_wait = false;

	switch (adev->asic_type) {
	case CHIP_NAVI10:
	case CHIP_NAVI12:
	case CHIP_NAVI14:
		if ((adev->gfx.me_fw_version >= 0x00000046) &&
		    (adev->gfx.me_feature_version >= 27) &&
		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
		    (adev->gfx.pfp_feature_version >= 27) &&
		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
		    (adev->gfx.mec_feature_version >= 27))
			adev->gfx.cp_fw_write_wait = true;
		break;
	default:
		break;
	}

	if (adev->gfx.cp_fw_write_wait == false)
		DRM_WARN_ONCE("CP firmware version too old, please update!");
}
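
/*
 * Parse the v2.1 RLC firmware header and record the save/restore list
 * (SRLC/SRLG/SRLS) versions, sizes and offsets.
 */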
static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
		le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}
static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
{
	bool ret = false;

	switch (adev->pdev->revision) {
	case 0xc2:
	case 0xc3:
		ret = true;
		break;
	default:
		ret = false;
		break;
	}

	return ret;
}
static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_NAVI10:
		if (!gfx_v10_0_navi10_gfxoff_should_enable(adev))
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
		break;
	default:
		break;
	}
}
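
/*
 * Request and validate the PFP/ME/CE/RLC/MEC microcode for the detected
 * chip, record the firmware versions and, for PSP-based loading, register
 * each image in the firmware ucode table.
 */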
static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[40];
	char wks[10];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;

	DRM_DEBUG("\n");

	memset(wks, 0, sizeof(wks));
	switch (adev->asic_type) {
	case CHIP_NAVI10:
		chip_name = "navi10";
		break;
	case CHIP_NAVI14:
		chip_name = "navi14";
		if (!(adev->pdev->device == 0x7340 &&
		      adev->pdev->revision != 0x00))
			snprintf(wks, sizeof(wks), "_wks");
		break;
	case CHIP_NAVI12:
		chip_name = "navi12";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", chip_name, wks);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, wks);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, wks);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (!amdgpu_sriov_vf(adev)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
		err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
		if (err)
			goto out;
		err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
		if (version_major == 2 && version_minor == 1)
			adev->gfx.rlc.is_rlc_v2_1 = true;

		adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
		adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
		adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
		adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
		adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
		adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
		adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
		adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
		adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
		adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
		adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
		adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
		if (!adev->gfx.rlc.register_list_format) {
			err = -ENOMEM;
			goto out;
		}

		tmp = (unsigned int *)((uintptr_t)rlc_hdr +
				       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
		for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
			adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

		adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

		tmp = (unsigned int *)((uintptr_t)rlc_hdr +
				       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
		for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
			adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

		if (adev->gfx.rlc.is_rlc_v2_1)
			gfx_v10_0_init_rlc_ext_microcode(adev);
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", chip_name, wks);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		err = 0;
		adev->gfx.mec2_fw = NULL;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		if (info->fw) {
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) -
			      le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) -
				      le32_to_cpu(cp_hdr->jt_size) * 4,
				      PAGE_SIZE);
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
			info->fw = adev->gfx.mec2_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
				      PAGE_SIZE);
		}
	}

	gfx_v10_0_check_fw_write_wait(adev);
out:
	if (err) {
		dev_err(adev->dev,
			"gfx10: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}

	gfx_v10_0_check_gfxoff_flag(adev);

	return err;
}
static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx10_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* set PA_SC_TILE_STEERING_OVERRIDE */
	count += 3;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}
static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
				     volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int ctx_reg_offset;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	ctx_reg_offset =
		SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(ctx_reg_offset);
	buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev)
{
	/* clear state block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			      &adev->gfx.rlc.clear_state_gpu_addr,
			      (void **)&adev->gfx.rlc.cs_ptr);

	/* jump table block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
			      &adev->gfx.rlc.cp_table_gpu_addr,
			      (void **)&adev->gfx.rlc.cp_table_ptr);
}
static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx10_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	return 0;
}
static void gfx_v10_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}
static int gfx_v10_0_me_init(struct amdgpu_device *adev)
{
	int r;

	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);

	amdgpu_gfx_graphics_queue_acquire(adev);

	r = gfx_v10_0_init_microcode(adev);
	if (r)
		DRM_ERROR("Failed to load gfx firmware!\n");

	return r;
}
static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	const __le32 *fw_data = NULL;
	unsigned int fw_size;
	u32 *fw = NULL;
	size_t mec_hpd_size;

	const struct gfx_firmware_header_v1_0 *mec_hdr = NULL;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
		gfx_v10_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;

		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
			  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);

		r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					      &adev->gfx.mec.mec_fw_obj,
					      &adev->gfx.mec.mec_fw_gpu_addr,
					      (void **)&fw);
		if (r) {
			dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
			gfx_v10_0_mec_fini(adev);
			return r;
		}

		memcpy(fw, fw_data, fw_size);

		amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
	}

	return 0;
}
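
/* Read wave state through the SQ_IND_INDEX/SQ_IND_DATA indirect interface. */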
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		     (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		     (address << SQ_IND_INDEX__INDEX__SHIFT));
	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
			   uint32_t thread, uint32_t regno,
			   uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		     (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		     (regno << SQ_IND_INDEX__INDEX__SHIFT) |
		     (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
		     (SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}
static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* in gfx10 the SIMD_ID is specified as part of the INSTANCE
	 * field when performing a select_se_sh so it should be
	 * zero here */
	WARN_ON(simd != 0);

	/* type 2 wave data */
	dst[(*no_fields)++] = 2;
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
}
static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				      uint32_t wave, uint32_t start,
				      uint32_t size, uint32_t *dst)
{
	WARN_ON(simd != 0);

	wave_read_regs(
		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
		dst);
}

static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
				      uint32_t wave, uint32_t thread,
				      uint32_t start, uint32_t size,
				      uint32_t *dst)
{
	wave_read_regs(
		adev, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v10_0_select_me_pipe_q(struct amdgpu_device *adev,
				       u32 me, u32 pipe, u32 q, u32 vm)
{
	nv_grbm_select(adev, me, pipe, q, vm);
}
static const struct amdgpu_gfx_funcs gfx_v10_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v10_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v10_0_select_se_sh,
	.read_wave_data = &gfx_v10_0_read_wave_data,
	.read_wave_sgprs = &gfx_v10_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v10_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v10_0_select_me_pipe_q,
};
static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;

	adev->gfx.funcs = &gfx_v10_0_gfx_funcs;

	switch (adev->asic_type) {
	case CHIP_NAVI10:
	case CHIP_NAVI14:
	case CHIP_NAVI12:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
		break;
	default:
		BUG();
		break;
	}

	adev->gfx.config.gb_addr_config = gb_addr_config;

	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, NUM_PIPES);

	adev->gfx.config.max_tile_pipes =
		adev->gfx.config.gb_addr_config_fields.num_pipes;

	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, NUM_RB_PER_SE);
	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
}
static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
				   int me, int pipe, int queue)
{
	int r;
	struct amdgpu_ring *ring;
	unsigned int irq_type;

	ring = &adev->gfx.gfx_ring[ring_id];

	ring->me = me;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;

	if (!ring_id)
		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
	else
		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;
	return 0;
}
static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				       int mec, int pipe, int queue)
{
	int r;
	unsigned int irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
				+ (ring_id * GFX10_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}
static int gfx_v10_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id = 0;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_NAVI10:
	case CHIP_NAVI14:
	case CHIP_NAVI12:
		adev->gfx.me.num_me = 1;
		adev->gfx.me.num_pipe_per_me = 2;
		adev->gfx.me.num_queue_per_pipe = 1;
		adev->gfx.mec.num_mec = 2;
		adev->gfx.mec.num_pipe_per_mec = 4;
		adev->gfx.mec.num_queue_per_pipe = 8;
		break;
	default:
		adev->gfx.me.num_me = 1;
		adev->gfx.me.num_pipe_per_me = 1;
		adev->gfx.me.num_queue_per_pipe = 1;
		adev->gfx.mec.num_mec = 1;
		adev->gfx.mec.num_pipe_per_mec = 4;
		adev->gfx.mec.num_queue_per_pipe = 8;
		break;
	}

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
			      GFX_10_1__SRCID__CP_IB2_INTERRUPT_PKT,
			      &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
			      GFX_10_1__SRCID__CP_EOP_INTERRUPT,
			      &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v10_0_scratch_init(adev);

	r = gfx_v10_0_me_init(adev);
	if (r)
		return r;

	r = gfx_v10_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v10_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.me.num_me; i++) {
		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
				if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v10_0_gfx_ring_init(adev, ring_id,
							    i, k, j);
				if (r)
					return r;
				ring_id++;
			}
		}
	}

	ring_id = 0;
	/* set up the compute queues - allocate horizontally across pipes */
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
								     j))
					continue;

				r = gfx_v10_0_compute_ring_init(adev, ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
	if (r)
		return r;

	/* allocate visible FB for rlc auto-loading fw */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
		r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
		if (r)
			return r;
	}

	adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;

	gfx_v10_0_gpu_early_init(adev);

	return 0;
}
static void gfx_v10_0_pfp_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
			      &adev->gfx.pfp.pfp_fw_gpu_addr,
			      (void **)&adev->gfx.pfp.pfp_fw_ptr);
}

static void gfx_v10_0_ce_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.ce.ce_fw_obj,
			      &adev->gfx.ce.ce_fw_gpu_addr,
			      (void **)&adev->gfx.ce.ce_fw_ptr);
}

static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
			      &adev->gfx.me.me_fw_gpu_addr,
			      (void **)&adev->gfx.me.me_fw_ptr);
}
static int gfx_v10_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v10_0_pfp_fini(adev);
	gfx_v10_0_ce_fini(adev);
	gfx_v10_0_me_fini(adev);
	gfx_v10_0_rlc_fini(adev);
	gfx_v10_0_mec_fini(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
		gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev);

	gfx_v10_0_free_microcode(adev);

	return 0;
}
static void gfx_v10_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	/* TODO */
}

static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
				     INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
				     instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
				     1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
				     1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);

	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
}
static u32 gfx_v10_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);

	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
					 adev->gfx.config.max_sh_per_se);

	return (~data) & mask;
}
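
/* Collect the active render backend bitmap across all SEs/SHs. */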
static void gfx_v10_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v10_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);
}
static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *adev)
{
	uint32_t num_sc;
	uint32_t enabled_rb_per_sh;
	uint32_t active_rb_bitmap;
	uint32_t num_rb_per_sc;
	uint32_t num_packer_per_sc;
	uint32_t pa_sc_tile_steering_override;

	/* init num_sc */
	num_sc = adev->gfx.config.max_shader_engines * adev->gfx.config.max_sh_per_se *
			adev->gfx.config.num_sc_per_sh;
	/* init num_rb_per_sc */
	active_rb_bitmap = gfx_v10_0_get_rb_active_bitmap(adev);
	enabled_rb_per_sh = hweight32(active_rb_bitmap);
	num_rb_per_sc = enabled_rb_per_sh / adev->gfx.config.num_sc_per_sh;
	/* init num_packer_per_sc */
	num_packer_per_sc = adev->gfx.config.num_packer_per_sc;

	pa_sc_tile_steering_override = 0;
	pa_sc_tile_steering_override |=
		(order_base_2(num_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_SC__SHIFT) &
		PA_SC_TILE_STEERING_OVERRIDE__NUM_SC_MASK;
	pa_sc_tile_steering_override |=
		(order_base_2(num_rb_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC__SHIFT) &
		PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC_MASK;
	pa_sc_tile_steering_override |=
		(order_base_2(num_packer_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC__SHIFT) &
		PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC_MASK;

	return pa_sc_tile_steering_override;
}
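
/*
 * Compute VMIDs (8..15) get fixed SH_MEM apertures and no GDS/GWS/OA
 * allocation; the scheduler firmware enables those per target VMID.
 */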
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)

static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		nv_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
		WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
	}
	nv_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	   access. These should be enabled by FW for target VMIDs. */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
	}
}
static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
{
	int vmid;

	/*
	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
	 * the driver can enable them for graphics. VMID0 should maintain
	 * access so that HWS firmware can save/restore entries.
	 */
	for (vmid = 1; vmid < 16; vmid++) {
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
	}
}

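/*
 * TCP harvesting: for each SA the active-WGP bitmap is read back and the
 * TCP/SQC clients belonging to inactive WGPs are disabled in
 * GCRD_SA_TARGETS_DISABLE and UTCL1_UTCL0_INVREQ_DISABLE. Field widths
 * differ between Navi10/12 and Navi14, hence the per-ASIC masks below.
 */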
static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
{
	int i, j, k;
	int max_wgp_per_sh = adev->gfx.config.max_cu_per_sh >> 1;
	u32 tmp, wgp_active_bitmap = 0;
	u32 gcrd_targets_disable_tcp = 0;
	u32 utcl_invreq_disable = 0;
	/*
	 * GCRD_TARGETS_DISABLE field contains
	 * for Navi10/Navi12: GL1C=[18:15], SQC=[14:10], TCP=[9:0]
	 * for Navi14: GL1C=[21:18], SQC=[17:12], TCP=[11:0]
	 */
	u32 gcrd_targets_disable_mask = amdgpu_gfx_create_bitmask(
		2 * max_wgp_per_sh + /* TCP */
		max_wgp_per_sh + /* SQC */
		4); /* GL1C */
	/*
	 * UTCL1_UTCL0_INVREQ_DISABLE field contains
	 * for Navi10/Navi12: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0]
	 * for Navi14: SQG=[28], RMI=[27:24], SQC=[23:12], TCP=[11:0]
	 */
	u32 utcl_invreq_disable_mask = amdgpu_gfx_create_bitmask(
		2 * max_wgp_per_sh + /* TCP */
		2 * max_wgp_per_sh + /* SQC */
		4 + /* RMI */
		1); /* SQG */

	if (adev->asic_type == CHIP_NAVI10 ||
	    adev->asic_type == CHIP_NAVI14 ||
	    adev->asic_type == CHIP_NAVI12) {
		mutex_lock(&adev->grbm_idx_mutex);
		for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
			for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
				gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
				wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
				/*
				 * Set corresponding TCP bits for the inactive WGPs in
				 * GCRD_SA_TARGETS_DISABLE
				 */
				gcrd_targets_disable_tcp = 0;
				/* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */
				utcl_invreq_disable = 0;

				for (k = 0; k < max_wgp_per_sh; k++) {
					if (!(wgp_active_bitmap & (1 << k))) {
						gcrd_targets_disable_tcp |= 3 << (2 * k);
						utcl_invreq_disable |= (3 << (2 * k)) |
							(3 << (2 * (max_wgp_per_sh + k)));
					}
				}

				tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE);
				/* only override TCP & SQC bits */
				tmp &= 0xffffffff << (4 * max_wgp_per_sh);
				tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask);
				WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);

				tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE);
				/* only override TCP bits */
				tmp &= 0xffffffff << (2 * max_wgp_per_sh);
				tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask);
				WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp);
			}
		}

		gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
		mutex_unlock(&adev->grbm_idx_mutex);
	}
}

static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev)
{
	/* TCCs are global (not instanced). */
	uint32_t tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE) |
			       RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE);

	adev->gfx.config.tcc_disabled_mask =
		REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
		(REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
}

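/*
 * One-time constants programming: GRBM read timeout, tiling table, RB/CU/TCC
 * harvesting info, the PA_SC steering override, per-VMID SH_MEM apertures and
 * the compute/GDS VMID defaults.
 */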
static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);

	gfx_v10_0_tiling_mode_table_init(adev);

	gfx_v10_0_setup_rb(adev);
	gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);
	gfx_v10_0_get_tcc_info(adev);
	adev->gfx.config.pa_sc_tile_steering_override =
		gfx_v10_0_init_pa_sc_tile_steering_override(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
		nv_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
		if (i != 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
				(adev->gmc.private_aperture_start >> 48));
			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
				(adev->gmc.shared_aperture_start >> 48));
			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
		}
	}
	nv_grbm_select(adev, 0, 0, 0, 0);

	mutex_unlock(&adev->srbm_mutex);

	gfx_v10_0_init_compute_vmid(adev);
	gfx_v10_0_init_gds_vmid(adev);
}

static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
						bool enable)
{
	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
			    enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
			    enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
			    enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
			    enable ? 1 : 0);

	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
}

static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);

	/* csib */
	WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI,
		     adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO,
		     adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);

	return 0;
}

void gfx_v10_0_rlc_stop(struct amdgpu_device *adev)
{
	u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
	WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp);
}

static void gfx_v10_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

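/*
 * RLC_PG_CNTL bit 23 gates the RLC<->SMU handshake used for GFXOFF: setting
 * it stops the RLC from issuing messages to the SMU (GFXOFF disabled),
 * clearing it restores the default handshake behaviour (GFXOFF enabled).
 */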
static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
					     bool enable)
{
	uint32_t rlc_pg_cntl;

	rlc_pg_cntl = RREG32_SOC15(GC, 0, mmRLC_PG_CNTL);

	if (!enable) {
		/* RLC_PG_CNTL[23] = 0 (default)
		 * RLC will wait for handshake acks with SMU
		 * GFXOFF will be enabled
		 * RLC_PG_CNTL[23] = 1
		 * RLC will not issue any message to SMU
		 * hence no handshake between SMU & RLC
		 * GFXOFF will be disabled
		 */
		rlc_pg_cntl |= 0x800000;
	} else
		rlc_pg_cntl &= ~0x800000;
	WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, rlc_pg_cntl);
}

static void gfx_v10_0_rlc_start(struct amdgpu_device *adev)
{
	/* TODO: enable rlc & smu handshake until smu
	 * and gfxoff feature works as expected */
	if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
		gfx_v10_0_rlc_smu_handshake_cntl(adev, false);

	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
	udelay(50);
}

static void gfx_v10_0_rlc_enable_srm(struct amdgpu_device *adev)
{
	uint32_t tmp;

	/* enable Save Restore Machine */
	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
	tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
}

static int gfx_v10_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
		     RLCG_UCODE_LOADING_START_ADDRESS);

	for (i = 0; i < fw_size; i++)
		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA,
			     le32_to_cpup(fw_data++));

	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}

static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {

		r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
		if (r)
			return r;

		gfx_v10_0_init_csb(adev);

		if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
			gfx_v10_0_rlc_enable_srm(adev);
	} else {
		adev->gfx.rlc.funcs->stop(adev);

		/* disable CG */
		WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);

		/* disable PG */
		WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0);

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
			/* legacy rlc firmware loading */
			r = gfx_v10_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
			/* rlc backdoor autoload firmware */
			r = gfx_v10_0_rlc_backdoor_autoload_enable(adev);
			if (r)
				return r;
		}

		gfx_v10_0_init_csb(adev);

		adev->gfx.rlc.funcs->start(adev);

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
			r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
			if (r)
				return r;
		}
	}
	return 0;
}

static struct {
	FIRMWARE_ID	id;
	unsigned int	offset;
	unsigned int	size;
} rlc_autoload_info[FIRMWARE_ID_MAX];

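/*
 * The RLC autoload table of contents is copied out of the PSP firmware image
 * into a GTT buffer and parsed into rlc_autoload_info[], which records the
 * offset and size of each firmware blob inside the shared autoload buffer.
 */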
static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev)
{
	int ret;
	RLC_TABLE_OF_CONTENT *rlc_toc;

	ret = amdgpu_bo_create_reserved(adev, adev->psp.toc_bin_size, PAGE_SIZE,
					AMDGPU_GEM_DOMAIN_GTT,
					&adev->gfx.rlc.rlc_toc_bo,
					&adev->gfx.rlc.rlc_toc_gpu_addr,
					(void **)&adev->gfx.rlc.rlc_toc_buf);
	if (ret) {
		dev_err(adev->dev, "(%d) failed to create rlc toc bo\n", ret);
		return ret;
	}

	/* Copy toc from psp sos fw to rlc toc buffer */
	memcpy(adev->gfx.rlc.rlc_toc_buf, adev->psp.toc_start_addr, adev->psp.toc_bin_size);

	rlc_toc = (RLC_TABLE_OF_CONTENT *)adev->gfx.rlc.rlc_toc_buf;
	while (rlc_toc && (rlc_toc->id > FIRMWARE_ID_INVALID) &&
		(rlc_toc->id < FIRMWARE_ID_MAX)) {
		if ((rlc_toc->id >= FIRMWARE_ID_CP_CE) &&
		    (rlc_toc->id <= FIRMWARE_ID_CP_MES)) {
			/* Offset needs 4KB alignment */
			rlc_toc->offset = ALIGN(rlc_toc->offset * 4, PAGE_SIZE);
		}

		rlc_autoload_info[rlc_toc->id].id = rlc_toc->id;
		rlc_autoload_info[rlc_toc->id].offset = rlc_toc->offset * 4;
		rlc_autoload_info[rlc_toc->id].size = rlc_toc->size * 4;

		rlc_toc++;
	}

	return 0;
}

static uint32_t gfx_v10_0_calc_toc_total_size(struct amdgpu_device *adev)
{
	uint32_t total_size = 0;
	FIRMWARE_ID id;
	int ret;

	ret = gfx_v10_0_parse_rlc_toc(adev);
	if (ret) {
		dev_err(adev->dev, "failed to parse rlc toc\n");
		return 0;
	}

	for (id = FIRMWARE_ID_RLC_G_UCODE; id < FIRMWARE_ID_MAX; id++)
		total_size += rlc_autoload_info[id].size;

	/* In case the offset in rlc toc ucode is aligned */
	if (total_size < rlc_autoload_info[FIRMWARE_ID_MAX-1].offset)
		total_size = rlc_autoload_info[FIRMWARE_ID_MAX-1].offset +
				rlc_autoload_info[FIRMWARE_ID_MAX-1].size;

	return total_size;
}

static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev)
{
	int r;
	uint32_t total_size;

	total_size = gfx_v10_0_calc_toc_total_size(adev);

	r = amdgpu_bo_create_reserved(adev, total_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.rlc.rlc_autoload_bo,
				      &adev->gfx.rlc.rlc_autoload_gpu_addr,
				      (void **)&adev->gfx.rlc.rlc_autoload_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
		return r;
	}

	return 0;
}

static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_toc_bo,
			      &adev->gfx.rlc.rlc_toc_gpu_addr,
			      (void **)&adev->gfx.rlc.rlc_toc_buf);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
			      &adev->gfx.rlc.rlc_autoload_gpu_addr,
			      (void **)&adev->gfx.rlc.rlc_autoload_ptr);
}

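/*
 * Copy one firmware image into the autoload buffer at the offset recorded in
 * the TOC, clamping the copy to the TOC size and zero-padding any remainder.
 */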
static void gfx_v10_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
						       FIRMWARE_ID id,
						       const void *fw_data,
						       uint32_t fw_size)
{
	uint32_t toc_offset;
	uint32_t toc_fw_size;
	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;

	if (id <= FIRMWARE_ID_INVALID || id >= FIRMWARE_ID_MAX)
		return;

	toc_offset = rlc_autoload_info[id].offset;
	toc_fw_size = rlc_autoload_info[id].size;

	if (fw_size == 0)
		fw_size = toc_fw_size;

	if (fw_size > toc_fw_size)
		fw_size = toc_fw_size;

	memcpy(ptr + toc_offset, fw_data, fw_size);

	if (fw_size < toc_fw_size)
		memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
}

static void gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev)
{
	void *data;
	uint32_t size;

	data = adev->gfx.rlc.rlc_toc_buf;
	size = rlc_autoload_info[FIRMWARE_ID_RLC_TOC].size;

	gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
						   FIRMWARE_ID_RLC_TOC,
						   data, size);
}

static void gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	uint32_t fw_size;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;

	/* pfp ucode */
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
		le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
	gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
						   FIRMWARE_ID_CP_PFP,
						   fw_data, fw_size);

	/* ce ucode */
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	fw_data = (const __le32 *)(adev->gfx.ce_fw->data +
		le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
	gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
						   FIRMWARE_ID_CP_CE,
						   fw_data, fw_size);

	/* me ucode */
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;
	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
		le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
	gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
						   FIRMWARE_ID_CP_ME,
						   fw_data, fw_size);

	/* rlc ucode */
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
		adev->gfx.rlc_fw->data;
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
		le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
	gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
						   FIRMWARE_ID_RLC_G_UCODE,
						   fw_data, fw_size);

	/* mec1 ucode */
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.mec_fw->data;
	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
		le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
		cp_hdr->jt_size * 4;
	gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
						   FIRMWARE_ID_CP_MEC,
						   fw_data, fw_size);
	/* mec2 ucode is not necessary if mec2 ucode is same as mec1 */
}

/* Temporarily put sdma part here */
static void gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	uint32_t fw_size;
	const struct sdma_firmware_header_v1_0 *sdma_hdr;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		sdma_hdr = (const struct sdma_firmware_header_v1_0 *)
			adev->sdma.instance[i].fw->data;
		fw_data = (const __le32 *) (adev->sdma.instance[i].fw->data +
			le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(sdma_hdr->header.ucode_size_bytes);

		if (i == 0) {
			gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
				FIRMWARE_ID_SDMA0_UCODE, fw_data, fw_size);
			gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
				FIRMWARE_ID_SDMA0_JT,
				(uint32_t *)fw_data +
				sdma_hdr->jt_offset,
				sdma_hdr->jt_size * 4);
		} else if (i == 1) {
			gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
				FIRMWARE_ID_SDMA1_UCODE, fw_data, fw_size);
			gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
				FIRMWARE_ID_SDMA1_JT,
				(uint32_t *)fw_data +
				sdma_hdr->jt_offset,
				sdma_hdr->jt_size * 4);
		}
	}
}

static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
{
	uint32_t rlc_g_offset, rlc_g_size, tmp;
	uint64_t gpu_addr;

	gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(adev);
	gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(adev);
	gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(adev);

	rlc_g_offset = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].offset;
	rlc_g_size = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].size;
	gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;

	WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_HI, upper_32_bits(gpu_addr));
	WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_LO, lower_32_bits(gpu_addr));
	WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_SIZE, rlc_g_size);

	tmp = RREG32_SOC15(GC, 0, mmRLC_HYP_RESET_VECTOR);
	if (!(tmp & (RLC_HYP_RESET_VECTOR__COLD_BOOT_EXIT_MASK |
		   RLC_HYP_RESET_VECTOR__VDDGFX_EXIT_MASK))) {
		DRM_ERROR("Neither COLD_BOOT_EXIT nor VDDGFX_EXIT is set\n");
		return -EINVAL;
	}

	tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);
	if (tmp & RLC_CNTL__RLC_ENABLE_F32_MASK) {
		DRM_ERROR("RLC ROM should halt itself\n");
		return -EINVAL;
	}

	return 0;
}

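/*
 * For RLC backdoor autoload, each CP front end first has its L1 instruction
 * cache invalidated and is then pointed at its ucode image inside the RLC
 * autoload buffer; the ME variant below follows the same pattern used for
 * the CE, PFP and MEC caches.
 */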
static int gfx_v10_0_rlc_backdoor_autoload_config_me_cache(struct amdgpu_device *adev)
{
	uint32_t usec_timeout = 50000;  /* wait for 50ms */
	uint32_t tmp;
	int i;
	uint64_t addr;

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
			INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	/* Program me ucode address into instruction cache address register */
	addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
		rlc_autoload_info[FIRMWARE_ID_CP_ME].offset;
	WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_LO,
			lower_32_bits(addr) & 0xFFFFF000);
	WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI,
			upper_32_bits(addr));

	return 0;
}

2232 static int gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(struct amdgpu_device
*adev
)
2234 uint32_t usec_timeout
= 50000; /* wait for 50ms */
2239 /* Trigger an invalidation of the L1 instruction caches */
2240 tmp
= RREG32_SOC15(GC
, 0, mmCP_CE_IC_OP_CNTL
);
2241 tmp
= REG_SET_FIELD(tmp
, CP_CE_IC_OP_CNTL
, INVALIDATE_CACHE
, 1);
2242 WREG32_SOC15(GC
, 0, mmCP_CE_IC_OP_CNTL
, tmp
);
2244 /* Wait for invalidation complete */
2245 for (i
= 0; i
< usec_timeout
; i
++) {
2246 tmp
= RREG32_SOC15(GC
, 0, mmCP_CE_IC_OP_CNTL
);
2247 if (1 == REG_GET_FIELD(tmp
, CP_CE_IC_OP_CNTL
,
2248 INVALIDATE_CACHE_COMPLETE
))
2253 if (i
>= usec_timeout
) {
2254 dev_err(adev
->dev
, "failed to invalidate instruction cache\n");
2258 /* Program ce ucode address into intruction cache address register */
2259 addr
= adev
->gfx
.rlc
.rlc_autoload_gpu_addr
+
2260 rlc_autoload_info
[FIRMWARE_ID_CP_CE
].offset
;
2261 WREG32_SOC15(GC
, 0, mmCP_CE_IC_BASE_LO
,
2262 lower_32_bits(addr
) & 0xFFFFF000);
2263 WREG32_SOC15(GC
, 0, mmCP_CE_IC_BASE_HI
,
2264 upper_32_bits(addr
));
2269 static int gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(struct amdgpu_device
*adev
)
2271 uint32_t usec_timeout
= 50000; /* wait for 50ms */
2276 /* Trigger an invalidation of the L1 instruction caches */
2277 tmp
= RREG32_SOC15(GC
, 0, mmCP_PFP_IC_OP_CNTL
);
2278 tmp
= REG_SET_FIELD(tmp
, CP_PFP_IC_OP_CNTL
, INVALIDATE_CACHE
, 1);
2279 WREG32_SOC15(GC
, 0, mmCP_PFP_IC_OP_CNTL
, tmp
);
2281 /* Wait for invalidation complete */
2282 for (i
= 0; i
< usec_timeout
; i
++) {
2283 tmp
= RREG32_SOC15(GC
, 0, mmCP_PFP_IC_OP_CNTL
);
2284 if (1 == REG_GET_FIELD(tmp
, CP_PFP_IC_OP_CNTL
,
2285 INVALIDATE_CACHE_COMPLETE
))
2290 if (i
>= usec_timeout
) {
2291 dev_err(adev
->dev
, "failed to invalidate instruction cache\n");
2295 /* Program pfp ucode address into intruction cache address register */
2296 addr
= adev
->gfx
.rlc
.rlc_autoload_gpu_addr
+
2297 rlc_autoload_info
[FIRMWARE_ID_CP_PFP
].offset
;
2298 WREG32_SOC15(GC
, 0, mmCP_PFP_IC_BASE_LO
,
2299 lower_32_bits(addr
) & 0xFFFFF000);
2300 WREG32_SOC15(GC
, 0, mmCP_PFP_IC_BASE_HI
,
2301 upper_32_bits(addr
));
2306 static int gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(struct amdgpu_device
*adev
)
2308 uint32_t usec_timeout
= 50000; /* wait for 50ms */
2313 /* Trigger an invalidation of the L1 instruction caches */
2314 tmp
= RREG32_SOC15(GC
, 0, mmCP_CPC_IC_OP_CNTL
);
2315 tmp
= REG_SET_FIELD(tmp
, CP_CPC_IC_OP_CNTL
, INVALIDATE_CACHE
, 1);
2316 WREG32_SOC15(GC
, 0, mmCP_CPC_IC_OP_CNTL
, tmp
);
2318 /* Wait for invalidation complete */
2319 for (i
= 0; i
< usec_timeout
; i
++) {
2320 tmp
= RREG32_SOC15(GC
, 0, mmCP_CPC_IC_OP_CNTL
);
2321 if (1 == REG_GET_FIELD(tmp
, CP_CPC_IC_OP_CNTL
,
2322 INVALIDATE_CACHE_COMPLETE
))
2327 if (i
>= usec_timeout
) {
2328 dev_err(adev
->dev
, "failed to invalidate instruction cache\n");
2332 /* Program mec1 ucode address into intruction cache address register */
2333 addr
= adev
->gfx
.rlc
.rlc_autoload_gpu_addr
+
2334 rlc_autoload_info
[FIRMWARE_ID_CP_MEC
].offset
;
2335 WREG32_SOC15(GC
, 0, mmCP_CPC_IC_BASE_LO
,
2336 lower_32_bits(addr
) & 0xFFFFF000);
2337 WREG32_SOC15(GC
, 0, mmCP_CPC_IC_BASE_HI
,
2338 upper_32_bits(addr
));
2343 static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device
*adev
)
2346 uint32_t bootload_status
;
2349 for (i
= 0; i
< adev
->usec_timeout
; i
++) {
2350 cp_status
= RREG32_SOC15(GC
, 0, mmCP_STAT
);
2351 bootload_status
= RREG32_SOC15(GC
, 0, mmRLC_RLCS_BOOTLOAD_STATUS
);
2352 if ((cp_status
== 0) &&
2353 (REG_GET_FIELD(bootload_status
,
2354 RLC_RLCS_BOOTLOAD_STATUS
, BOOTLOAD_COMPLETE
) == 1)) {
2360 if (i
>= adev
->usec_timeout
) {
2361 dev_err(adev
->dev
, "rlc autoload: gc ucode autoload timeout\n");
2365 if (adev
->firmware
.load_type
== AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO
) {
2366 r
= gfx_v10_0_rlc_backdoor_autoload_config_me_cache(adev
);
2370 r
= gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(adev
);
2374 r
= gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(adev
);
2378 r
= gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(adev
);
2386 static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device
*adev
, bool enable
)
2389 u32 tmp
= RREG32_SOC15(GC
, 0, mmCP_ME_CNTL
);
2391 tmp
= REG_SET_FIELD(tmp
, CP_ME_CNTL
, ME_HALT
, enable
? 0 : 1);
2392 tmp
= REG_SET_FIELD(tmp
, CP_ME_CNTL
, PFP_HALT
, enable
? 0 : 1);
2393 tmp
= REG_SET_FIELD(tmp
, CP_ME_CNTL
, CE_HALT
, enable
? 0 : 1);
2395 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; i
++)
2396 adev
->gfx
.gfx_ring
[i
].sched
.ready
= false;
2398 WREG32_SOC15(GC
, 0, mmCP_ME_CNTL
, tmp
);
2400 for (i
= 0; i
< adev
->usec_timeout
; i
++) {
2401 if (RREG32_SOC15(GC
, 0, mmCP_STAT
) == 0)
2406 if (i
>= adev
->usec_timeout
)
2407 DRM_ERROR("failed to %s cp gfx\n", enable
? "unhalt" : "halt");
2412 static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device
*adev
)
2415 const struct gfx_firmware_header_v1_0
*pfp_hdr
;
2416 const __le32
*fw_data
;
2417 unsigned i
, fw_size
;
2419 uint32_t usec_timeout
= 50000; /* wait for 50ms */
2421 pfp_hdr
= (const struct gfx_firmware_header_v1_0
*)
2422 adev
->gfx
.pfp_fw
->data
;
2424 amdgpu_ucode_print_gfx_hdr(&pfp_hdr
->header
);
2426 fw_data
= (const __le32
*)(adev
->gfx
.pfp_fw
->data
+
2427 le32_to_cpu(pfp_hdr
->header
.ucode_array_offset_bytes
));
2428 fw_size
= le32_to_cpu(pfp_hdr
->header
.ucode_size_bytes
);
2430 r
= amdgpu_bo_create_reserved(adev
, pfp_hdr
->header
.ucode_size_bytes
,
2431 PAGE_SIZE
, AMDGPU_GEM_DOMAIN_GTT
,
2432 &adev
->gfx
.pfp
.pfp_fw_obj
,
2433 &adev
->gfx
.pfp
.pfp_fw_gpu_addr
,
2434 (void **)&adev
->gfx
.pfp
.pfp_fw_ptr
);
2436 dev_err(adev
->dev
, "(%d) failed to create pfp fw bo\n", r
);
2437 gfx_v10_0_pfp_fini(adev
);
2441 memcpy(adev
->gfx
.pfp
.pfp_fw_ptr
, fw_data
, fw_size
);
2443 amdgpu_bo_kunmap(adev
->gfx
.pfp
.pfp_fw_obj
);
2444 amdgpu_bo_unreserve(adev
->gfx
.pfp
.pfp_fw_obj
);
2446 /* Trigger an invalidation of the L1 instruction caches */
2447 tmp
= RREG32_SOC15(GC
, 0, mmCP_PFP_IC_OP_CNTL
);
2448 tmp
= REG_SET_FIELD(tmp
, CP_PFP_IC_OP_CNTL
, INVALIDATE_CACHE
, 1);
2449 WREG32_SOC15(GC
, 0, mmCP_PFP_IC_OP_CNTL
, tmp
);
2451 /* Wait for invalidation complete */
2452 for (i
= 0; i
< usec_timeout
; i
++) {
2453 tmp
= RREG32_SOC15(GC
, 0, mmCP_PFP_IC_OP_CNTL
);
2454 if (1 == REG_GET_FIELD(tmp
, CP_PFP_IC_OP_CNTL
,
2455 INVALIDATE_CACHE_COMPLETE
))
2460 if (i
>= usec_timeout
) {
2461 dev_err(adev
->dev
, "failed to invalidate instruction cache\n");
2465 if (amdgpu_emu_mode
== 1)
2466 adev
->nbio
.funcs
->hdp_flush(adev
, NULL
);
2468 tmp
= RREG32_SOC15(GC
, 0, mmCP_PFP_IC_BASE_CNTL
);
2469 tmp
= REG_SET_FIELD(tmp
, CP_PFP_IC_BASE_CNTL
, VMID
, 0);
2470 tmp
= REG_SET_FIELD(tmp
, CP_PFP_IC_BASE_CNTL
, CACHE_POLICY
, 0);
2471 tmp
= REG_SET_FIELD(tmp
, CP_PFP_IC_BASE_CNTL
, EXE_DISABLE
, 0);
2472 tmp
= REG_SET_FIELD(tmp
, CP_PFP_IC_BASE_CNTL
, ADDRESS_CLAMP
, 1);
2473 WREG32_SOC15(GC
, 0, mmCP_PFP_IC_BASE_CNTL
, tmp
);
2474 WREG32_SOC15(GC
, 0, mmCP_PFP_IC_BASE_LO
,
2475 adev
->gfx
.pfp
.pfp_fw_gpu_addr
& 0xFFFFF000);
2476 WREG32_SOC15(GC
, 0, mmCP_PFP_IC_BASE_HI
,
2477 upper_32_bits(adev
->gfx
.pfp
.pfp_fw_gpu_addr
));
2482 static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device
*adev
)
2485 const struct gfx_firmware_header_v1_0
*ce_hdr
;
2486 const __le32
*fw_data
;
2487 unsigned i
, fw_size
;
2489 uint32_t usec_timeout
= 50000; /* wait for 50ms */
2491 ce_hdr
= (const struct gfx_firmware_header_v1_0
*)
2492 adev
->gfx
.ce_fw
->data
;
2494 amdgpu_ucode_print_gfx_hdr(&ce_hdr
->header
);
2496 fw_data
= (const __le32
*)(adev
->gfx
.ce_fw
->data
+
2497 le32_to_cpu(ce_hdr
->header
.ucode_array_offset_bytes
));
2498 fw_size
= le32_to_cpu(ce_hdr
->header
.ucode_size_bytes
);
2500 r
= amdgpu_bo_create_reserved(adev
, ce_hdr
->header
.ucode_size_bytes
,
2501 PAGE_SIZE
, AMDGPU_GEM_DOMAIN_GTT
,
2502 &adev
->gfx
.ce
.ce_fw_obj
,
2503 &adev
->gfx
.ce
.ce_fw_gpu_addr
,
2504 (void **)&adev
->gfx
.ce
.ce_fw_ptr
);
2506 dev_err(adev
->dev
, "(%d) failed to create ce fw bo\n", r
);
2507 gfx_v10_0_ce_fini(adev
);
2511 memcpy(adev
->gfx
.ce
.ce_fw_ptr
, fw_data
, fw_size
);
2513 amdgpu_bo_kunmap(adev
->gfx
.ce
.ce_fw_obj
);
2514 amdgpu_bo_unreserve(adev
->gfx
.ce
.ce_fw_obj
);
2516 /* Trigger an invalidation of the L1 instruction caches */
2517 tmp
= RREG32_SOC15(GC
, 0, mmCP_CE_IC_OP_CNTL
);
2518 tmp
= REG_SET_FIELD(tmp
, CP_CE_IC_OP_CNTL
, INVALIDATE_CACHE
, 1);
2519 WREG32_SOC15(GC
, 0, mmCP_CE_IC_OP_CNTL
, tmp
);
2521 /* Wait for invalidation complete */
2522 for (i
= 0; i
< usec_timeout
; i
++) {
2523 tmp
= RREG32_SOC15(GC
, 0, mmCP_CE_IC_OP_CNTL
);
2524 if (1 == REG_GET_FIELD(tmp
, CP_CE_IC_OP_CNTL
,
2525 INVALIDATE_CACHE_COMPLETE
))
2530 if (i
>= usec_timeout
) {
2531 dev_err(adev
->dev
, "failed to invalidate instruction cache\n");
2535 if (amdgpu_emu_mode
== 1)
2536 adev
->nbio
.funcs
->hdp_flush(adev
, NULL
);
2538 tmp
= RREG32_SOC15(GC
, 0, mmCP_CE_IC_BASE_CNTL
);
2539 tmp
= REG_SET_FIELD(tmp
, CP_CE_IC_BASE_CNTL
, VMID
, 0);
2540 tmp
= REG_SET_FIELD(tmp
, CP_CE_IC_BASE_CNTL
, CACHE_POLICY
, 0);
2541 tmp
= REG_SET_FIELD(tmp
, CP_CE_IC_BASE_CNTL
, EXE_DISABLE
, 0);
2542 tmp
= REG_SET_FIELD(tmp
, CP_CE_IC_BASE_CNTL
, ADDRESS_CLAMP
, 1);
2543 WREG32_SOC15(GC
, 0, mmCP_CE_IC_BASE_LO
,
2544 adev
->gfx
.ce
.ce_fw_gpu_addr
& 0xFFFFF000);
2545 WREG32_SOC15(GC
, 0, mmCP_CE_IC_BASE_HI
,
2546 upper_32_bits(adev
->gfx
.ce
.ce_fw_gpu_addr
));
2551 static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device
*adev
)
2554 const struct gfx_firmware_header_v1_0
*me_hdr
;
2555 const __le32
*fw_data
;
2556 unsigned i
, fw_size
;
2558 uint32_t usec_timeout
= 50000; /* wait for 50ms */
2560 me_hdr
= (const struct gfx_firmware_header_v1_0
*)
2561 adev
->gfx
.me_fw
->data
;
2563 amdgpu_ucode_print_gfx_hdr(&me_hdr
->header
);
2565 fw_data
= (const __le32
*)(adev
->gfx
.me_fw
->data
+
2566 le32_to_cpu(me_hdr
->header
.ucode_array_offset_bytes
));
2567 fw_size
= le32_to_cpu(me_hdr
->header
.ucode_size_bytes
);
2569 r
= amdgpu_bo_create_reserved(adev
, me_hdr
->header
.ucode_size_bytes
,
2570 PAGE_SIZE
, AMDGPU_GEM_DOMAIN_GTT
,
2571 &adev
->gfx
.me
.me_fw_obj
,
2572 &adev
->gfx
.me
.me_fw_gpu_addr
,
2573 (void **)&adev
->gfx
.me
.me_fw_ptr
);
2575 dev_err(adev
->dev
, "(%d) failed to create me fw bo\n", r
);
2576 gfx_v10_0_me_fini(adev
);
2580 memcpy(adev
->gfx
.me
.me_fw_ptr
, fw_data
, fw_size
);
2582 amdgpu_bo_kunmap(adev
->gfx
.me
.me_fw_obj
);
2583 amdgpu_bo_unreserve(adev
->gfx
.me
.me_fw_obj
);
2585 /* Trigger an invalidation of the L1 instruction caches */
2586 tmp
= RREG32_SOC15(GC
, 0, mmCP_ME_IC_OP_CNTL
);
2587 tmp
= REG_SET_FIELD(tmp
, CP_ME_IC_OP_CNTL
, INVALIDATE_CACHE
, 1);
2588 WREG32_SOC15(GC
, 0, mmCP_ME_IC_OP_CNTL
, tmp
);
2590 /* Wait for invalidation complete */
2591 for (i
= 0; i
< usec_timeout
; i
++) {
2592 tmp
= RREG32_SOC15(GC
, 0, mmCP_ME_IC_OP_CNTL
);
2593 if (1 == REG_GET_FIELD(tmp
, CP_ME_IC_OP_CNTL
,
2594 INVALIDATE_CACHE_COMPLETE
))
2599 if (i
>= usec_timeout
) {
2600 dev_err(adev
->dev
, "failed to invalidate instruction cache\n");
2604 if (amdgpu_emu_mode
== 1)
2605 adev
->nbio
.funcs
->hdp_flush(adev
, NULL
);
2607 tmp
= RREG32_SOC15(GC
, 0, mmCP_ME_IC_BASE_CNTL
);
2608 tmp
= REG_SET_FIELD(tmp
, CP_ME_IC_BASE_CNTL
, VMID
, 0);
2609 tmp
= REG_SET_FIELD(tmp
, CP_ME_IC_BASE_CNTL
, CACHE_POLICY
, 0);
2610 tmp
= REG_SET_FIELD(tmp
, CP_ME_IC_BASE_CNTL
, EXE_DISABLE
, 0);
2611 tmp
= REG_SET_FIELD(tmp
, CP_ME_IC_BASE_CNTL
, ADDRESS_CLAMP
, 1);
2612 WREG32_SOC15(GC
, 0, mmCP_ME_IC_BASE_LO
,
2613 adev
->gfx
.me
.me_fw_gpu_addr
& 0xFFFFF000);
2614 WREG32_SOC15(GC
, 0, mmCP_ME_IC_BASE_HI
,
2615 upper_32_bits(adev
->gfx
.me
.me_fw_gpu_addr
));
2620 static int gfx_v10_0_cp_gfx_load_microcode(struct amdgpu_device
*adev
)
2624 if (!adev
->gfx
.me_fw
|| !adev
->gfx
.pfp_fw
|| !adev
->gfx
.ce_fw
)
2627 gfx_v10_0_cp_gfx_enable(adev
, false);
2629 r
= gfx_v10_0_cp_gfx_load_pfp_microcode(adev
);
2631 dev_err(adev
->dev
, "(%d) failed to load pfp fw\n", r
);
2635 r
= gfx_v10_0_cp_gfx_load_ce_microcode(adev
);
2637 dev_err(adev
->dev
, "(%d) failed to load ce fw\n", r
);
2641 r
= gfx_v10_0_cp_gfx_load_me_microcode(adev
);
2643 dev_err(adev
->dev
, "(%d) failed to load me fw\n", r
);
2650 static int gfx_v10_0_cp_gfx_start(struct amdgpu_device
*adev
)
2652 struct amdgpu_ring
*ring
;
2653 const struct cs_section_def
*sect
= NULL
;
2654 const struct cs_extent_def
*ext
= NULL
;
2659 WREG32_SOC15(GC
, 0, mmCP_MAX_CONTEXT
,
2660 adev
->gfx
.config
.max_hw_contexts
- 1);
2661 WREG32_SOC15(GC
, 0, mmCP_DEVICE_ID
, 1);
2663 gfx_v10_0_cp_gfx_enable(adev
, true);
2665 ring
= &adev
->gfx
.gfx_ring
[0];
2666 r
= amdgpu_ring_alloc(ring
, gfx_v10_0_get_csb_size(adev
) + 4);
2668 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r
);
2672 amdgpu_ring_write(ring
, PACKET3(PACKET3_PREAMBLE_CNTL
, 0));
2673 amdgpu_ring_write(ring
, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE
);
2675 amdgpu_ring_write(ring
, PACKET3(PACKET3_CONTEXT_CONTROL
, 1));
2676 amdgpu_ring_write(ring
, 0x80000000);
2677 amdgpu_ring_write(ring
, 0x80000000);
2679 for (sect
= gfx10_cs_data
; sect
->section
!= NULL
; ++sect
) {
2680 for (ext
= sect
->section
; ext
->extent
!= NULL
; ++ext
) {
2681 if (sect
->id
== SECT_CONTEXT
) {
2682 amdgpu_ring_write(ring
,
2683 PACKET3(PACKET3_SET_CONTEXT_REG
,
2685 amdgpu_ring_write(ring
, ext
->reg_index
-
2686 PACKET3_SET_CONTEXT_REG_START
);
2687 for (i
= 0; i
< ext
->reg_count
; i
++)
2688 amdgpu_ring_write(ring
, ext
->extent
[i
]);
2694 SOC15_REG_OFFSET(GC
, 0, mmPA_SC_TILE_STEERING_OVERRIDE
) - PACKET3_SET_CONTEXT_REG_START
;
2695 amdgpu_ring_write(ring
, PACKET3(PACKET3_SET_CONTEXT_REG
, 1));
2696 amdgpu_ring_write(ring
, ctx_reg_offset
);
2697 amdgpu_ring_write(ring
, adev
->gfx
.config
.pa_sc_tile_steering_override
);
2699 amdgpu_ring_write(ring
, PACKET3(PACKET3_PREAMBLE_CNTL
, 0));
2700 amdgpu_ring_write(ring
, PACKET3_PREAMBLE_END_CLEAR_STATE
);
2702 amdgpu_ring_write(ring
, PACKET3(PACKET3_CLEAR_STATE
, 0));
2703 amdgpu_ring_write(ring
, 0);
2705 amdgpu_ring_write(ring
, PACKET3(PACKET3_SET_BASE
, 2));
2706 amdgpu_ring_write(ring
, PACKET3_BASE_INDEX(CE_PARTITION_BASE
));
2707 amdgpu_ring_write(ring
, 0x8000);
2708 amdgpu_ring_write(ring
, 0x8000);
2710 amdgpu_ring_commit(ring
);
2712 /* submit cs packet to copy state 0 to next available state */
2713 ring
= &adev
->gfx
.gfx_ring
[1];
2714 r
= amdgpu_ring_alloc(ring
, 2);
2716 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r
);
2720 amdgpu_ring_write(ring
, PACKET3(PACKET3_CLEAR_STATE
, 0));
2721 amdgpu_ring_write(ring
, 0);
2723 amdgpu_ring_commit(ring
);
2728 static void gfx_v10_0_cp_gfx_switch_pipe(struct amdgpu_device
*adev
,
2733 tmp
= RREG32_SOC15(GC
, 0, mmGRBM_GFX_CNTL
);
2734 tmp
= REG_SET_FIELD(tmp
, GRBM_GFX_CNTL
, PIPEID
, pipe
);
2736 WREG32_SOC15(GC
, 0, mmGRBM_GFX_CNTL
, tmp
);
2739 static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device
*adev
,
2740 struct amdgpu_ring
*ring
)
2744 tmp
= RREG32_SOC15(GC
, 0, mmCP_RB_DOORBELL_CONTROL
);
2745 if (ring
->use_doorbell
) {
2746 tmp
= REG_SET_FIELD(tmp
, CP_RB_DOORBELL_CONTROL
,
2747 DOORBELL_OFFSET
, ring
->doorbell_index
);
2748 tmp
= REG_SET_FIELD(tmp
, CP_RB_DOORBELL_CONTROL
,
2751 tmp
= REG_SET_FIELD(tmp
, CP_RB_DOORBELL_CONTROL
,
2754 WREG32_SOC15(GC
, 0, mmCP_RB_DOORBELL_CONTROL
, tmp
);
2755 tmp
= REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER
,
2756 DOORBELL_RANGE_LOWER
, ring
->doorbell_index
);
2757 WREG32_SOC15(GC
, 0, mmCP_RB_DOORBELL_RANGE_LOWER
, tmp
);
2759 WREG32_SOC15(GC
, 0, mmCP_RB_DOORBELL_RANGE_UPPER
,
2760 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK
);
2763 static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device
*adev
)
2765 struct amdgpu_ring
*ring
;
2768 u64 rb_addr
, rptr_addr
, wptr_gpu_addr
;
2771 /* Set the write pointer delay */
2772 WREG32_SOC15(GC
, 0, mmCP_RB_WPTR_DELAY
, 0);
2774 /* set the RB to use vmid 0 */
2775 WREG32_SOC15(GC
, 0, mmCP_RB_VMID
, 0);
2777 /* Init gfx ring 0 for pipe 0 */
2778 mutex_lock(&adev
->srbm_mutex
);
2779 gfx_v10_0_cp_gfx_switch_pipe(adev
, PIPE_ID0
);
2781 /* Set ring buffer size */
2782 ring
= &adev
->gfx
.gfx_ring
[0];
2783 rb_bufsz
= order_base_2(ring
->ring_size
/ 8);
2784 tmp
= REG_SET_FIELD(0, CP_RB0_CNTL
, RB_BUFSZ
, rb_bufsz
);
2785 tmp
= REG_SET_FIELD(tmp
, CP_RB0_CNTL
, RB_BLKSZ
, rb_bufsz
- 2);
2787 tmp
= REG_SET_FIELD(tmp
, CP_RB0_CNTL
, BUF_SWAP
, 1);
2789 WREG32_SOC15(GC
, 0, mmCP_RB0_CNTL
, tmp
);
2791 /* Initialize the ring buffer's write pointers */
2793 WREG32_SOC15(GC
, 0, mmCP_RB0_WPTR
, lower_32_bits(ring
->wptr
));
2794 WREG32_SOC15(GC
, 0, mmCP_RB0_WPTR_HI
, upper_32_bits(ring
->wptr
));
2796 /* set the wb address wether it's enabled or not */
2797 rptr_addr
= adev
->wb
.gpu_addr
+ (ring
->rptr_offs
* 4);
2798 WREG32_SOC15(GC
, 0, mmCP_RB0_RPTR_ADDR
, lower_32_bits(rptr_addr
));
2799 WREG32_SOC15(GC
, 0, mmCP_RB0_RPTR_ADDR_HI
, upper_32_bits(rptr_addr
) &
2800 CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK
);
2802 wptr_gpu_addr
= adev
->wb
.gpu_addr
+ (ring
->wptr_offs
* 4);
2803 WREG32_SOC15(GC
, 0, mmCP_RB_WPTR_POLL_ADDR_LO
,
2804 lower_32_bits(wptr_gpu_addr
));
2805 WREG32_SOC15(GC
, 0, mmCP_RB_WPTR_POLL_ADDR_HI
,
2806 upper_32_bits(wptr_gpu_addr
));
2809 WREG32_SOC15(GC
, 0, mmCP_RB0_CNTL
, tmp
);
2811 rb_addr
= ring
->gpu_addr
>> 8;
2812 WREG32_SOC15(GC
, 0, mmCP_RB0_BASE
, rb_addr
);
2813 WREG32_SOC15(GC
, 0, mmCP_RB0_BASE_HI
, upper_32_bits(rb_addr
));
2815 WREG32_SOC15(GC
, 0, mmCP_RB_ACTIVE
, 1);
2817 gfx_v10_0_cp_gfx_set_doorbell(adev
, ring
);
2818 mutex_unlock(&adev
->srbm_mutex
);
2820 /* Init gfx ring 1 for pipe 1 */
2821 mutex_lock(&adev
->srbm_mutex
);
2822 gfx_v10_0_cp_gfx_switch_pipe(adev
, PIPE_ID1
);
2823 ring
= &adev
->gfx
.gfx_ring
[1];
2824 rb_bufsz
= order_base_2(ring
->ring_size
/ 8);
2825 tmp
= REG_SET_FIELD(0, CP_RB1_CNTL
, RB_BUFSZ
, rb_bufsz
);
2826 tmp
= REG_SET_FIELD(tmp
, CP_RB1_CNTL
, RB_BLKSZ
, rb_bufsz
- 2);
2827 WREG32_SOC15(GC
, 0, mmCP_RB1_CNTL
, tmp
);
2828 /* Initialize the ring buffer's write pointers */
2830 WREG32_SOC15(GC
, 0, mmCP_RB1_WPTR
, lower_32_bits(ring
->wptr
));
2831 WREG32_SOC15(GC
, 0, mmCP_RB1_WPTR_HI
, upper_32_bits(ring
->wptr
));
2832 /* Set the wb address wether it's enabled or not */
2833 rptr_addr
= adev
->wb
.gpu_addr
+ (ring
->rptr_offs
* 4);
2834 WREG32_SOC15(GC
, 0, mmCP_RB1_RPTR_ADDR
, lower_32_bits(rptr_addr
));
2835 WREG32_SOC15(GC
, 0, mmCP_RB1_RPTR_ADDR_HI
, upper_32_bits(rptr_addr
) &
2836 CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK
);
2837 wptr_gpu_addr
= adev
->wb
.gpu_addr
+ (ring
->wptr_offs
* 4);
2838 WREG32_SOC15(GC
, 0, mmCP_RB_WPTR_POLL_ADDR_LO
,
2839 lower_32_bits(wptr_gpu_addr
));
2840 WREG32_SOC15(GC
, 0, mmCP_RB_WPTR_POLL_ADDR_HI
,
2841 upper_32_bits(wptr_gpu_addr
));
2844 WREG32_SOC15(GC
, 0, mmCP_RB1_CNTL
, tmp
);
2846 rb_addr
= ring
->gpu_addr
>> 8;
2847 WREG32_SOC15(GC
, 0, mmCP_RB1_BASE
, rb_addr
);
2848 WREG32_SOC15(GC
, 0, mmCP_RB1_BASE_HI
, upper_32_bits(rb_addr
));
2849 WREG32_SOC15(GC
, 0, mmCP_RB1_ACTIVE
, 1);
2851 gfx_v10_0_cp_gfx_set_doorbell(adev
, ring
);
2852 mutex_unlock(&adev
->srbm_mutex
);
2854 /* Switch to pipe 0 */
2855 mutex_lock(&adev
->srbm_mutex
);
2856 gfx_v10_0_cp_gfx_switch_pipe(adev
, PIPE_ID0
);
2857 mutex_unlock(&adev
->srbm_mutex
);
2859 /* start the ring */
2860 gfx_v10_0_cp_gfx_start(adev
);
2862 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; i
++) {
2863 ring
= &adev
->gfx
.gfx_ring
[i
];
2864 ring
->sched
.ready
= true;
2870 static void gfx_v10_0_cp_compute_enable(struct amdgpu_device
*adev
, bool enable
)
2875 WREG32_SOC15(GC
, 0, mmCP_MEC_CNTL
, 0);
2877 WREG32_SOC15(GC
, 0, mmCP_MEC_CNTL
,
2878 (CP_MEC_CNTL__MEC_ME1_HALT_MASK
|
2879 CP_MEC_CNTL__MEC_ME2_HALT_MASK
));
2880 for (i
= 0; i
< adev
->gfx
.num_compute_rings
; i
++)
2881 adev
->gfx
.compute_ring
[i
].sched
.ready
= false;
2882 adev
->gfx
.kiq
.ring
.sched
.ready
= false;
2887 static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device
*adev
)
2889 const struct gfx_firmware_header_v1_0
*mec_hdr
;
2890 const __le32
*fw_data
;
2893 u32 usec_timeout
= 50000; /* Wait for 50 ms */
2895 if (!adev
->gfx
.mec_fw
)
2898 gfx_v10_0_cp_compute_enable(adev
, false);
2900 mec_hdr
= (const struct gfx_firmware_header_v1_0
*)adev
->gfx
.mec_fw
->data
;
2901 amdgpu_ucode_print_gfx_hdr(&mec_hdr
->header
);
2903 fw_data
= (const __le32
*)
2904 (adev
->gfx
.mec_fw
->data
+
2905 le32_to_cpu(mec_hdr
->header
.ucode_array_offset_bytes
));
2907 /* Trigger an invalidation of the L1 instruction caches */
2908 tmp
= RREG32_SOC15(GC
, 0, mmCP_CPC_IC_OP_CNTL
);
2909 tmp
= REG_SET_FIELD(tmp
, CP_CPC_IC_OP_CNTL
, INVALIDATE_CACHE
, 1);
2910 WREG32_SOC15(GC
, 0, mmCP_CPC_IC_OP_CNTL
, tmp
);
2912 /* Wait for invalidation complete */
2913 for (i
= 0; i
< usec_timeout
; i
++) {
2914 tmp
= RREG32_SOC15(GC
, 0, mmCP_CPC_IC_OP_CNTL
);
2915 if (1 == REG_GET_FIELD(tmp
, CP_CPC_IC_OP_CNTL
,
2916 INVALIDATE_CACHE_COMPLETE
))
2921 if (i
>= usec_timeout
) {
2922 dev_err(adev
->dev
, "failed to invalidate instruction cache\n");
2926 if (amdgpu_emu_mode
== 1)
2927 adev
->nbio
.funcs
->hdp_flush(adev
, NULL
);
2929 tmp
= RREG32_SOC15(GC
, 0, mmCP_CPC_IC_BASE_CNTL
);
2930 tmp
= REG_SET_FIELD(tmp
, CP_CPC_IC_BASE_CNTL
, CACHE_POLICY
, 0);
2931 tmp
= REG_SET_FIELD(tmp
, CP_CPC_IC_BASE_CNTL
, EXE_DISABLE
, 0);
2932 tmp
= REG_SET_FIELD(tmp
, CP_CPC_IC_BASE_CNTL
, ADDRESS_CLAMP
, 1);
2933 WREG32_SOC15(GC
, 0, mmCP_CPC_IC_BASE_CNTL
, tmp
);
2935 WREG32_SOC15(GC
, 0, mmCP_CPC_IC_BASE_LO
, adev
->gfx
.mec
.mec_fw_gpu_addr
&
2937 WREG32_SOC15(GC
, 0, mmCP_CPC_IC_BASE_HI
,
2938 upper_32_bits(adev
->gfx
.mec
.mec_fw_gpu_addr
));
2941 WREG32_SOC15(GC
, 0, mmCP_MEC_ME1_UCODE_ADDR
, 0);
2943 for (i
= 0; i
< mec_hdr
->jt_size
; i
++)
2944 WREG32_SOC15(GC
, 0, mmCP_MEC_ME1_UCODE_DATA
,
2945 le32_to_cpup(fw_data
+ mec_hdr
->jt_offset
+ i
));
2947 WREG32_SOC15(GC
, 0, mmCP_MEC_ME1_UCODE_ADDR
, adev
->gfx
.mec_fw_version
);
2950 * TODO: Loading MEC2 firmware is only necessary if MEC2 should run
2951 * different microcode than MEC1.
2957 static void gfx_v10_0_kiq_setting(struct amdgpu_ring
*ring
)
2960 struct amdgpu_device
*adev
= ring
->adev
;
2962 /* tell RLC which is KIQ queue */
2963 tmp
= RREG32_SOC15(GC
, 0, mmRLC_CP_SCHEDULERS
);
2965 tmp
|= (ring
->me
<< 5) | (ring
->pipe
<< 3) | (ring
->queue
);
2966 WREG32_SOC15(GC
, 0, mmRLC_CP_SCHEDULERS
, tmp
);
2968 WREG32_SOC15(GC
, 0, mmRLC_CP_SCHEDULERS
, tmp
);
2971 static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring
*ring
)
2973 struct amdgpu_device
*adev
= ring
->adev
;
2974 struct v10_gfx_mqd
*mqd
= ring
->mqd_ptr
;
2975 uint64_t hqd_gpu_addr
, wb_gpu_addr
;
2979 /* set up gfx hqd wptr */
2980 mqd
->cp_gfx_hqd_wptr
= 0;
2981 mqd
->cp_gfx_hqd_wptr_hi
= 0;
2983 /* set the pointer to the MQD */
2984 mqd
->cp_mqd_base_addr
= ring
->mqd_gpu_addr
& 0xfffffffc;
2985 mqd
->cp_mqd_base_addr_hi
= upper_32_bits(ring
->mqd_gpu_addr
);
2987 /* set up mqd control */
2988 tmp
= RREG32_SOC15(GC
, 0, mmCP_GFX_MQD_CONTROL
);
2989 tmp
= REG_SET_FIELD(tmp
, CP_GFX_MQD_CONTROL
, VMID
, 0);
2990 tmp
= REG_SET_FIELD(tmp
, CP_GFX_MQD_CONTROL
, PRIV_STATE
, 1);
2991 tmp
= REG_SET_FIELD(tmp
, CP_GFX_MQD_CONTROL
, CACHE_POLICY
, 0);
2992 mqd
->cp_gfx_mqd_control
= tmp
;
2994 /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */
2995 tmp
= RREG32_SOC15(GC
, 0, mmCP_GFX_HQD_VMID
);
2996 tmp
= REG_SET_FIELD(tmp
, CP_GFX_HQD_VMID
, VMID
, 0);
2997 mqd
->cp_gfx_hqd_vmid
= 0;
2999 /* set up default queue priority level
3000 * 0x0 = low priority, 0x1 = high priority */
3001 tmp
= RREG32_SOC15(GC
, 0, mmCP_GFX_HQD_QUEUE_PRIORITY
);
3002 tmp
= REG_SET_FIELD(tmp
, CP_GFX_HQD_QUEUE_PRIORITY
, PRIORITY_LEVEL
, 0);
3003 mqd
->cp_gfx_hqd_queue_priority
= tmp
;
3005 /* set up time quantum */
3006 tmp
= RREG32_SOC15(GC
, 0, mmCP_GFX_HQD_QUANTUM
);
3007 tmp
= REG_SET_FIELD(tmp
, CP_GFX_HQD_QUANTUM
, QUANTUM_EN
, 1);
3008 mqd
->cp_gfx_hqd_quantum
= tmp
;
3010 /* set up gfx hqd base. this is similar as CP_RB_BASE */
3011 hqd_gpu_addr
= ring
->gpu_addr
>> 8;
3012 mqd
->cp_gfx_hqd_base
= hqd_gpu_addr
;
3013 mqd
->cp_gfx_hqd_base_hi
= upper_32_bits(hqd_gpu_addr
);
3015 /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
3016 wb_gpu_addr
= adev
->wb
.gpu_addr
+ (ring
->rptr_offs
* 4);
3017 mqd
->cp_gfx_hqd_rptr_addr
= wb_gpu_addr
& 0xfffffffc;
3018 mqd
->cp_gfx_hqd_rptr_addr_hi
=
3019 upper_32_bits(wb_gpu_addr
) & 0xffff;
3021 /* set up rb_wptr_poll addr */
3022 wb_gpu_addr
= adev
->wb
.gpu_addr
+ (ring
->wptr_offs
* 4);
3023 mqd
->cp_rb_wptr_poll_addr_lo
= wb_gpu_addr
& 0xfffffffc;
3024 mqd
->cp_rb_wptr_poll_addr_hi
= upper_32_bits(wb_gpu_addr
) & 0xffff;
3026 /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
3027 rb_bufsz
= order_base_2(ring
->ring_size
/ 4) - 1;
3028 tmp
= RREG32_SOC15(GC
, 0, mmCP_GFX_HQD_CNTL
);
3029 tmp
= REG_SET_FIELD(tmp
, CP_GFX_HQD_CNTL
, RB_BUFSZ
, rb_bufsz
);
3030 tmp
= REG_SET_FIELD(tmp
, CP_GFX_HQD_CNTL
, RB_BLKSZ
, rb_bufsz
- 2);
3032 tmp
= REG_SET_FIELD(tmp
, CP_GFX_HQD_CNTL
, BUF_SWAP
, 1);
3034 mqd
->cp_gfx_hqd_cntl
= tmp
;
3036 /* set up cp_doorbell_control */
3037 tmp
= RREG32_SOC15(GC
, 0, mmCP_RB_DOORBELL_CONTROL
);
3038 if (ring
->use_doorbell
) {
3039 tmp
= REG_SET_FIELD(tmp
, CP_RB_DOORBELL_CONTROL
,
3040 DOORBELL_OFFSET
, ring
->doorbell_index
);
3041 tmp
= REG_SET_FIELD(tmp
, CP_RB_DOORBELL_CONTROL
,
3044 tmp
= REG_SET_FIELD(tmp
, CP_RB_DOORBELL_CONTROL
,
3046 mqd
->cp_rb_doorbell_control
= tmp
;
3048 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3050 mqd
->cp_gfx_hqd_rptr
= RREG32_SOC15(GC
, 0, mmCP_GFX_HQD_RPTR
);
3052 /* active the queue */
3053 mqd
->cp_gfx_hqd_active
= 1;
3058 #ifdef BRING_UP_DEBUG
3059 static int gfx_v10_0_gfx_queue_init_register(struct amdgpu_ring
*ring
)
3061 struct amdgpu_device
*adev
= ring
->adev
;
3062 struct v10_gfx_mqd
*mqd
= ring
->mqd_ptr
;
3064 /* set mmCP_GFX_HQD_WPTR/_HI to 0 */
3065 WREG32_SOC15(GC
, 0, mmCP_GFX_HQD_WPTR
, mqd
->cp_gfx_hqd_wptr
);
3066 WREG32_SOC15(GC
, 0, mmCP_GFX_HQD_WPTR_HI
, mqd
->cp_gfx_hqd_wptr_hi
);
3068 /* set GFX_MQD_BASE */
3069 WREG32_SOC15(GC
, 0, mmCP_MQD_BASE_ADDR
, mqd
->cp_mqd_base_addr
);
3070 WREG32_SOC15(GC
, 0, mmCP_MQD_BASE_ADDR_HI
, mqd
->cp_mqd_base_addr_hi
);
3072 /* set GFX_MQD_CONTROL */
3073 WREG32_SOC15(GC
, 0, mmCP_GFX_MQD_CONTROL
, mqd
->cp_gfx_mqd_control
);
3075 /* set GFX_HQD_VMID to 0 */
3076 WREG32_SOC15(GC
, 0, mmCP_GFX_HQD_VMID
, mqd
->cp_gfx_hqd_vmid
);
3078 WREG32_SOC15(GC
, 0, mmCP_GFX_HQD_QUEUE_PRIORITY
,
3079 mqd
->cp_gfx_hqd_queue_priority
);
3080 WREG32_SOC15(GC
, 0, mmCP_GFX_HQD_QUANTUM
, mqd
->cp_gfx_hqd_quantum
);
3082 /* set GFX_HQD_BASE, similar as CP_RB_BASE */
3083 WREG32_SOC15(GC
, 0, mmCP_GFX_HQD_BASE
, mqd
->cp_gfx_hqd_base
);
3084 WREG32_SOC15(GC
, 0, mmCP_GFX_HQD_BASE_HI
, mqd
->cp_gfx_hqd_base_hi
);
3086 /* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */
3087 WREG32_SOC15(GC
, 0, mmCP_GFX_HQD_RPTR_ADDR
, mqd
->cp_gfx_hqd_rptr_addr
);
3088 WREG32_SOC15(GC
, 0, mmCP_GFX_HQD_RPTR_ADDR_HI
, mqd
->cp_gfx_hqd_rptr_addr_hi
);
3090 /* set GFX_HQD_CNTL, similar as CP_RB_CNTL */
3091 WREG32_SOC15(GC
, 0, mmCP_GFX_HQD_CNTL
, mqd
->cp_gfx_hqd_cntl
);
3093 /* set RB_WPTR_POLL_ADDR */
3094 WREG32_SOC15(GC
, 0, mmCP_RB_WPTR_POLL_ADDR_LO
, mqd
->cp_rb_wptr_poll_addr_lo
);
3095 WREG32_SOC15(GC
, 0, mmCP_RB_WPTR_POLL_ADDR_HI
, mqd
->cp_rb_wptr_poll_addr_hi
);
3097 /* set RB_DOORBELL_CONTROL */
3098 WREG32_SOC15(GC
, 0, mmCP_RB_DOORBELL_CONTROL
, mqd
->cp_rb_doorbell_control
);
3100 /* active the queue */
3101 WREG32_SOC15(GC
, 0, mmCP_GFX_HQD_ACTIVE
, mqd
->cp_gfx_hqd_active
);
3107 static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring
*ring
)
3109 struct amdgpu_device
*adev
= ring
->adev
;
3110 struct v10_gfx_mqd
*mqd
= ring
->mqd_ptr
;
3111 int mqd_idx
= ring
- &adev
->gfx
.gfx_ring
[0];
3113 if (!adev
->in_gpu_reset
&& !adev
->in_suspend
) {
3114 memset((void *)mqd
, 0, sizeof(*mqd
));
3115 mutex_lock(&adev
->srbm_mutex
);
3116 nv_grbm_select(adev
, ring
->me
, ring
->pipe
, ring
->queue
, 0);
3117 gfx_v10_0_gfx_mqd_init(ring
);
3118 #ifdef BRING_UP_DEBUG
3119 gfx_v10_0_gfx_queue_init_register(ring
);
3121 nv_grbm_select(adev
, 0, 0, 0, 0);
3122 mutex_unlock(&adev
->srbm_mutex
);
3123 if (adev
->gfx
.me
.mqd_backup
[mqd_idx
])
3124 memcpy(adev
->gfx
.me
.mqd_backup
[mqd_idx
], mqd
, sizeof(*mqd
));
3125 } else if (adev
->in_gpu_reset
) {
3126 /* reset mqd with the backup copy */
3127 if (adev
->gfx
.me
.mqd_backup
[mqd_idx
])
3128 memcpy(mqd
, adev
->gfx
.me
.mqd_backup
[mqd_idx
], sizeof(*mqd
));
3129 /* reset the ring */
3131 adev
->wb
.wb
[ring
->wptr_offs
] = 0;
3132 amdgpu_ring_clear_ring(ring
);
3133 #ifdef BRING_UP_DEBUG
3134 mutex_lock(&adev
->srbm_mutex
);
3135 nv_grbm_select(adev
, ring
->me
, ring
->pipe
, ring
->queue
, 0);
3136 gfx_v10_0_gfx_queue_init_register(ring
);
3137 nv_grbm_select(adev
, 0, 0, 0, 0);
3138 mutex_unlock(&adev
->srbm_mutex
);
3141 amdgpu_ring_clear_ring(ring
);
3147 #ifndef BRING_UP_DEBUG
3148 static int gfx_v10_0_kiq_enable_kgq(struct amdgpu_device
*adev
)
3150 struct amdgpu_kiq
*kiq
= &adev
->gfx
.kiq
;
3151 struct amdgpu_ring
*kiq_ring
= &adev
->gfx
.kiq
.ring
;
3154 if (!kiq
->pmf
|| !kiq
->pmf
->kiq_map_queues
)
3157 r
= amdgpu_ring_alloc(kiq_ring
, kiq
->pmf
->map_queues_size
*
3158 adev
->gfx
.num_gfx_rings
);
3160 DRM_ERROR("Failed to lock KIQ (%d).\n", r
);
3164 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; i
++)
3165 kiq
->pmf
->kiq_map_queues(kiq_ring
, &adev
->gfx
.gfx_ring
[i
]);
3167 r
= amdgpu_ring_test_ring(kiq_ring
);
3169 DRM_ERROR("kfq enable failed\n");
3170 kiq_ring
->sched
.ready
= false;
3176 static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device
*adev
)
3179 struct amdgpu_ring
*ring
;
3181 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; i
++) {
3182 ring
= &adev
->gfx
.gfx_ring
[i
];
3184 r
= amdgpu_bo_reserve(ring
->mqd_obj
, false);
3185 if (unlikely(r
!= 0))
3188 r
= amdgpu_bo_kmap(ring
->mqd_obj
, (void **)&ring
->mqd_ptr
);
3190 r
= gfx_v10_0_gfx_init_queue(ring
);
3191 amdgpu_bo_kunmap(ring
->mqd_obj
);
3192 ring
->mqd_ptr
= NULL
;
3194 amdgpu_bo_unreserve(ring
->mqd_obj
);
3198 #ifndef BRING_UP_DEBUG
3199 r
= gfx_v10_0_kiq_enable_kgq(adev
);
3203 r
= gfx_v10_0_cp_gfx_start(adev
);
3207 for (i
= 0; i
< adev
->gfx
.num_gfx_rings
; i
++) {
3208 ring
= &adev
->gfx
.gfx_ring
[i
];
3209 ring
->sched
.ready
= true;
3215 static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring
*ring
)
3217 struct amdgpu_device
*adev
= ring
->adev
;
3218 struct v10_compute_mqd
*mqd
= ring
->mqd_ptr
;
3219 uint64_t hqd_gpu_addr
, wb_gpu_addr
, eop_base_addr
;
3222 mqd
->header
= 0xC0310800;
3223 mqd
->compute_pipelinestat_enable
= 0x00000001;
3224 mqd
->compute_static_thread_mgmt_se0
= 0xffffffff;
3225 mqd
->compute_static_thread_mgmt_se1
= 0xffffffff;
3226 mqd
->compute_static_thread_mgmt_se2
= 0xffffffff;
3227 mqd
->compute_static_thread_mgmt_se3
= 0xffffffff;
3228 mqd
->compute_misc_reserved
= 0x00000003;
3230 eop_base_addr
= ring
->eop_gpu_addr
>> 8;
3231 mqd
->cp_hqd_eop_base_addr_lo
= eop_base_addr
;
3232 mqd
->cp_hqd_eop_base_addr_hi
= upper_32_bits(eop_base_addr
);
3234 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3235 tmp
= RREG32_SOC15(GC
, 0, mmCP_HQD_EOP_CONTROL
);
3236 tmp
= REG_SET_FIELD(tmp
, CP_HQD_EOP_CONTROL
, EOP_SIZE
,
3237 (order_base_2(GFX10_MEC_HPD_SIZE
/ 4) - 1));
3239 mqd
->cp_hqd_eop_control
= tmp
;
3241 /* enable doorbell? */
3242 tmp
= RREG32_SOC15(GC
, 0, mmCP_HQD_PQ_DOORBELL_CONTROL
);
3244 if (ring
->use_doorbell
) {
3245 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
3246 DOORBELL_OFFSET
, ring
->doorbell_index
);
3247 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
3249 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
3250 DOORBELL_SOURCE
, 0);
3251 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
3254 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
3258 mqd
->cp_hqd_pq_doorbell_control
= tmp
;
3260 /* disable the queue if it's active */
3262 mqd
->cp_hqd_dequeue_request
= 0;
3263 mqd
->cp_hqd_pq_rptr
= 0;
3264 mqd
->cp_hqd_pq_wptr_lo
= 0;
3265 mqd
->cp_hqd_pq_wptr_hi
= 0;
3267 /* set the pointer to the MQD */
3268 mqd
->cp_mqd_base_addr_lo
= ring
->mqd_gpu_addr
& 0xfffffffc;
3269 mqd
->cp_mqd_base_addr_hi
= upper_32_bits(ring
->mqd_gpu_addr
);
3271 /* set MQD vmid to 0 */
3272 tmp
= RREG32_SOC15(GC
, 0, mmCP_MQD_CONTROL
);
3273 tmp
= REG_SET_FIELD(tmp
, CP_MQD_CONTROL
, VMID
, 0);
3274 mqd
->cp_mqd_control
= tmp
;
3276 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3277 hqd_gpu_addr
= ring
->gpu_addr
>> 8;
3278 mqd
->cp_hqd_pq_base_lo
= hqd_gpu_addr
;
3279 mqd
->cp_hqd_pq_base_hi
= upper_32_bits(hqd_gpu_addr
);
3281 /* set up the HQD, this is similar to CP_RB0_CNTL */
3282 tmp
= RREG32_SOC15(GC
, 0, mmCP_HQD_PQ_CONTROL
);
3283 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, QUEUE_SIZE
,
3284 (order_base_2(ring
->ring_size
/ 4) - 1));
3285 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, RPTR_BLOCK_SIZE
,
3286 ((order_base_2(AMDGPU_GPU_PAGE_SIZE
/ 4) - 1) << 8));
3288 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, ENDIAN_SWAP
, 1);
3290 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, UNORD_DISPATCH
, 0);
3291 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, TUNNEL_DISPATCH
, 0);
3292 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, PRIV_STATE
, 1);
3293 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_CONTROL
, KMD_QUEUE
, 1);
3294 mqd
->cp_hqd_pq_control
= tmp
;
3296 /* set the wb address whether it's enabled or not */
3297 wb_gpu_addr
= adev
->wb
.gpu_addr
+ (ring
->rptr_offs
* 4);
3298 mqd
->cp_hqd_pq_rptr_report_addr_lo
= wb_gpu_addr
& 0xfffffffc;
3299 mqd
->cp_hqd_pq_rptr_report_addr_hi
=
3300 upper_32_bits(wb_gpu_addr
) & 0xffff;
3302 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3303 wb_gpu_addr
= adev
->wb
.gpu_addr
+ (ring
->wptr_offs
* 4);
3304 mqd
->cp_hqd_pq_wptr_poll_addr_lo
= wb_gpu_addr
& 0xfffffffc;
3305 mqd
->cp_hqd_pq_wptr_poll_addr_hi
= upper_32_bits(wb_gpu_addr
) & 0xffff;
3308 /* enable the doorbell if requested */
3309 if (ring
->use_doorbell
) {
3310 tmp
= RREG32_SOC15(GC
, 0, mmCP_HQD_PQ_DOORBELL_CONTROL
);
3311 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
3312 DOORBELL_OFFSET
, ring
->doorbell_index
);
3314 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
3316 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
3317 DOORBELL_SOURCE
, 0);
3318 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PQ_DOORBELL_CONTROL
,
3322 mqd
->cp_hqd_pq_doorbell_control
= tmp
;
3324 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3326 mqd
->cp_hqd_pq_rptr
= RREG32_SOC15(GC
, 0, mmCP_HQD_PQ_RPTR
);
3328 /* set the vmid for the queue */
3329 mqd
->cp_hqd_vmid
= 0;
3331 tmp
= RREG32_SOC15(GC
, 0, mmCP_HQD_PERSISTENT_STATE
);
3332 tmp
= REG_SET_FIELD(tmp
, CP_HQD_PERSISTENT_STATE
, PRELOAD_SIZE
, 0x53);
3333 mqd
->cp_hqd_persistent_state
= tmp
;
3335 /* set MIN_IB_AVAIL_SIZE */
3336 tmp
= RREG32_SOC15(GC
, 0, mmCP_HQD_IB_CONTROL
);
3337 tmp
= REG_SET_FIELD(tmp
, CP_HQD_IB_CONTROL
, MIN_IB_AVAIL_SIZE
, 3);
3338 mqd
->cp_hqd_ib_control
= tmp
;
3340 /* map_queues packet doesn't need activate the queue,
3341 * so only kiq need set this field.
3343 if (ring
->funcs
->type
== AMDGPU_RING_TYPE_KIQ
)
3344 mqd
->cp_hqd_active
= 1;
3349 static int gfx_v10_0_kiq_init_register(struct amdgpu_ring
*ring
)
3351 struct amdgpu_device
*adev
= ring
->adev
;
3352 struct v10_compute_mqd
*mqd
= ring
->mqd_ptr
;
3355 /* disable wptr polling */
3356 WREG32_FIELD15(GC
, 0, CP_PQ_WPTR_POLL_CNTL
, EN
, 0);
3358 /* write the EOP addr */
3359 WREG32_SOC15(GC
, 0, mmCP_HQD_EOP_BASE_ADDR
,
3360 mqd
->cp_hqd_eop_base_addr_lo
);
3361 WREG32_SOC15(GC
, 0, mmCP_HQD_EOP_BASE_ADDR_HI
,
3362 mqd
->cp_hqd_eop_base_addr_hi
);
3364 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3365 WREG32_SOC15(GC
, 0, mmCP_HQD_EOP_CONTROL
,
3366 mqd
->cp_hqd_eop_control
);
3368 /* enable doorbell? */
3369 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_DOORBELL_CONTROL
,
3370 mqd
->cp_hqd_pq_doorbell_control
);
3372 /* disable the queue if it's active */
3373 if (RREG32_SOC15(GC
, 0, mmCP_HQD_ACTIVE
) & 1) {
3374 WREG32_SOC15(GC
, 0, mmCP_HQD_DEQUEUE_REQUEST
, 1);
3375 for (j
= 0; j
< adev
->usec_timeout
; j
++) {
3376 if (!(RREG32_SOC15(GC
, 0, mmCP_HQD_ACTIVE
) & 1))
3380 WREG32_SOC15(GC
, 0, mmCP_HQD_DEQUEUE_REQUEST
,
3381 mqd
->cp_hqd_dequeue_request
);
3382 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_RPTR
,
3383 mqd
->cp_hqd_pq_rptr
);
3384 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_LO
,
3385 mqd
->cp_hqd_pq_wptr_lo
);
3386 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_HI
,
3387 mqd
->cp_hqd_pq_wptr_hi
);
3390 /* set the pointer to the MQD */
3391 WREG32_SOC15(GC
, 0, mmCP_MQD_BASE_ADDR
,
3392 mqd
->cp_mqd_base_addr_lo
);
3393 WREG32_SOC15(GC
, 0, mmCP_MQD_BASE_ADDR_HI
,
3394 mqd
->cp_mqd_base_addr_hi
);
3396 /* set MQD vmid to 0 */
3397 WREG32_SOC15(GC
, 0, mmCP_MQD_CONTROL
,
3398 mqd
->cp_mqd_control
);
3400 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3401 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_BASE
,
3402 mqd
->cp_hqd_pq_base_lo
);
3403 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_BASE_HI
,
3404 mqd
->cp_hqd_pq_base_hi
);
3406 /* set up the HQD, this is similar to CP_RB0_CNTL */
3407 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_CONTROL
,
3408 mqd
->cp_hqd_pq_control
);
3410 /* set the wb address whether it's enabled or not */
3411 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR
,
3412 mqd
->cp_hqd_pq_rptr_report_addr_lo
);
3413 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI
,
3414 mqd
->cp_hqd_pq_rptr_report_addr_hi
);
3416 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3417 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR
,
3418 mqd
->cp_hqd_pq_wptr_poll_addr_lo
);
3419 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI
,
3420 mqd
->cp_hqd_pq_wptr_poll_addr_hi
);
3422 /* enable the doorbell if requested */
3423 if (ring
->use_doorbell
) {
3424 WREG32_SOC15(GC
, 0, mmCP_MEC_DOORBELL_RANGE_LOWER
,
3425 (adev
->doorbell_index
.kiq
* 2) << 2);
3426 WREG32_SOC15(GC
, 0, mmCP_MEC_DOORBELL_RANGE_UPPER
,
3427 (adev
->doorbell_index
.userqueue_end
* 2) << 2);
3430 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_DOORBELL_CONTROL
,
3431 mqd
->cp_hqd_pq_doorbell_control
);
3433 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3434 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_LO
,
3435 mqd
->cp_hqd_pq_wptr_lo
);
3436 WREG32_SOC15(GC
, 0, mmCP_HQD_PQ_WPTR_HI
,
3437 mqd
->cp_hqd_pq_wptr_hi
);
3439 /* set the vmid for the queue */
3440 WREG32_SOC15(GC
, 0, mmCP_HQD_VMID
, mqd
->cp_hqd_vmid
);
3442 WREG32_SOC15(GC
, 0, mmCP_HQD_PERSISTENT_STATE
,
3443 mqd
->cp_hqd_persistent_state
);
3445 /* activate the queue */
3446 WREG32_SOC15(GC
, 0, mmCP_HQD_ACTIVE
,
3447 mqd
->cp_hqd_active
);
3449 if (ring
->use_doorbell
)
3450 WREG32_FIELD15(GC
, 0, CP_PQ_STATUS
, DOORBELL_ENABLE
, 1);
static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v10_compute_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v10_0_kiq_setting(ring);

	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);

		mutex_lock(&adev->srbm_mutex);
		nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v10_0_kiq_init_register(ring);
		nv_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(*mqd));
		mutex_lock(&adev->srbm_mutex);
		nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v10_0_compute_mqd_init(ring);
		gfx_v10_0_kiq_init_register(ring);
		nv_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
	}

	return 0;
}
static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v10_compute_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->in_gpu_reset && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(*mqd));
		mutex_lock(&adev->srbm_mutex);
		nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v10_0_compute_mqd_init(ring);
		nv_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}

	return 0;
}
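
/*
 * KIQ/KCQ resume: the MQD lives in a reserved BO, so each queue is brought up
 * by reserving and kmapping that BO, running the init path through the mapped
 * MQD, and then unmapping it again before the BO is unreserved.
 */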
static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		return r;

	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
	if (unlikely(r != 0))
		return r;

	gfx_v10_0_kiq_init_queue(ring);
	amdgpu_bo_kunmap(ring->mqd_obj);
	ring->mqd_ptr = NULL;
	amdgpu_bo_unreserve(ring->mqd_obj);
	ring->sched.ready = true;
	return 0;
}

static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v10_0_cp_compute_enable(adev, true);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
		if (!r) {
			r = gfx_v10_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	r = amdgpu_gfx_enable_kcq(adev);
done:
	return r;
}
static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *ring;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v10_0_enable_gui_idle_interrupt(adev, false);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy firmware loading */
		r = gfx_v10_0_cp_gfx_load_microcode(adev);
		if (r)
			return r;

		r = gfx_v10_0_cp_compute_load_microcode(adev);
		if (r)
			return r;
	}

	r = gfx_v10_0_kiq_resume(adev);
	if (r)
		return r;

	r = gfx_v10_0_kcq_resume(adev);
	if (r)
		return r;

	if (!amdgpu_async_gfx_ring) {
		r = gfx_v10_0_cp_gfx_resume(adev);
		if (r)
			return r;
	} else {
		r = gfx_v10_0_cp_async_gfx_ring_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		r = amdgpu_ring_test_helper(ring);
		if (r)
			return r;
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		r = amdgpu_ring_test_helper(ring);
		if (r)
			return r;
	}

	return 0;
}

static void gfx_v10_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v10_0_cp_gfx_enable(adev, enable);
	gfx_v10_0_cp_compute_enable(adev, enable);
}
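
/*
 * GRBM CAM remapping redirects writes to the *_UMD register aliases onto the
 * corresponding privileged registers. The check below writes a test pattern
 * through the UMD alias to see whether the remap is already in place.
 */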
static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev)
{
	uint32_t data, pattern = 0xDEADBEEF;

	/* check if mmVGT_ESGS_RING_SIZE_UMD
	 * has been remapped to mmVGT_ESGS_RING_SIZE */
	data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE);

	WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, 0);

	WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern);

	if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE) == pattern) {
		WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data);
		return true;
	} else {
		WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data);
		return false;
	}
}
static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev)
{
	uint32_t data;

	/* initialize cam_index to 0
	 * the index will auto-increment after each data write */
	WREG32_SOC15(GC, 0, mmGRBM_CAM_INDEX, 0);

	/* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */
	data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) <<
		GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
	       (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE) <<
		GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
	WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
	WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);

	/* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */
	data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) <<
		GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
	       (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE) <<
		GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
	WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
	WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);

	/* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */
	data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) <<
		GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
	       (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI) <<
		GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
	WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
	WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);

	/* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */
	data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) <<
		GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
	       (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM) <<
		GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
	WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
	WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);

	/* mmVGT_ESGS_RING_SIZE_UMD -> mmVGT_ESGS_RING_SIZE */
	data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) <<
		GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
	       (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE) <<
		GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
	WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
	WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);

	/* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */
	data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) <<
		GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
	       (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE) <<
		GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
	WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
	WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);

	/* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */
	data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) <<
		GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
	       (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL) <<
		GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
	WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
	WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
}
static int gfx_v10_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!amdgpu_emu_mode)
		gfx_v10_0_init_golden_registers(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/*
		 * For gfx 10, RLC firmware loading relies on the SMU firmware
		 * being loaded first, so in direct loading mode the SMC ucode
		 * has to be loaded here before the RLC.
		 */
		r = smu_load_microcode(&adev->smu);
		if (r)
			return r;

		r = smu_check_fw_status(&adev->smu);
		if (r) {
			pr_err("SMC firmware status is not correct\n");
			return r;
		}
	}

	/* if GRBM CAM not remapped, set up the remapping */
	if (!gfx_v10_0_check_grbm_cam_remapping(adev))
		gfx_v10_0_setup_grbm_cam_remapping(adev);

	gfx_v10_0_constants_init(adev);

	r = gfx_v10_0_rlc_resume(adev);
	if (r)
		return r;

	/*
	 * init golden registers and rlc resume may override some registers,
	 * reconfig them here
	 */
	gfx_v10_0_tcp_harvest(adev);

	r = gfx_v10_0_cp_resume(adev);
	if (r)
		return r;

	return r;
}
#ifndef BRING_UP_DEBUG
static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	int i;

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
					adev->gfx.num_gfx_rings))
		return -ENOMEM;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
					   PREEMPT_QUEUES, 0, 0);

	return amdgpu_ring_test_ring(kiq_ring);
}
#endif

static int gfx_v10_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
#ifndef BRING_UP_DEBUG
	if (amdgpu_async_gfx_ring) {
		r = gfx_v10_0_kiq_disable_kgq(adev);
		if (r)
			DRM_ERROR("KGQ disable failed\n");
	}
#endif
	if (amdgpu_gfx_disable_kcq(adev))
		DRM_ERROR("KCQ disable failed\n");
	if (amdgpu_sriov_vf(adev)) {
		gfx_v10_0_cp_gfx_enable(adev, false);
		return 0;
	}
	gfx_v10_0_cp_enable(adev, false);
	gfx_v10_0_enable_gui_idle_interrupt(adev, false);

	return 0;
}
static int gfx_v10_0_suspend(void *handle)
{
	return gfx_v10_0_hw_fini(handle);
}

static int gfx_v10_0_resume(void *handle)
{
	return gfx_v10_0_hw_init(handle);
}

static bool gfx_v10_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
				GRBM_STATUS, GUI_ACTIVE))
		return false;
	else
		return true;
}

static int gfx_v10_0_wait_for_idle(void *handle)
{
	unsigned i;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		/* read MC_STATUS */
		tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) &
			GRBM_STATUS__GUI_ACTIVE_MASK;

		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}
static int gfx_v10_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS */
	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__DB_BUSY_MASK |
		   GRBM_STATUS__CB_BUSY_MASK | GRBM_STATUS__GDS_BUSY_MASK |
		   GRBM_STATUS__SPI_BUSY_MASK | GRBM_STATUS__GE_BUSY_NO_DMA_MASK
		   | GRBM_STATUS__BCI_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP,
						1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX,
						1);
	}

	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP,
						1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC,
						1);

	if (grbm_soft_reset) {
		/* stop the rlc */
		gfx_v10_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v10_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v10_0_cp_compute_enable(adev, false);

		if (grbm_soft_reset) {
			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
		}

		/* Wait a little for things to settle down */
		udelay(50);
	}
	return 0;
}
static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}

static void gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					   uint32_t vmid,
					   uint32_t gds_base, uint32_t gds_size,
					   uint32_t gws_base, uint32_t gws_size,
					   uint32_t oa_base, uint32_t oa_size)
{
	struct amdgpu_device *adev = ring->adev;

	/* GDS Base */
	gfx_v10_0_write_data_to_reg(ring, 0, false,
				    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
				    gds_base);

	/* GDS Size */
	gfx_v10_0_write_data_to_reg(ring, 0, false,
				    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
				    gds_size);

	/* GWS */
	gfx_v10_0_write_data_to_reg(ring, 0, false,
				    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
				    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA */
	gfx_v10_0_write_data_to_reg(ring, 0, false,
				    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
				    (1 << (oa_size + oa_base)) - (1 << oa_base));
}
static int gfx_v10_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v10_0_set_kiq_pm4_funcs(adev);
	gfx_v10_0_set_ring_funcs(adev);
	gfx_v10_0_set_irq_funcs(adev);
	gfx_v10_0_set_gds_init(adev);
	gfx_v10_0_set_rlc_funcs(adev);

	return 0;
}

static int gfx_v10_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	return 0;
}
static bool gfx_v10_0_is_rlc_enabled(struct amdgpu_device *adev)
{
	uint32_t rlc_cntl;

	/* if RLC is not enabled, do nothing */
	rlc_cntl = RREG32_SOC15(GC, 0, mmRLC_CNTL);
	return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
}

static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	data = RLC_SAFE_MODE__CMD_MASK;
	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);

	/* wait for RLC_SAFE_MODE */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE),
				   RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}

static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;

	data = RLC_SAFE_MODE__CMD_MASK;
	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
}
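
/*
 * The clock-gating helpers below use a read-modify-write pattern: the current
 * register value is read into def/data and only written back when the
 * computed value actually differs from what was read.
 */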
static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						       bool enable)
{
	uint32_t data, def;

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);

		/* only for Vega10 & Raven1 */
		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;

		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* MGLS is a global flag to control all MGLS in GFX */
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			/* 2 - RLC memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
				if (def != data)
					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
			}
			/* 3 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
				if (def != data)
					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
			}
		}
	} else {
		/* 1 - MGCG_OVERRIDE */
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
		}
	}
}
static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev,
					     bool enable)
{
	uint32_t data, def;

	/* Enable 3D CGCG/CGLS */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
		/* write cmd to clear cgcg/cgls ov */
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
		/* unset CGCG override */
		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
		/* update CGCG and CGLS override bits */
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
		/* enable 3Dcgcg FSM(0x0000363f) */
		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);

		/* set IDLE_POLL_COUNT(0x00900100) */
		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
		if (def != data)
			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
	} else {
		/* Disable CGCG/CGLS */
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
		/* disable cgcg, cgls should be disabled */
		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
		/* disable cgcg and cgls in FSM */
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
	}
}
static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
							bool enable)
{
	uint32_t def, data;

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
		/* unset CGCG override */
		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
		else
			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
		/* update CGCG and CGLS override bits */
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* enable cgcg FSM(0x0000363F) */
		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
		data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
			RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);

		/* set IDLE_POLL_COUNT(0x00900100) */
		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
		if (def != data)
			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
	} else {
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
		/* reset CGCG/CGLS bits */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		/* disable cgcg and cgls in FSM */
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
	}
}
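
/*
 * MGCG/MGLS must be enabled before CGCG/CGLS and disabled after them; the
 * whole sequence below runs with the RLC held in safe mode.
 */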
static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					     bool enable)
{
	amdgpu_gfx_rlc_enter_safe_mode(adev);

	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS
		 * ===  MGCG + MGLS ===
		 */
		gfx_v10_0_update_medium_grain_clock_gating(adev, enable);
		/* ===  CGCG /CGLS for GFX 3D Only === */
		gfx_v10_0_update_3d_clock_gating(adev, enable);
		/* ===  CGCG + CGLS === */
		gfx_v10_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS
		 * ===  CGCG + CGLS ===
		 */
		gfx_v10_0_update_coarse_grain_clock_gating(adev, enable);
		/* ===  CGCG /CGLS for GFX 3D Only === */
		gfx_v10_0_update_3d_clock_gating(adev, enable);
		/* ===  MGCG + MGLS === */
		gfx_v10_0_update_medium_grain_clock_gating(adev, enable);
	}

	if (adev->cg_flags &
	    (AMD_CG_SUPPORT_GFX_MGCG |
	     AMD_CG_SUPPORT_GFX_CGLS |
	     AMD_CG_SUPPORT_GFX_CGCG |
	     AMD_CG_SUPPORT_GFX_CGLS |
	     AMD_CG_SUPPORT_GFX_3D_CGCG |
	     AMD_CG_SUPPORT_GFX_3D_CGLS))
		gfx_v10_0_enable_gui_idle_interrupt(adev, enable);

	amdgpu_gfx_rlc_exit_safe_mode(adev);

	return 0;
}
static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = {
	.is_rlc_enabled = gfx_v10_0_is_rlc_enabled,
	.set_safe_mode = gfx_v10_0_set_safe_mode,
	.unset_safe_mode = gfx_v10_0_unset_safe_mode,
	.init = gfx_v10_0_rlc_init,
	.get_csb_size = gfx_v10_0_get_csb_size,
	.get_csb_buffer = gfx_v10_0_get_csb_buffer,
	.resume = gfx_v10_0_rlc_resume,
	.stop = gfx_v10_0_rlc_stop,
	.reset = gfx_v10_0_rlc_reset,
	.start = gfx_v10_0_rlc_start
};
static int gfx_v10_0_set_powergating_state(void *handle,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;

	switch (adev->asic_type) {
	case CHIP_NAVI10:
	case CHIP_NAVI14:
		if (!enable) {
			amdgpu_gfx_off_ctrl(adev, false);
			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
		} else
			amdgpu_gfx_off_ctrl(adev, true);
		break;
	default:
		break;
	}
	return 0;
}

static int gfx_v10_0_set_clockgating_state(void *handle,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_NAVI10:
	case CHIP_NAVI14:
	case CHIP_NAVI12:
		gfx_v10_0_update_gfx_clock_gating(adev,
						  state == AMD_CG_STATE_GATE ? true : false);
		break;
	default:
		break;
	}
	return 0;
}
static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;

	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
}
static u64 gfx_v10_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 is 32bit rptr*/
}

static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u64 wptr;

	/* XXX check if swapping is necessary on BE */
	if (ring->use_doorbell) {
		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
	} else {
		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
	}

	return wptr;
}

static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
		WDOORBELL64(ring->doorbell_index, ring->wptr);
	} else {
		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
	}
}
static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 hardware is 32bit rptr */
}

static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	u64 wptr;

	/* XXX check if swapping is necessary on BE */
	if (ring->use_doorbell)
		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
	else
		BUG();
	return wptr;
}

static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	if (ring->use_doorbell) {
		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
		WDOORBELL64(ring->doorbell_index, ring->wptr);
	} else {
		BUG(); /* only DOORBELL method supported on gfx10 now */
	}
}
static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 ref_and_mask, reg_mem_engine;
	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;

	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
		switch (ring->me) {
		case 1:
			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
			break;
		case 2:
			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
		reg_mem_engine = 1; /* pfp */
	}

	gfx_v10_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
			       ref_and_mask, ref_and_mask, 0x20);
}
static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				       struct amdgpu_job *job,
				       struct amdgpu_ib *ib,
				       uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CNST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (flags & AMDGPU_IB_PREEMPTED)
			control |= INDIRECT_BUFFER_PRE_RESUME(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v10_0_ring_emit_de_meta(ring,
				    flags & AMDGPU_IB_PREEMPTED ? true : false);
	}

	amdgpu_ring_write(ring, header);
	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, control);
}
static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					   struct amdgpu_job *job,
					   struct amdgpu_ib *ib,
					   uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	/* Currently, there is a high possibility to get wave ID mismatch
	 * between ME and GDS, leading to a hw deadlock, because ME generates
	 * different wave IDs than the GDS expects. This situation happens
	 * randomly when at least 5 compute pipes use GDS ordered append.
	 * The wave IDs generated by ME are also wrong after suspend/resume.
	 * Those are probably bugs somewhere else in the kernel driver.
	 *
	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
	 * GDS to 0 for this ring (me/pipe).
	 */
	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, control);
}
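
/*
 * Fences are emitted with a RELEASE_MEM packet that writes back and
 * invalidates the GL2/metadata caches and, when requested, raises an
 * end-of-pipe interrupt once the fence value has landed in memory.
 */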
static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
				      u64 seq, unsigned flags)
{
	struct amdgpu_device *adev = ring->adev;
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* Interrupts don't work properly on the GFX10.1 model yet. Use the fallback instead */
	if (adev->pdev->device == 0x50)
		int_sel = false;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
				 PACKET3_RELEASE_MEM_GCR_GL2_WB |
				 PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
				 PACKET3_RELEASE_MEM_GCR_GLM_WB |
				 PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
				 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
				 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));

	/*
	 * the address should be Qword aligned if 64bit write, Dword
	 * aligned if only send 32bit data low (discard data high)
	 */
	if (write64bit)
		BUG_ON(addr & 0x7);
	else
		BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
	amdgpu_ring_write(ring, 0);
}
static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	gfx_v10_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
			       upper_32_bits(addr), seq, 0xffffffff, 4);
}

static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					 unsigned vmid, uint64_t pd_addr)
{
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* compute doesn't have PFP */
	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
static void gfx_v10_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					  u64 seq, unsigned int flags)
{
	struct amdgpu_device *adev = ring->adev;

	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v10_0_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_mcbp)
		gfx_v10_0_ring_emit_ce_meta(ring,
				    flags & AMDGPU_IB_PREEMPTED ? true : false);

	gfx_v10_0_ring_emit_tmz(ring, true);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
static unsigned gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */

	return ret;
}
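
/*
 * init_cond_exec above leaves a dummy DW count (0x55aa55aa) in the ring;
 * patch_cond_exec below overwrites it with the real number of DWs to skip,
 * taking ring-buffer wrap-around into account.
 */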
static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr - 1) & ring->buf_mask;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
}
static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
	int i, r = 0;
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &kiq->ring;

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size))
		return -ENOMEM;

	/* assert preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, false);

	/* assert IB preemption, emit the trailing fence */
	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
				   ring->trail_fence_gpu_addr,
				   ++ring->trail_seq);
	amdgpu_ring_commit(kiq_ring);

	/* poll the trailing fence */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (ring->trail_seq ==
		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout) {
		r = -EINVAL;
		DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
	}

	/* deassert preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, true);
	return r;
}
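
/*
 * The CE/DE metadata writes below support mid-command-buffer preemption
 * (MCBP): on resume the payload is copied from the per-context CSA instead
 * of the zero-initialized template.
 */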
static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
{
	struct amdgpu_device *adev = ring->adev;
	struct v10_ce_ib_state ce_payload = {0};
	uint64_t csa_addr;
	int cnt;

	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
	csa_addr = amdgpu_csa_vaddr(ring->adev);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(csa_addr +
			      offsetof(struct v10_gfx_meta_data, ce_payload)));
	amdgpu_ring_write(ring, upper_32_bits(csa_addr +
			      offsetof(struct v10_gfx_meta_data, ce_payload)));

	if (resume)
		amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr +
					   offsetof(struct v10_gfx_meta_data,
						    ce_payload),
					   sizeof(ce_payload) >> 2);
	else
		amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
					   sizeof(ce_payload) >> 2);
}

static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
{
	struct amdgpu_device *adev = ring->adev;
	struct v10_de_ib_state de_payload = {0};
	uint64_t csa_addr, gds_addr;
	int cnt;

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	gds_addr = ALIGN(csa_addr + AMDGPU_CSA_SIZE - adev->gds.gds_size,
			 PAGE_SIZE);
	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);

	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(csa_addr +
			      offsetof(struct v10_gfx_meta_data, de_payload)));
	amdgpu_ring_write(ring, upper_32_bits(csa_addr +
			      offsetof(struct v10_gfx_meta_data, de_payload)));

	if (resume)
		amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr +
					   offsetof(struct v10_gfx_meta_data,
						    de_payload),
					   sizeof(de_payload) >> 2);
	else
		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
					   sizeof(de_payload) >> 2);
}
static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
}

static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				     uint32_t val)
{
	uint32_t cmd = 0;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
		break;
	case AMDGPU_RING_TYPE_KIQ:
		cmd = (1 << 16); /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					 uint32_t val, uint32_t mask)
{
	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}
static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
						   uint32_t reg0, uint32_t reg1,
						   uint32_t ref, uint32_t mask)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	struct amdgpu_device *adev = ring->adev;
	bool fw_version_ok = false;

	fw_version_ok = adev->gfx.cp_fw_write_wait;

	if (fw_version_ok)
		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
				       ref, mask, 0x20);
	else
		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
							   ref, mask);
}
static void
gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
				      uint32_t me, uint32_t pipe,
				      enum amdgpu_interrupt_state state)
{
	uint32_t cp_int_cntl, cp_int_cntl_reg;

	switch (me) {
	case 0:
		switch (pipe) {
		case 0:
			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0);
			break;
		case 1:
			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING1);
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
		break;
	default:
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		cp_int_cntl = RREG32(cp_int_cntl_reg);
		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
					    TIME_STAMP_INT_ENABLE, 0);
		WREG32(cp_int_cntl_reg, cp_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		cp_int_cntl = RREG32(cp_int_cntl_reg);
		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
					    TIME_STAMP_INT_ENABLE, 1);
		WREG32(cp_int_cntl_reg, cp_int_cntl);
		break;
	default:
		break;
	}
}

static void gfx_v10_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						       int me, int pipe,
						       enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
			break;
		case 1:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
			break;
		case 2:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
			break;
		case 3:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 0);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 1);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
static int gfx_v10_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *src,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
		gfx_v10_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
		break;
	case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
		gfx_v10_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
static int gfx_v10_0_eop_irq(struct amdgpu_device *adev,
			     struct amdgpu_irq_src *source,
			     struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		if (pipe_id == 0)
			amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		else
			amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead
			 * of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) &&
			    (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}

	return 0;
}
static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       PRIV_REG_INT_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
	default:
		break;
	}

	return 0;
}

static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					       struct amdgpu_irq_src *source,
					       unsigned type,
					       enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       PRIV_INSTR_INT_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
	default:
		break;
	}

	return 0;
}
static void gfx_v10_0_handle_priv_fault(struct amdgpu_device *adev,
					struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			ring = &adev->gfx.gfx_ring[i];
			/* we only enabled 1 gfx queue per pipe for now */
			if (ring->me == me_id && ring->pipe == pipe_id)
				drm_sched_fault(&ring->sched);
		}
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if (ring->me == me_id && ring->pipe == pipe_id &&
			    ring->queue == queue_id)
				drm_sched_fault(&ring->sched);
		}
		break;
	default:
		BUG();
	}
}

static int gfx_v10_0_priv_reg_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v10_0_handle_priv_fault(adev, entry);
	return 0;
}

static int gfx_v10_0_priv_inst_irq(struct amdgpu_device *adev,
				   struct amdgpu_irq_src *source,
				   struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v10_0_handle_priv_fault(adev, entry);
	return 0;
}
static int gfx_v10_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *src,
					     unsigned int type,
					     enum amdgpu_interrupt_state state)
{
	uint32_t tmp, target;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	if (ring->me == 1)
		target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
	else
		target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL);
	target += ring->pipe;

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		if (state == AMDGPU_IRQ_STATE_DISABLE) {
			tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
					    GENERIC2_INT_ENABLE, 0);
			WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);

			tmp = RREG32(target);
			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
					    GENERIC2_INT_ENABLE, 0);
			WREG32(target, tmp);
		} else {
			tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
					    GENERIC2_INT_ENABLE, 1);
			WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);

			tmp = RREG32(target);
			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
					    GENERIC2_INT_ENABLE, 1);
			WREG32(target, tmp);
		}
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}

static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev,
			     struct amdgpu_irq_src *source,
			     struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		  me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}
static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
	.name = "gfx_v10_0",
	.early_init = gfx_v10_0_early_init,
	.late_init = gfx_v10_0_late_init,
	.sw_init = gfx_v10_0_sw_init,
	.sw_fini = gfx_v10_0_sw_fini,
	.hw_init = gfx_v10_0_hw_init,
	.hw_fini = gfx_v10_0_hw_fini,
	.suspend = gfx_v10_0_suspend,
	.resume = gfx_v10_0_resume,
	.is_idle = gfx_v10_0_is_idle,
	.wait_for_idle = gfx_v10_0_wait_for_idle,
	.soft_reset = gfx_v10_0_soft_reset,
	.set_clockgating_state = gfx_v10_0_set_clockgating_state,
	.set_powergating_state = gfx_v10_0_set_powergating_state,
	.get_clockgating_state = gfx_v10_0_get_clockgating_state,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v10_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v10_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v10_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		5 + /* COND_EXEC */
		7 + /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* VM_FLUSH */
		8 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		     * the first COND_EXEC jump to the place
		     * just prior to this double SWITCH_BUFFER
		     */
		5 + /* COND_EXEC */
		7 + /* HDP_flush */
		4 + /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v10_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v10_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v10_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v10_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v10_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
	.test_ring = gfx_v10_0_ring_test_ring,
	.test_ib = gfx_v10_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v10_0_ring_emit_sb,
	.emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
	.preempt_ib = gfx_v10_0_ring_preempt_ib,
	.emit_tmz = gfx_v10_0_ring_emit_tmz,
	.emit_wreg = gfx_v10_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v10_0_ring_get_rptr_compute,
	.get_wptr = gfx_v10_0_ring_get_wptr_compute,
	.set_wptr = gfx_v10_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v10_0_ring_emit_gds_switch */
		7 + /* gfx_v10_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v10_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v10_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v10_0_ring_emit_ib_compute */
	.emit_ib = gfx_v10_0_ring_emit_ib_compute,
	.emit_fence = gfx_v10_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v10_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v10_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
	.test_ring = gfx_v10_0_ring_test_ring,
	.test_ib = gfx_v10_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v10_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
};

static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v10_0_ring_get_rptr_compute,
	.get_wptr = gfx_v10_0_ring_get_wptr_compute,
	.set_wptr = gfx_v10_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v10_0_ring_emit_gds_switch */
		7 + /* gfx_v10_0_ring_emit_hdp_flush */
		5 + /*hdp invalidate */
		7 + /* gfx_v10_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v10_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v10_0_ring_emit_ib_compute */
	.emit_ib = gfx_v10_0_ring_emit_ib_compute,
	.emit_fence = gfx_v10_0_ring_emit_fence_kiq,
	.test_ring = gfx_v10_0_ring_test_ring,
	.test_ib = gfx_v10_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v10_0_ring_emit_rreg,
	.emit_wreg = gfx_v10_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
};
static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v10_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v10_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v10_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v10_0_eop_irq_funcs = {
	.set = gfx_v10_0_set_eop_interrupt_state,
	.process = gfx_v10_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_reg_irq_funcs = {
	.set = gfx_v10_0_set_priv_reg_fault_state,
	.process = gfx_v10_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_inst_irq_funcs = {
	.set = gfx_v10_0_set_priv_inst_fault_state,
	.process = gfx_v10_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v10_0_kiq_irq_funcs = {
	.set = gfx_v10_0_kiq_set_interrupt_state,
	.process = gfx_v10_0_kiq_irq,
};
static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v10_0_eop_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v10_0_kiq_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v10_0_priv_inst_irq_funcs;
}

static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_NAVI10:
	case CHIP_NAVI14:
	case CHIP_NAVI12:
		adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs;
		break;
	default:
		break;
	}
}
static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
{
	unsigned total_cu = adev->gfx.config.max_cu_per_sh *
			    adev->gfx.config.max_sh_per_se *
			    adev->gfx.config.max_shader_engines;

	adev->gds.gds_size = 0x10000;
	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}
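
/*
 * On gfx10 the CUs are grouped into WGPs (two CUs per WGP); harvesting is
 * therefore expressed as an inactive-WGP bitmap in the shader array config
 * registers handled below.
 */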
static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
							  u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 data, wgp_bitmask;
	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;

	wgp_bitmask =
		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);

	return (~data) & wgp_bitmask;
}
static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 wgp_idx, wgp_active_bitmap;
	u32 cu_bitmap_per_wgp, cu_active_bitmap;

	wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
	cu_active_bitmap = 0;

	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
		/* if there is one WGP enabled, it means 2 CUs will be enabled */
		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
		if (wgp_active_bitmap & (1 << wgp_idx))
			cu_active_bitmap |= cu_bitmap_per_wgp;
	}

	return cu_active_bitmap;
}
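
/*
 * CU info is gathered per SE/SH: each shader array is selected, the
 * user-disabled WGP mask is applied, and the active WGP bitmap is expanded
 * into a CU bitmap (two CUs per active WGP).
 */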
static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}
const struct amdgpu_ip_block_version gfx_v10_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 10,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v10_0_ip_funcs,
};