Linux 4.19.133
[linux/fpc-iii.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
blobe1cb7fa89e4d69f7eb3fe6df079f505b89f19665
1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_gfx.h"
28 #include "vi.h"
29 #include "vi_structs.h"
30 #include "vid.h"
31 #include "amdgpu_ucode.h"
32 #include "amdgpu_atombios.h"
33 #include "atombios_i2c.h"
34 #include "clearstate_vi.h"
36 #include "gmc/gmc_8_2_d.h"
37 #include "gmc/gmc_8_2_sh_mask.h"
39 #include "oss/oss_3_0_d.h"
40 #include "oss/oss_3_0_sh_mask.h"
42 #include "bif/bif_5_0_d.h"
43 #include "bif/bif_5_0_sh_mask.h"
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
52 #include "smu/smu_7_1_3_d.h"
54 #include "ivsrcid/ivsrcid_vislands30.h"
/*
 * Sizing constants for this GFX8 driver file.
 * NOTE(review): GFX8_MEC_HPD_SIZE is presumably a byte count -- confirm at
 * the allocation site, which is outside the visible part of the file.
 */
56 #define GFX8_NUM_GFX_RINGS 1
57 #define GFX8_MEC_HPD_SIZE 2048
/*
 * Per-ASIC "golden" GB_ADDR_CONFIG values. Topaz and Carrizo share the same
 * value; Polaris11 and Tonga differ only in the low bits.
 */
59 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
60 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
61 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
62 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
/*
 * Field-placement helpers: each macro shifts its argument left by the
 * corresponding <REG>__<FIELD>__SHIFT constant so the value lands in that
 * field. The first five use GB_TILE_MODE0 shifts, the last four use
 * GB_MACROTILE_MODE0 shifts. No masking is applied, so the caller must pass
 * a value that fits the field.
 */
64 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
65 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
66 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
67 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
68 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
69 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
70 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
71 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
72 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
/*
 * Single-bit masks named after RLC_CGTT_MGCG_OVERRIDE, occupying bits 0
 * through 5 in the order CPF, RLC, MGCG, CGCG, CGLS, GRBM.
 */
74 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
75 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
76 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
77 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
78 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
79 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L
/*
 * BPM SERDES command codes: 1 for "set", 0 for the other command.
 * NOTE(review): "CLE" presumably abbreviates "clear" -- confirm.
 */
81 /* BPM SERDES CMD */
82 #define SET_BPM_SERDES_CMD 1
83 #define CLE_BPM_SERDES_CMD 0
85 /* BPM register addresses, numbered consecutively from 0. */
86 enum {
87 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */
88 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */
89 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */
90 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */
91 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */
92 BPM_REG_FGCG_MAX /* terminator: one past the last address above (value 5) */
/*
 * NOTE(review): presumably the number of entries in the RLC "format direct"
 * register list -- the user is outside the visible part of the file; confirm.
 */
95 #define RLC_FormatDirectRegListLength 14
/*
 * Firmware image names declared for this module, grouped by ASIC
 * (carrizo, stoney, tonga, topaz, fiji, polaris10/11/12, vegam).
 * Each group lists ce, pfp, me, mec and rlc images; stoney and topaz have
 * no mec2 image. The polaris groups additionally list "_2" variants of the
 * ce, pfp, me, mec and mec2 images (but not of rlc).
 */
97 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
101 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
107 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
108 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
114 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
120 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
121 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
127 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
130 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
131 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
133 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
142 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
143 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
144 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
145 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
154 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
155 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
156 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
157 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
166 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
167 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
168 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
169 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
170 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
/*
 * GDS register names for each of the 16 VMIDs (0..15). Row i holds, in
 * order, the BASE, SIZE, GWS and OA register of VMID i.
 * NOTE(review): the member names of struct amdgpu_gds_reg_offset are not
 * visible here; the row order is assumed to match its field order -- confirm.
 */
173 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
175 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
176 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
177 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
178 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
179 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
180 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
181 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
182 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
183 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
184 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
185 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
186 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
187 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
188 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
189 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
190 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
/*
 * Tonga golden settings. Flat list of u32 triplets: a register name (mm*)
 * followed by two 32-bit constants.
 * NOTE(review): the two constants are presumably an AND mask and the value
 * to program -- confirm against amdgpu_device_program_register_sequence().
 * Passed to that helper for CHIP_TONGA in gfx_v8_0_init_golden_registers(),
 * after tonga_mgcg_cgcg_init and before tonga_golden_common_all.
 */
193 static const u32 golden_settings_tonga_a11[] =
195 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
196 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
197 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
198 mmGB_GPU_ID, 0x0000000f, 0x00000000,
199 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
200 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
201 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
202 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
203 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
204 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
205 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
206 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
207 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
208 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
209 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
210 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/*
 * Tonga common settings: u32 triplets of register name plus two constants
 * (NOTE(review): presumably mask and value -- confirm against
 * amdgpu_device_program_register_sequence()). Every row uses 0xffffffff as
 * its second element. The mmGB_ADDR_CONFIG value equals
 * TONGA_GB_ADDR_CONFIG_GOLDEN. Programmed last for CHIP_TONGA in
 * gfx_v8_0_init_golden_registers().
 */
213 static const u32 tonga_golden_common_all[] =
215 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
216 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
217 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
218 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
219 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
220 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
221 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
222 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
/*
 * Tonga clock-gating (MGCG/CGCG) init table: u32 triplets of register name
 * plus two constants (NOTE(review): presumably mask and value -- confirm
 * against amdgpu_device_program_register_sequence()).
 * Layout: RLC override, then the CGTT/clock-control registers, then
 * per-CU CGTS registers for CU0..CU7 (CU0 and CU4 use the *_TA_SQC_*
 * register, the others *_TA_*), then four trailing control registers.
 * mmGRBM_GFX_INDEX is written before each of the two main groups.
 * Programmed first for CHIP_TONGA in gfx_v8_0_init_golden_registers().
 */
225 static const u32 tonga_mgcg_cgcg_init[] =
227 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
228 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
229 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
230 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
231 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
232 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
233 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
234 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
235 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
236 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
237 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
238 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
239 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
240 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
241 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
242 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
243 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
244 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
245 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
246 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
247 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
248 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
249 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
250 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
251 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
252 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
253 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
254 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
255 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
256 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
257 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
258 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
259 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
260 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
261 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
262 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
263 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
264 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
265 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
266 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
267 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
268 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
269 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
270 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
271 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
272 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
273 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
274 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
275 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
276 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
277 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
278 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
279 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
280 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
281 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
282 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
283 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
284 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
285 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
286 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
287 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
288 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
289 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
290 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
291 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
292 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
293 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
294 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
295 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
296 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
297 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
298 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
299 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
300 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
301 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
/*
 * VegaM golden settings: u32 triplets of register name plus two constants
 * (NOTE(review): presumably mask and value -- confirm against
 * amdgpu_device_program_register_sequence()). Programmed first for
 * CHIP_VEGAM in gfx_v8_0_init_golden_registers(); no mgcg_cgcg table is
 * programmed for that case.
 */
304 static const u32 golden_settings_vegam_a11[] =
306 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
307 mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
308 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
309 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
310 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
311 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
312 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
313 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
314 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
315 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
316 mmSQ_CONFIG, 0x07f80000, 0x01180000,
317 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
318 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
319 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
320 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
321 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
322 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/*
 * VegaM common settings: u32 triplets of register name plus two constants
 * (NOTE(review): presumably mask and value -- confirm against
 * amdgpu_device_program_register_sequence()). Unlike the Tonga/Fiji common
 * tables it has no mmPA_SC_RASTER_CONFIG rows. Programmed second for
 * CHIP_VEGAM in gfx_v8_0_init_golden_registers().
 */
325 static const u32 vegam_golden_common_all[] =
327 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
328 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
329 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
330 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
331 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
332 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
/*
 * Polaris11 golden settings: u32 triplets of register name plus two
 * constants (NOTE(review): presumably mask and value -- confirm against
 * amdgpu_device_program_register_sequence()). Programmed first for both
 * CHIP_POLARIS11 and CHIP_POLARIS12 in gfx_v8_0_init_golden_registers().
 */
335 static const u32 golden_settings_polaris11_a11[] =
337 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
338 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
339 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
340 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
341 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
342 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
343 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
344 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
345 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
346 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
347 mmSQ_CONFIG, 0x07f80000, 0x01180000,
348 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
349 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
350 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
351 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
352 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
353 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/*
 * Polaris11 common settings: u32 triplets of register name plus two
 * constants (NOTE(review): presumably mask and value -- confirm against
 * amdgpu_device_program_register_sequence()). The mmGB_ADDR_CONFIG value
 * equals POLARIS11_GB_ADDR_CONFIG_GOLDEN. Programmed second for
 * CHIP_POLARIS11 and CHIP_POLARIS12 in gfx_v8_0_init_golden_registers().
 */
356 static const u32 polaris11_golden_common_all[] =
358 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
359 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
360 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
361 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
362 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
363 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
/*
 * Polaris10 golden settings: u32 triplets of register name plus two
 * constants (NOTE(review): presumably mask and value -- confirm against
 * amdgpu_device_program_register_sequence()). Compared with the Polaris11
 * table it adds an mmATC_MISC_CG row and has no mmTCP_CHAN_STEER_LO row.
 * Passed to the helper for CHIP_POLARIS10 in
 * gfx_v8_0_init_golden_registers().
 */
366 static const u32 golden_settings_polaris10_a11[] =
368 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
369 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
370 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
371 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
372 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
373 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
374 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
375 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
376 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
377 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
378 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
379 mmSQ_CONFIG, 0x07f80000, 0x07180000,
380 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
381 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
382 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
383 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
384 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/*
 * Polaris10 common settings: u32 triplets of register name plus two
 * constants (NOTE(review): presumably mask and value -- confirm against
 * amdgpu_device_program_register_sequence()). The rows carry the same
 * values as tonga_golden_common_all.
 * NOTE(review): the call site is outside the visible part of the file;
 * presumably programmed for CHIP_POLARIS10 -- confirm.
 */
387 static const u32 polaris10_golden_common_all[] =
389 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
390 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
391 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
392 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
393 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
394 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
395 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
396 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
/*
 * Fiji common settings: u32 triplets of register name plus two constants
 * (NOTE(review): presumably mask and value -- confirm against
 * amdgpu_device_program_register_sequence()). mmGRBM_GFX_INDEX is written
 * a second time before the trailing mmSPI_CONFIG_CNTL_1 row. Programmed
 * last for CHIP_FIJI in gfx_v8_0_init_golden_registers().
 */
399 static const u32 fiji_golden_common_all[] =
401 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
402 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
403 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
404 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
405 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
406 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
407 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
408 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
409 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
410 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
/*
 * Fiji golden settings: u32 triplets of register name plus two constants
 * (NOTE(review): presumably mask and value -- confirm against
 * amdgpu_device_program_register_sequence()). Programmed for CHIP_FIJI in
 * gfx_v8_0_init_golden_registers(), after fiji_mgcg_cgcg_init and before
 * fiji_golden_common_all.
 */
413 static const u32 golden_settings_fiji_a10[] =
415 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
416 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
417 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
418 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
419 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
420 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
421 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
422 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
423 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
424 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
425 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/*
 * Fiji clock-gating (MGCG/CGCG) init table: u32 triplets of register name
 * plus two constants (NOTE(review): presumably mask and value -- confirm
 * against amdgpu_device_program_register_sequence()). Unlike the Tonga and
 * Carrizo tables it contains no per-CU mmCGTS_CUn_* rows. Programmed first
 * for CHIP_FIJI in gfx_v8_0_init_golden_registers().
 */
428 static const u32 fiji_mgcg_cgcg_init[] =
430 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
431 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
432 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
433 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
434 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
435 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
436 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
437 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
438 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
439 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
440 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
441 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
442 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
443 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
444 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
445 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
446 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
447 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
448 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
449 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
450 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
451 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
452 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
453 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
454 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
455 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
456 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
457 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
458 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
459 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
460 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
461 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
462 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
463 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
464 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
/*
 * Iceland golden settings: u32 triplets of register name plus two constants
 * (NOTE(review): presumably mask and value -- confirm against
 * amdgpu_device_program_register_sequence()). Programmed for CHIP_TOPAZ in
 * gfx_v8_0_init_golden_registers(), after iceland_mgcg_cgcg_init and before
 * iceland_golden_common_all.
 */
467 static const u32 golden_settings_iceland_a11[] =
469 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
470 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
471 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
472 mmGB_GPU_ID, 0x0000000f, 0x00000000,
473 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
474 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
475 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
476 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
477 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
478 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
479 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
480 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
481 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
482 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
483 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
484 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
/*
 * Iceland common settings: u32 triplets of register name plus two constants
 * (NOTE(review): presumably mask and value -- confirm against
 * amdgpu_device_program_register_sequence()). The mmGB_ADDR_CONFIG value
 * equals TOPAZ_GB_ADDR_CONFIG_GOLDEN. Programmed last for CHIP_TOPAZ in
 * gfx_v8_0_init_golden_registers().
 */
487 static const u32 iceland_golden_common_all[] =
489 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
490 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
491 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
492 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
493 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
494 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
495 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
496 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
/*
 * Iceland clock-gating (MGCG/CGCG) init table: u32 triplets of register
 * name plus two constants (NOTE(review): presumably mask and value --
 * confirm against amdgpu_device_program_register_sequence()).
 * Differences from the Tonga table visible here: per-CU rows cover only
 * CU0..CU5, the CP/CPC/CPF and TCI clock-control values differ, the
 * CU0/CU4 TA_SQC value is 0x0f840f87, and there is no trailing
 * mmCP_MEM_SLP_CNTL row. Programmed first for CHIP_TOPAZ in
 * gfx_v8_0_init_golden_registers().
 */
499 static const u32 iceland_mgcg_cgcg_init[] =
501 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
502 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
503 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
504 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
505 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
506 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
507 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
508 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
509 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
510 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
511 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
512 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
513 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
514 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
515 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
516 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
517 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
518 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
519 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
520 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
521 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
522 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
523 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
524 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
525 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
526 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
527 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
528 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
529 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
530 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
531 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
532 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
533 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
534 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
535 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
536 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
537 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
538 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
539 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
540 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
541 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
542 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
543 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
544 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
545 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
546 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
547 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
548 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
549 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
550 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
551 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
552 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
553 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
554 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
555 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
556 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
557 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
558 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
559 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
560 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
561 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
562 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
563 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
564 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
/*
 * "cz" golden settings: u32 triplets of register name plus two constants
 * (NOTE(review): presumably mask and value -- confirm against
 * amdgpu_device_program_register_sequence()).
 * NOTE(review): the call site is outside the visible part of the file;
 * presumably programmed for Carrizo -- confirm.
 */
567 static const u32 cz_golden_settings_a11[] =
569 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
570 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
571 mmGB_GPU_ID, 0x0000000f, 0x00000000,
572 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
573 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
574 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
575 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
576 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
577 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
578 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
579 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
580 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
/*
 * "cz" common settings: u32 triplets of register name plus two constants
 * (NOTE(review): presumably mask and value -- confirm against
 * amdgpu_device_program_register_sequence()). The mmGB_ADDR_CONFIG value
 * equals CARRIZO_GB_ADDR_CONFIG_GOLDEN; the rows carry the same values as
 * iceland_golden_common_all.
 * NOTE(review): the call site is outside the visible part of the file.
 */
583 static const u32 cz_golden_common_all[] =
585 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
586 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
587 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
588 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
589 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
590 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
591 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
592 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
/*
 * "cz" clock-gating (MGCG/CGCG) init table: u32 triplets of register name
 * plus two constants (NOTE(review): presumably mask and value -- confirm
 * against amdgpu_device_program_register_sequence()).
 * Same row layout as the Tonga table (per-CU rows for CU0..CU7); the
 * visible value differences are mmCGTT_CPF_CLK_CTRL (0x00000100) and
 * mmRLC_CGCG_CGLS_CTRL (0x0020003f).
 * NOTE(review): the call site is outside the visible part of the file.
 */
595 static const u32 cz_mgcg_cgcg_init[] =
597 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
598 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
599 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
600 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
601 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
602 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
603 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
604 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
605 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
606 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
607 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
608 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
609 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
610 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
611 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
612 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
613 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
614 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
615 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
616 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
617 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
618 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
619 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
620 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
621 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
622 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
623 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
624 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
625 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
626 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
627 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
628 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
629 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
630 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
631 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
632 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
633 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
634 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
635 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
636 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
637 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
638 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
639 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
640 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
641 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
642 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
643 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
644 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
645 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
646 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
647 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
648 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
649 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
650 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
651 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
652 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
653 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
654 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
655 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
656 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
657 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
658 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
659 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
660 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
661 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
662 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
663 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
664 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
665 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
666 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
667 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
668 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
669 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
670 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
671 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
/*
 * Stoney golden settings: u32 triplets of register name plus two constants
 * (NOTE(review): presumably mask and value -- confirm against
 * amdgpu_device_program_register_sequence()).
 * NOTE(review): the call site is outside the visible part of the file.
 */
674 static const u32 stoney_golden_settings_a11[] =
676 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
677 mmGB_GPU_ID, 0x0000000f, 0x00000000,
678 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
679 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
680 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
681 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
682 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
683 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
684 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
685 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
/*
 * Stoney common settings: u32 triplets of register name plus two constants
 * (NOTE(review): presumably mask and value -- confirm against
 * amdgpu_device_program_register_sequence()). Its mmGB_ADDR_CONFIG value
 * (0x12010001) does not match any of the *_GB_ADDR_CONFIG_GOLDEN defines in
 * this file.
 * NOTE(review): the call site is outside the visible part of the file.
 */
688 static const u32 stoney_golden_common_all[] =
690 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
691 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
692 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
693 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
694 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
695 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
696 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
697 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
/*
 * Stoney clock-gating init table: u32 triplets of register name plus two
 * constants (NOTE(review): presumably mask and value -- confirm against
 * amdgpu_device_program_register_sequence()). Much shorter than the other
 * *_mgcg_cgcg_init tables: five rows, with no CGTT or per-CU CGTS entries.
 * NOTE(review): the call site is outside the visible part of the file.
 */
700 static const u32 stoney_mgcg_cgcg_init[] =
702 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
703 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
704 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
705 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
706 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
/*
 * Human-readable descriptions of the seven SQ EDC source codes, in the
 * order INVALID, INST, SGPR, VGPR, LDS, GDS, TA.
 * NOTE(review): presumably indexed by the numeric source field of an SQ EDC
 * info register -- the user is outside the visible part of the file; confirm
 * the index is bounds-checked there.
 */
710 static const char * const sq_edc_source_names[] = {
711 "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
712 "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
713 "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
714 "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
715 "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
716 "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
717 "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
/*
 * Forward declarations of file-local (static) helpers. Their definitions
 * are not in the visible part of the file. Six take the device, two take a
 * ring; only gfx_v8_0_get_csb_size() returns a value (u32).
 */
720 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
721 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
722 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
723 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
724 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
725 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
726 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
727 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
729 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
731 switch (adev->asic_type) {
732 case CHIP_TOPAZ:
733 amdgpu_device_program_register_sequence(adev,
734 iceland_mgcg_cgcg_init,
735 ARRAY_SIZE(iceland_mgcg_cgcg_init));
736 amdgpu_device_program_register_sequence(adev,
737 golden_settings_iceland_a11,
738 ARRAY_SIZE(golden_settings_iceland_a11));
739 amdgpu_device_program_register_sequence(adev,
740 iceland_golden_common_all,
741 ARRAY_SIZE(iceland_golden_common_all));
742 break;
743 case CHIP_FIJI:
744 amdgpu_device_program_register_sequence(adev,
745 fiji_mgcg_cgcg_init,
746 ARRAY_SIZE(fiji_mgcg_cgcg_init));
747 amdgpu_device_program_register_sequence(adev,
748 golden_settings_fiji_a10,
749 ARRAY_SIZE(golden_settings_fiji_a10));
750 amdgpu_device_program_register_sequence(adev,
751 fiji_golden_common_all,
752 ARRAY_SIZE(fiji_golden_common_all));
753 break;
755 case CHIP_TONGA:
756 amdgpu_device_program_register_sequence(adev,
757 tonga_mgcg_cgcg_init,
758 ARRAY_SIZE(tonga_mgcg_cgcg_init));
759 amdgpu_device_program_register_sequence(adev,
760 golden_settings_tonga_a11,
761 ARRAY_SIZE(golden_settings_tonga_a11));
762 amdgpu_device_program_register_sequence(adev,
763 tonga_golden_common_all,
764 ARRAY_SIZE(tonga_golden_common_all));
765 break;
766 case CHIP_VEGAM:
767 amdgpu_device_program_register_sequence(adev,
768 golden_settings_vegam_a11,
769 ARRAY_SIZE(golden_settings_vegam_a11));
770 amdgpu_device_program_register_sequence(adev,
771 vegam_golden_common_all,
772 ARRAY_SIZE(vegam_golden_common_all));
773 break;
774 case CHIP_POLARIS11:
775 case CHIP_POLARIS12:
776 amdgpu_device_program_register_sequence(adev,
777 golden_settings_polaris11_a11,
778 ARRAY_SIZE(golden_settings_polaris11_a11));
779 amdgpu_device_program_register_sequence(adev,
780 polaris11_golden_common_all,
781 ARRAY_SIZE(polaris11_golden_common_all));
782 break;
783 case CHIP_POLARIS10:
784 amdgpu_device_program_register_sequence(adev,
785 golden_settings_polaris10_a11,
786 ARRAY_SIZE(golden_settings_polaris10_a11));
787 amdgpu_device_program_register_sequence(adev,
788 polaris10_golden_common_all,
789 ARRAY_SIZE(polaris10_golden_common_all));
790 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
791 if (adev->pdev->revision == 0xc7 &&
792 ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
793 (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
794 (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
795 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
796 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
798 break;
799 case CHIP_CARRIZO:
800 amdgpu_device_program_register_sequence(adev,
801 cz_mgcg_cgcg_init,
802 ARRAY_SIZE(cz_mgcg_cgcg_init));
803 amdgpu_device_program_register_sequence(adev,
804 cz_golden_settings_a11,
805 ARRAY_SIZE(cz_golden_settings_a11));
806 amdgpu_device_program_register_sequence(adev,
807 cz_golden_common_all,
808 ARRAY_SIZE(cz_golden_common_all));
809 break;
810 case CHIP_STONEY:
811 amdgpu_device_program_register_sequence(adev,
812 stoney_mgcg_cgcg_init,
813 ARRAY_SIZE(stoney_mgcg_cgcg_init));
814 amdgpu_device_program_register_sequence(adev,
815 stoney_golden_settings_a11,
816 ARRAY_SIZE(stoney_golden_settings_a11));
817 amdgpu_device_program_register_sequence(adev,
818 stoney_golden_common_all,
819 ARRAY_SIZE(stoney_golden_common_all));
820 break;
821 default:
822 break;
826 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
828 adev->gfx.scratch.num_reg = 8;
829 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
830 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
833 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
835 struct amdgpu_device *adev = ring->adev;
836 uint32_t scratch;
837 uint32_t tmp = 0;
838 unsigned i;
839 int r;
841 r = amdgpu_gfx_scratch_get(adev, &scratch);
842 if (r) {
843 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
844 return r;
846 WREG32(scratch, 0xCAFEDEAD);
847 r = amdgpu_ring_alloc(ring, 3);
848 if (r) {
849 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
850 ring->idx, r);
851 amdgpu_gfx_scratch_free(adev, scratch);
852 return r;
854 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
855 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
856 amdgpu_ring_write(ring, 0xDEADBEEF);
857 amdgpu_ring_commit(ring);
859 for (i = 0; i < adev->usec_timeout; i++) {
860 tmp = RREG32(scratch);
861 if (tmp == 0xDEADBEEF)
862 break;
863 DRM_UDELAY(1);
865 if (i < adev->usec_timeout) {
866 DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
867 ring->idx, i);
868 } else {
869 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
870 ring->idx, scratch, tmp);
871 r = -EINVAL;
873 amdgpu_gfx_scratch_free(adev, scratch);
874 return r;
877 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
879 struct amdgpu_device *adev = ring->adev;
880 struct amdgpu_ib ib;
881 struct dma_fence *f = NULL;
883 unsigned int index;
884 uint64_t gpu_addr;
885 uint32_t tmp;
886 long r;
888 r = amdgpu_device_wb_get(adev, &index);
889 if (r) {
890 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
891 return r;
894 gpu_addr = adev->wb.gpu_addr + (index * 4);
895 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
896 memset(&ib, 0, sizeof(ib));
897 r = amdgpu_ib_get(adev, NULL, 16, &ib);
898 if (r) {
899 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
900 goto err1;
902 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
903 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
904 ib.ptr[2] = lower_32_bits(gpu_addr);
905 ib.ptr[3] = upper_32_bits(gpu_addr);
906 ib.ptr[4] = 0xDEADBEEF;
907 ib.length_dw = 5;
909 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
910 if (r)
911 goto err2;
913 r = dma_fence_wait_timeout(f, false, timeout);
914 if (r == 0) {
915 DRM_ERROR("amdgpu: IB test timed out.\n");
916 r = -ETIMEDOUT;
917 goto err2;
918 } else if (r < 0) {
919 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
920 goto err2;
923 tmp = adev->wb.wb[index];
924 if (tmp == 0xDEADBEEF) {
925 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
926 r = 0;
927 } else {
928 DRM_ERROR("ib test on ring %d failed\n", ring->idx);
929 r = -EINVAL;
932 err2:
933 amdgpu_ib_free(adev, &ib, NULL);
934 dma_fence_put(f);
935 err1:
936 amdgpu_device_wb_free(adev, index);
937 return r;
941 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
943 release_firmware(adev->gfx.pfp_fw);
944 adev->gfx.pfp_fw = NULL;
945 release_firmware(adev->gfx.me_fw);
946 adev->gfx.me_fw = NULL;
947 release_firmware(adev->gfx.ce_fw);
948 adev->gfx.ce_fw = NULL;
949 release_firmware(adev->gfx.rlc_fw);
950 adev->gfx.rlc_fw = NULL;
951 release_firmware(adev->gfx.mec_fw);
952 adev->gfx.mec_fw = NULL;
953 if ((adev->asic_type != CHIP_STONEY) &&
954 (adev->asic_type != CHIP_TOPAZ))
955 release_firmware(adev->gfx.mec2_fw);
956 adev->gfx.mec2_fw = NULL;
958 kfree(adev->gfx.rlc.register_list_format);
961 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
963 const char *chip_name;
964 char fw_name[30];
965 int err;
966 struct amdgpu_firmware_info *info = NULL;
967 const struct common_firmware_header *header = NULL;
968 const struct gfx_firmware_header_v1_0 *cp_hdr;
969 const struct rlc_firmware_header_v2_0 *rlc_hdr;
970 unsigned int *tmp = NULL, i;
972 DRM_DEBUG("\n");
974 switch (adev->asic_type) {
975 case CHIP_TOPAZ:
976 chip_name = "topaz";
977 break;
978 case CHIP_TONGA:
979 chip_name = "tonga";
980 break;
981 case CHIP_CARRIZO:
982 chip_name = "carrizo";
983 break;
984 case CHIP_FIJI:
985 chip_name = "fiji";
986 break;
987 case CHIP_STONEY:
988 chip_name = "stoney";
989 break;
990 case CHIP_POLARIS10:
991 chip_name = "polaris10";
992 break;
993 case CHIP_POLARIS11:
994 chip_name = "polaris11";
995 break;
996 case CHIP_POLARIS12:
997 chip_name = "polaris12";
998 break;
999 case CHIP_VEGAM:
1000 chip_name = "vegam";
1001 break;
1002 default:
1003 BUG();
1006 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1007 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
1008 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1009 if (err == -ENOENT) {
1010 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1011 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1013 } else {
1014 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1015 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1017 if (err)
1018 goto out;
1019 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1020 if (err)
1021 goto out;
1022 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1023 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1024 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1026 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1027 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1028 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1029 if (err == -ENOENT) {
1030 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1031 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1033 } else {
1034 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1035 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1037 if (err)
1038 goto out;
1039 err = amdgpu_ucode_validate(adev->gfx.me_fw);
1040 if (err)
1041 goto out;
1042 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1043 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1045 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1047 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1048 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1049 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1050 if (err == -ENOENT) {
1051 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1052 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1054 } else {
1055 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1056 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1058 if (err)
1059 goto out;
1060 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1061 if (err)
1062 goto out;
1063 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1064 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1065 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1068 * Support for MCBP/Virtualization in combination with chained IBs is
1069 * formal released on feature version #46
1071 if (adev->gfx.ce_feature_version >= 46 &&
1072 adev->gfx.pfp_feature_version >= 46) {
1073 adev->virt.chained_ib_support = true;
1074 DRM_INFO("Chained IB support enabled!\n");
1075 } else
1076 adev->virt.chained_ib_support = false;
1078 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1079 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1080 if (err)
1081 goto out;
1082 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1083 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1084 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1085 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1087 adev->gfx.rlc.save_and_restore_offset =
1088 le32_to_cpu(rlc_hdr->save_and_restore_offset);
1089 adev->gfx.rlc.clear_state_descriptor_offset =
1090 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1091 adev->gfx.rlc.avail_scratch_ram_locations =
1092 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1093 adev->gfx.rlc.reg_restore_list_size =
1094 le32_to_cpu(rlc_hdr->reg_restore_list_size);
1095 adev->gfx.rlc.reg_list_format_start =
1096 le32_to_cpu(rlc_hdr->reg_list_format_start);
1097 adev->gfx.rlc.reg_list_format_separate_start =
1098 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1099 adev->gfx.rlc.starting_offsets_start =
1100 le32_to_cpu(rlc_hdr->starting_offsets_start);
1101 adev->gfx.rlc.reg_list_format_size_bytes =
1102 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1103 adev->gfx.rlc.reg_list_size_bytes =
1104 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1106 adev->gfx.rlc.register_list_format =
1107 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1108 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1110 if (!adev->gfx.rlc.register_list_format) {
1111 err = -ENOMEM;
1112 goto out;
1115 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1116 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1117 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1118 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1120 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1122 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1123 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1124 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1125 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1127 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1128 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1129 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1130 if (err == -ENOENT) {
1131 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1132 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1134 } else {
1135 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1136 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1138 if (err)
1139 goto out;
1140 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1141 if (err)
1142 goto out;
1143 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1144 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1145 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1147 if ((adev->asic_type != CHIP_STONEY) &&
1148 (adev->asic_type != CHIP_TOPAZ)) {
1149 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1150 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1151 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1152 if (err == -ENOENT) {
1153 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1154 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1156 } else {
1157 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1158 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1160 if (!err) {
1161 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1162 if (err)
1163 goto out;
1164 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1165 adev->gfx.mec2_fw->data;
1166 adev->gfx.mec2_fw_version =
1167 le32_to_cpu(cp_hdr->header.ucode_version);
1168 adev->gfx.mec2_feature_version =
1169 le32_to_cpu(cp_hdr->ucode_feature_version);
1170 } else {
1171 err = 0;
1172 adev->gfx.mec2_fw = NULL;
1176 if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1177 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1178 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1179 info->fw = adev->gfx.pfp_fw;
1180 header = (const struct common_firmware_header *)info->fw->data;
1181 adev->firmware.fw_size +=
1182 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1184 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1185 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1186 info->fw = adev->gfx.me_fw;
1187 header = (const struct common_firmware_header *)info->fw->data;
1188 adev->firmware.fw_size +=
1189 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1191 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1192 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1193 info->fw = adev->gfx.ce_fw;
1194 header = (const struct common_firmware_header *)info->fw->data;
1195 adev->firmware.fw_size +=
1196 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1198 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1199 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1200 info->fw = adev->gfx.rlc_fw;
1201 header = (const struct common_firmware_header *)info->fw->data;
1202 adev->firmware.fw_size +=
1203 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1205 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1206 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1207 info->fw = adev->gfx.mec_fw;
1208 header = (const struct common_firmware_header *)info->fw->data;
1209 adev->firmware.fw_size +=
1210 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1212 /* we need account JT in */
1213 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1214 adev->firmware.fw_size +=
1215 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1217 if (amdgpu_sriov_vf(adev)) {
1218 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1219 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1220 info->fw = adev->gfx.mec_fw;
1221 adev->firmware.fw_size +=
1222 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1225 if (adev->gfx.mec2_fw) {
1226 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1227 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1228 info->fw = adev->gfx.mec2_fw;
1229 header = (const struct common_firmware_header *)info->fw->data;
1230 adev->firmware.fw_size +=
1231 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1236 out:
1237 if (err) {
1238 dev_err(adev->dev,
1239 "gfx8: Failed to load firmware \"%s\"\n",
1240 fw_name);
1241 release_firmware(adev->gfx.pfp_fw);
1242 adev->gfx.pfp_fw = NULL;
1243 release_firmware(adev->gfx.me_fw);
1244 adev->gfx.me_fw = NULL;
1245 release_firmware(adev->gfx.ce_fw);
1246 adev->gfx.ce_fw = NULL;
1247 release_firmware(adev->gfx.rlc_fw);
1248 adev->gfx.rlc_fw = NULL;
1249 release_firmware(adev->gfx.mec_fw);
1250 adev->gfx.mec_fw = NULL;
1251 release_firmware(adev->gfx.mec2_fw);
1252 adev->gfx.mec2_fw = NULL;
1254 return err;
1257 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1258 volatile u32 *buffer)
1260 u32 count = 0, i;
1261 const struct cs_section_def *sect = NULL;
1262 const struct cs_extent_def *ext = NULL;
1264 if (adev->gfx.rlc.cs_data == NULL)
1265 return;
1266 if (buffer == NULL)
1267 return;
1269 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1270 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1272 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1273 buffer[count++] = cpu_to_le32(0x80000000);
1274 buffer[count++] = cpu_to_le32(0x80000000);
1276 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1277 for (ext = sect->section; ext->extent != NULL; ++ext) {
1278 if (sect->id == SECT_CONTEXT) {
1279 buffer[count++] =
1280 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1281 buffer[count++] = cpu_to_le32(ext->reg_index -
1282 PACKET3_SET_CONTEXT_REG_START);
1283 for (i = 0; i < ext->reg_count; i++)
1284 buffer[count++] = cpu_to_le32(ext->extent[i]);
1285 } else {
1286 return;
1291 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1292 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1293 PACKET3_SET_CONTEXT_REG_START);
1294 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1295 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1297 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1298 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1300 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1301 buffer[count++] = cpu_to_le32(0);
1304 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1306 const __le32 *fw_data;
1307 volatile u32 *dst_ptr;
1308 int me, i, max_me = 4;
1309 u32 bo_offset = 0;
1310 u32 table_offset, table_size;
1312 if (adev->asic_type == CHIP_CARRIZO)
1313 max_me = 5;
1315 /* write the cp table buffer */
1316 dst_ptr = adev->gfx.rlc.cp_table_ptr;
1317 for (me = 0; me < max_me; me++) {
1318 if (me == 0) {
1319 const struct gfx_firmware_header_v1_0 *hdr =
1320 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1321 fw_data = (const __le32 *)
1322 (adev->gfx.ce_fw->data +
1323 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1324 table_offset = le32_to_cpu(hdr->jt_offset);
1325 table_size = le32_to_cpu(hdr->jt_size);
1326 } else if (me == 1) {
1327 const struct gfx_firmware_header_v1_0 *hdr =
1328 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1329 fw_data = (const __le32 *)
1330 (adev->gfx.pfp_fw->data +
1331 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1332 table_offset = le32_to_cpu(hdr->jt_offset);
1333 table_size = le32_to_cpu(hdr->jt_size);
1334 } else if (me == 2) {
1335 const struct gfx_firmware_header_v1_0 *hdr =
1336 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1337 fw_data = (const __le32 *)
1338 (adev->gfx.me_fw->data +
1339 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1340 table_offset = le32_to_cpu(hdr->jt_offset);
1341 table_size = le32_to_cpu(hdr->jt_size);
1342 } else if (me == 3) {
1343 const struct gfx_firmware_header_v1_0 *hdr =
1344 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1345 fw_data = (const __le32 *)
1346 (adev->gfx.mec_fw->data +
1347 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1348 table_offset = le32_to_cpu(hdr->jt_offset);
1349 table_size = le32_to_cpu(hdr->jt_size);
1350 } else if (me == 4) {
1351 const struct gfx_firmware_header_v1_0 *hdr =
1352 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1353 fw_data = (const __le32 *)
1354 (adev->gfx.mec2_fw->data +
1355 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1356 table_offset = le32_to_cpu(hdr->jt_offset);
1357 table_size = le32_to_cpu(hdr->jt_size);
1360 for (i = 0; i < table_size; i ++) {
1361 dst_ptr[bo_offset + i] =
1362 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1365 bo_offset += table_size;
1369 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1371 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
1372 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
1375 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1377 volatile u32 *dst_ptr;
1378 u32 dws;
1379 const struct cs_section_def *cs_data;
1380 int r;
1382 adev->gfx.rlc.cs_data = vi_cs_data;
1384 cs_data = adev->gfx.rlc.cs_data;
1386 if (cs_data) {
1387 /* clear state block */
1388 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1390 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1391 AMDGPU_GEM_DOMAIN_VRAM,
1392 &adev->gfx.rlc.clear_state_obj,
1393 &adev->gfx.rlc.clear_state_gpu_addr,
1394 (void **)&adev->gfx.rlc.cs_ptr);
1395 if (r) {
1396 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1397 gfx_v8_0_rlc_fini(adev);
1398 return r;
1401 /* set up the cs buffer */
1402 dst_ptr = adev->gfx.rlc.cs_ptr;
1403 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1404 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1405 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1408 if ((adev->asic_type == CHIP_CARRIZO) ||
1409 (adev->asic_type == CHIP_STONEY)) {
1410 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1411 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1412 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1413 &adev->gfx.rlc.cp_table_obj,
1414 &adev->gfx.rlc.cp_table_gpu_addr,
1415 (void **)&adev->gfx.rlc.cp_table_ptr);
1416 if (r) {
1417 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1418 return r;
1421 cz_init_cp_jump_table(adev);
1423 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1424 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1427 return 0;
1430 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1432 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1435 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1437 int r;
1438 u32 *hpd;
1439 size_t mec_hpd_size;
1441 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1443 /* take ownership of the relevant compute queues */
1444 amdgpu_gfx_compute_queue_acquire(adev);
1446 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1448 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1449 AMDGPU_GEM_DOMAIN_GTT,
1450 &adev->gfx.mec.hpd_eop_obj,
1451 &adev->gfx.mec.hpd_eop_gpu_addr,
1452 (void **)&hpd);
1453 if (r) {
1454 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1455 return r;
1458 memset(hpd, 0, mec_hpd_size);
1460 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1461 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1463 return 0;
1466 static const u32 vgpr_init_compute_shader[] =
1468 0x7e000209, 0x7e020208,
1469 0x7e040207, 0x7e060206,
1470 0x7e080205, 0x7e0a0204,
1471 0x7e0c0203, 0x7e0e0202,
1472 0x7e100201, 0x7e120200,
1473 0x7e140209, 0x7e160208,
1474 0x7e180207, 0x7e1a0206,
1475 0x7e1c0205, 0x7e1e0204,
1476 0x7e200203, 0x7e220202,
1477 0x7e240201, 0x7e260200,
1478 0x7e280209, 0x7e2a0208,
1479 0x7e2c0207, 0x7e2e0206,
1480 0x7e300205, 0x7e320204,
1481 0x7e340203, 0x7e360202,
1482 0x7e380201, 0x7e3a0200,
1483 0x7e3c0209, 0x7e3e0208,
1484 0x7e400207, 0x7e420206,
1485 0x7e440205, 0x7e460204,
1486 0x7e480203, 0x7e4a0202,
1487 0x7e4c0201, 0x7e4e0200,
1488 0x7e500209, 0x7e520208,
1489 0x7e540207, 0x7e560206,
1490 0x7e580205, 0x7e5a0204,
1491 0x7e5c0203, 0x7e5e0202,
1492 0x7e600201, 0x7e620200,
1493 0x7e640209, 0x7e660208,
1494 0x7e680207, 0x7e6a0206,
1495 0x7e6c0205, 0x7e6e0204,
1496 0x7e700203, 0x7e720202,
1497 0x7e740201, 0x7e760200,
1498 0x7e780209, 0x7e7a0208,
1499 0x7e7c0207, 0x7e7e0206,
1500 0xbf8a0000, 0xbf810000,
1503 static const u32 sgpr_init_compute_shader[] =
1505 0xbe8a0100, 0xbe8c0102,
1506 0xbe8e0104, 0xbe900106,
1507 0xbe920108, 0xbe940100,
1508 0xbe960102, 0xbe980104,
1509 0xbe9a0106, 0xbe9c0108,
1510 0xbe9e0100, 0xbea00102,
1511 0xbea20104, 0xbea40106,
1512 0xbea60108, 0xbea80100,
1513 0xbeaa0102, 0xbeac0104,
1514 0xbeae0106, 0xbeb00108,
1515 0xbeb20100, 0xbeb40102,
1516 0xbeb60104, 0xbeb80106,
1517 0xbeba0108, 0xbebc0100,
1518 0xbebe0102, 0xbec00104,
1519 0xbec20106, 0xbec40108,
1520 0xbec60100, 0xbec80102,
1521 0xbee60004, 0xbee70005,
1522 0xbeea0006, 0xbeeb0007,
1523 0xbee80008, 0xbee90009,
1524 0xbefc0000, 0xbf8a0000,
1525 0xbf810000, 0x00000000,
1528 static const u32 vgpr_init_regs[] =
1530 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1531 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1532 mmCOMPUTE_NUM_THREAD_X, 256*4,
1533 mmCOMPUTE_NUM_THREAD_Y, 1,
1534 mmCOMPUTE_NUM_THREAD_Z, 1,
1535 mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1536 mmCOMPUTE_PGM_RSRC2, 20,
1537 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1538 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1539 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1540 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1541 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1542 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1543 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1544 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1545 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1546 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1549 static const u32 sgpr1_init_regs[] =
1551 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1552 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1553 mmCOMPUTE_NUM_THREAD_X, 256*5,
1554 mmCOMPUTE_NUM_THREAD_Y, 1,
1555 mmCOMPUTE_NUM_THREAD_Z, 1,
1556 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1557 mmCOMPUTE_PGM_RSRC2, 20,
1558 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1559 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1560 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1561 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1562 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1563 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1564 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1565 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1566 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1567 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1570 static const u32 sgpr2_init_regs[] =
1572 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1573 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1574 mmCOMPUTE_NUM_THREAD_X, 256*5,
1575 mmCOMPUTE_NUM_THREAD_Y, 1,
1576 mmCOMPUTE_NUM_THREAD_Z, 1,
1577 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1578 mmCOMPUTE_PGM_RSRC2, 20,
1579 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1580 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1581 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1582 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1583 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1584 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1585 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1586 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1587 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1588 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1591 static const u32 sec_ded_counter_registers[] =
1593 mmCPC_EDC_ATC_CNT,
1594 mmCPC_EDC_SCRATCH_CNT,
1595 mmCPC_EDC_UCODE_CNT,
1596 mmCPF_EDC_ATC_CNT,
1597 mmCPF_EDC_ROQ_CNT,
1598 mmCPF_EDC_TAG_CNT,
1599 mmCPG_EDC_ATC_CNT,
1600 mmCPG_EDC_DMA_CNT,
1601 mmCPG_EDC_TAG_CNT,
1602 mmDC_EDC_CSINVOC_CNT,
1603 mmDC_EDC_RESTORE_CNT,
1604 mmDC_EDC_STATE_CNT,
1605 mmGDS_EDC_CNT,
1606 mmGDS_EDC_GRBM_CNT,
1607 mmGDS_EDC_OA_DED,
1608 mmSPI_EDC_CNT,
1609 mmSQC_ATC_EDC_GATCL1_CNT,
1610 mmSQC_EDC_CNT,
1611 mmSQ_EDC_DED_CNT,
1612 mmSQ_EDC_INFO,
1613 mmSQ_EDC_SEC_CNT,
1614 mmTCC_EDC_CNT,
1615 mmTCP_ATC_EDC_GATCL1_CNT,
1616 mmTCP_EDC_CNT,
1617 mmTD_EDC_CNT
1620 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1622 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1623 struct amdgpu_ib ib;
1624 struct dma_fence *f = NULL;
1625 int r, i;
1626 u32 tmp;
1627 unsigned total_size, vgpr_offset, sgpr_offset;
1628 u64 gpu_addr;
1630 /* only supported on CZ */
1631 if (adev->asic_type != CHIP_CARRIZO)
1632 return 0;
1634 /* bail if the compute ring is not ready */
1635 if (!ring->ready)
1636 return 0;
1638 tmp = RREG32(mmGB_EDC_MODE);
1639 WREG32(mmGB_EDC_MODE, 0);
1641 total_size =
1642 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1643 total_size +=
1644 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1645 total_size +=
1646 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1647 total_size = ALIGN(total_size, 256);
1648 vgpr_offset = total_size;
1649 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1650 sgpr_offset = total_size;
1651 total_size += sizeof(sgpr_init_compute_shader);
1653 /* allocate an indirect buffer to put the commands in */
1654 memset(&ib, 0, sizeof(ib));
1655 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1656 if (r) {
1657 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1658 return r;
1661 /* load the compute shaders */
1662 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1663 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1665 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1666 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1668 /* init the ib length to 0 */
1669 ib.length_dw = 0;
1671 /* VGPR */
1672 /* write the register state for the compute dispatch */
1673 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1674 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1675 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1676 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1678 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1679 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1680 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1681 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1682 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1683 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1685 /* write dispatch packet */
1686 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1687 ib.ptr[ib.length_dw++] = 8; /* x */
1688 ib.ptr[ib.length_dw++] = 1; /* y */
1689 ib.ptr[ib.length_dw++] = 1; /* z */
1690 ib.ptr[ib.length_dw++] =
1691 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1693 /* write CS partial flush packet */
1694 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1695 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1697 /* SGPR1 */
1698 /* write the register state for the compute dispatch */
1699 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1700 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1701 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1702 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1704 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1705 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1706 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1707 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1708 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1709 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1711 /* write dispatch packet */
1712 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1713 ib.ptr[ib.length_dw++] = 8; /* x */
1714 ib.ptr[ib.length_dw++] = 1; /* y */
1715 ib.ptr[ib.length_dw++] = 1; /* z */
1716 ib.ptr[ib.length_dw++] =
1717 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1719 /* write CS partial flush packet */
1720 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1721 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1723 /* SGPR2 */
1724 /* write the register state for the compute dispatch */
1725 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1726 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1727 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1728 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1730 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1731 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1732 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1733 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1734 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1735 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1737 /* write dispatch packet */
1738 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1739 ib.ptr[ib.length_dw++] = 8; /* x */
1740 ib.ptr[ib.length_dw++] = 1; /* y */
1741 ib.ptr[ib.length_dw++] = 1; /* z */
1742 ib.ptr[ib.length_dw++] =
1743 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1745 /* write CS partial flush packet */
1746 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1747 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1749 /* schedule the ib on the ring */
1750 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1751 if (r) {
1752 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1753 goto fail;
1756 /* wait for the GPU to finish processing the IB */
1757 r = dma_fence_wait(f, false);
1758 if (r) {
1759 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1760 goto fail;
1763 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1764 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1765 WREG32(mmGB_EDC_MODE, tmp);
1767 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1768 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1769 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1772 /* read back registers to clear the counters */
1773 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1774 RREG32(sec_ded_counter_registers[i]);
1776 fail:
1777 amdgpu_ib_free(adev, &ib, NULL);
1778 dma_fence_put(f);
1780 return r;
1783 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1785 u32 gb_addr_config;
1786 u32 mc_shared_chmap, mc_arb_ramcfg;
1787 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1788 u32 tmp;
1789 int ret;
1791 switch (adev->asic_type) {
1792 case CHIP_TOPAZ:
1793 adev->gfx.config.max_shader_engines = 1;
1794 adev->gfx.config.max_tile_pipes = 2;
1795 adev->gfx.config.max_cu_per_sh = 6;
1796 adev->gfx.config.max_sh_per_se = 1;
1797 adev->gfx.config.max_backends_per_se = 2;
1798 adev->gfx.config.max_texture_channel_caches = 2;
1799 adev->gfx.config.max_gprs = 256;
1800 adev->gfx.config.max_gs_threads = 32;
1801 adev->gfx.config.max_hw_contexts = 8;
1803 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1804 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1805 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1806 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1807 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1808 break;
1809 case CHIP_FIJI:
1810 adev->gfx.config.max_shader_engines = 4;
1811 adev->gfx.config.max_tile_pipes = 16;
1812 adev->gfx.config.max_cu_per_sh = 16;
1813 adev->gfx.config.max_sh_per_se = 1;
1814 adev->gfx.config.max_backends_per_se = 4;
1815 adev->gfx.config.max_texture_channel_caches = 16;
1816 adev->gfx.config.max_gprs = 256;
1817 adev->gfx.config.max_gs_threads = 32;
1818 adev->gfx.config.max_hw_contexts = 8;
1820 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1821 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1822 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1823 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1824 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1825 break;
1826 case CHIP_POLARIS11:
1827 case CHIP_POLARIS12:
1828 ret = amdgpu_atombios_get_gfx_info(adev);
1829 if (ret)
1830 return ret;
1831 adev->gfx.config.max_gprs = 256;
1832 adev->gfx.config.max_gs_threads = 32;
1833 adev->gfx.config.max_hw_contexts = 8;
1835 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1836 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1837 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1838 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1839 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1840 break;
1841 case CHIP_POLARIS10:
1842 case CHIP_VEGAM:
1843 ret = amdgpu_atombios_get_gfx_info(adev);
1844 if (ret)
1845 return ret;
1846 adev->gfx.config.max_gprs = 256;
1847 adev->gfx.config.max_gs_threads = 32;
1848 adev->gfx.config.max_hw_contexts = 8;
1850 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1851 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1852 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1853 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1854 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1855 break;
1856 case CHIP_TONGA:
1857 adev->gfx.config.max_shader_engines = 4;
1858 adev->gfx.config.max_tile_pipes = 8;
1859 adev->gfx.config.max_cu_per_sh = 8;
1860 adev->gfx.config.max_sh_per_se = 1;
1861 adev->gfx.config.max_backends_per_se = 2;
1862 adev->gfx.config.max_texture_channel_caches = 8;
1863 adev->gfx.config.max_gprs = 256;
1864 adev->gfx.config.max_gs_threads = 32;
1865 adev->gfx.config.max_hw_contexts = 8;
1867 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1868 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1869 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1870 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1871 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1872 break;
1873 case CHIP_CARRIZO:
1874 adev->gfx.config.max_shader_engines = 1;
1875 adev->gfx.config.max_tile_pipes = 2;
1876 adev->gfx.config.max_sh_per_se = 1;
1877 adev->gfx.config.max_backends_per_se = 2;
1878 adev->gfx.config.max_cu_per_sh = 8;
1879 adev->gfx.config.max_texture_channel_caches = 2;
1880 adev->gfx.config.max_gprs = 256;
1881 adev->gfx.config.max_gs_threads = 32;
1882 adev->gfx.config.max_hw_contexts = 8;
1884 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1885 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1886 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1887 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1888 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1889 break;
1890 case CHIP_STONEY:
1891 adev->gfx.config.max_shader_engines = 1;
1892 adev->gfx.config.max_tile_pipes = 2;
1893 adev->gfx.config.max_sh_per_se = 1;
1894 adev->gfx.config.max_backends_per_se = 1;
1895 adev->gfx.config.max_cu_per_sh = 3;
1896 adev->gfx.config.max_texture_channel_caches = 2;
1897 adev->gfx.config.max_gprs = 256;
1898 adev->gfx.config.max_gs_threads = 16;
1899 adev->gfx.config.max_hw_contexts = 8;
1901 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1902 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1903 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1904 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1905 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1906 break;
1907 default:
1908 adev->gfx.config.max_shader_engines = 2;
1909 adev->gfx.config.max_tile_pipes = 4;
1910 adev->gfx.config.max_cu_per_sh = 2;
1911 adev->gfx.config.max_sh_per_se = 1;
1912 adev->gfx.config.max_backends_per_se = 2;
1913 adev->gfx.config.max_texture_channel_caches = 4;
1914 adev->gfx.config.max_gprs = 256;
1915 adev->gfx.config.max_gs_threads = 32;
1916 adev->gfx.config.max_hw_contexts = 8;
1918 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1919 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1920 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1921 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1922 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1923 break;
1926 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1927 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1928 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1930 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1931 adev->gfx.config.mem_max_burst_length_bytes = 256;
1932 if (adev->flags & AMD_IS_APU) {
1933 /* Get memory bank mapping mode. */
1934 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1935 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1936 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1938 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1939 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1940 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1942 /* Validate settings in case only one DIMM installed. */
1943 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1944 dimm00_addr_map = 0;
1945 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1946 dimm01_addr_map = 0;
1947 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1948 dimm10_addr_map = 0;
1949 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1950 dimm11_addr_map = 0;
1952 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1953 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1954 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1955 adev->gfx.config.mem_row_size_in_kb = 2;
1956 else
1957 adev->gfx.config.mem_row_size_in_kb = 1;
1958 } else {
1959 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1960 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1961 if (adev->gfx.config.mem_row_size_in_kb > 4)
1962 adev->gfx.config.mem_row_size_in_kb = 4;
1965 adev->gfx.config.shader_engine_tile_size = 32;
1966 adev->gfx.config.num_gpus = 1;
1967 adev->gfx.config.multi_gpu_tile_size = 64;
1969 /* fix up row size */
1970 switch (adev->gfx.config.mem_row_size_in_kb) {
1971 case 1:
1972 default:
1973 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1974 break;
1975 case 2:
1976 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1977 break;
1978 case 4:
1979 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1980 break;
1982 adev->gfx.config.gb_addr_config = gb_addr_config;
1984 return 0;
1987 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1988 int mec, int pipe, int queue)
1990 int r;
1991 unsigned irq_type;
1992 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1994 ring = &adev->gfx.compute_ring[ring_id];
1996 /* mec0 is me1 */
1997 ring->me = mec + 1;
1998 ring->pipe = pipe;
1999 ring->queue = queue;
2001 ring->ring_obj = NULL;
2002 ring->use_doorbell = true;
2003 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
2004 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2005 + (ring_id * GFX8_MEC_HPD_SIZE);
2006 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2008 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2009 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2010 + ring->pipe;
2012 /* type-2 packets are deprecated on MEC, use type-3 instead */
2013 r = amdgpu_ring_init(adev, ring, 1024,
2014 &adev->gfx.eop_irq, irq_type);
2015 if (r)
2016 return r;
2019 return 0;
2022 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
2024 static int gfx_v8_0_sw_init(void *handle)
2026 int i, j, k, r, ring_id;
2027 struct amdgpu_ring *ring;
2028 struct amdgpu_kiq *kiq;
2029 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2031 switch (adev->asic_type) {
2032 case CHIP_TONGA:
2033 case CHIP_CARRIZO:
2034 case CHIP_FIJI:
2035 case CHIP_POLARIS10:
2036 case CHIP_POLARIS11:
2037 case CHIP_POLARIS12:
2038 case CHIP_VEGAM:
2039 adev->gfx.mec.num_mec = 2;
2040 break;
2041 case CHIP_TOPAZ:
2042 case CHIP_STONEY:
2043 default:
2044 adev->gfx.mec.num_mec = 1;
2045 break;
2048 adev->gfx.mec.num_pipe_per_mec = 4;
2049 adev->gfx.mec.num_queue_per_pipe = 8;
2051 /* KIQ event */
2052 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_INT_IB2, &adev->gfx.kiq.irq);
2053 if (r)
2054 return r;
2056 /* EOP Event */
2057 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
2058 if (r)
2059 return r;
2061 /* Privileged reg */
2062 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
2063 &adev->gfx.priv_reg_irq);
2064 if (r)
2065 return r;
2067 /* Privileged inst */
2068 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
2069 &adev->gfx.priv_inst_irq);
2070 if (r)
2071 return r;
2073 /* Add CP EDC/ECC irq */
2074 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
2075 &adev->gfx.cp_ecc_error_irq);
2076 if (r)
2077 return r;
2079 /* SQ interrupts. */
2080 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
2081 &adev->gfx.sq_irq);
2082 if (r) {
2083 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
2084 return r;
2087 INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
2089 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2091 gfx_v8_0_scratch_init(adev);
2093 r = gfx_v8_0_init_microcode(adev);
2094 if (r) {
2095 DRM_ERROR("Failed to load gfx firmware!\n");
2096 return r;
2099 r = gfx_v8_0_rlc_init(adev);
2100 if (r) {
2101 DRM_ERROR("Failed to init rlc BOs!\n");
2102 return r;
2105 r = gfx_v8_0_mec_init(adev);
2106 if (r) {
2107 DRM_ERROR("Failed to init MEC BOs!\n");
2108 return r;
2111 /* set up the gfx ring */
2112 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2113 ring = &adev->gfx.gfx_ring[i];
2114 ring->ring_obj = NULL;
2115 sprintf(ring->name, "gfx");
2116 /* no gfx doorbells on iceland */
2117 if (adev->asic_type != CHIP_TOPAZ) {
2118 ring->use_doorbell = true;
2119 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2122 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2123 AMDGPU_CP_IRQ_GFX_EOP);
2124 if (r)
2125 return r;
2129 /* set up the compute queues - allocate horizontally across pipes */
2130 ring_id = 0;
2131 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2132 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2133 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2134 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2135 continue;
2137 r = gfx_v8_0_compute_ring_init(adev,
2138 ring_id,
2139 i, k, j);
2140 if (r)
2141 return r;
2143 ring_id++;
2148 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2149 if (r) {
2150 DRM_ERROR("Failed to init KIQ BOs!\n");
2151 return r;
2154 kiq = &adev->gfx.kiq;
2155 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2156 if (r)
2157 return r;
2159 /* create MQD for all compute queues as well as KIQ for SRIOV case */
2160 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2161 if (r)
2162 return r;
2164 /* reserve GDS, GWS and OA resource for gfx */
2165 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2166 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2167 &adev->gds.gds_gfx_bo, NULL, NULL);
2168 if (r)
2169 return r;
2171 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2172 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2173 &adev->gds.gws_gfx_bo, NULL, NULL);
2174 if (r)
2175 return r;
2177 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2178 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2179 &adev->gds.oa_gfx_bo, NULL, NULL);
2180 if (r)
2181 return r;
2183 adev->gfx.ce_ram_size = 0x8000;
2185 r = gfx_v8_0_gpu_early_init(adev);
2186 if (r)
2187 return r;
2189 return 0;
2192 static int gfx_v8_0_sw_fini(void *handle)
2194 int i;
2195 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2197 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2198 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2199 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2201 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2202 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2203 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2204 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2206 amdgpu_gfx_compute_mqd_sw_fini(adev);
2207 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2208 amdgpu_gfx_kiq_fini(adev);
2210 gfx_v8_0_mec_fini(adev);
2211 gfx_v8_0_rlc_fini(adev);
2212 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2213 &adev->gfx.rlc.clear_state_gpu_addr,
2214 (void **)&adev->gfx.rlc.cs_ptr);
2215 if ((adev->asic_type == CHIP_CARRIZO) ||
2216 (adev->asic_type == CHIP_STONEY)) {
2217 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2218 &adev->gfx.rlc.cp_table_gpu_addr,
2219 (void **)&adev->gfx.rlc.cp_table_ptr);
2221 gfx_v8_0_free_microcode(adev);
2223 return 0;
2226 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2228 uint32_t *modearray, *mod2array;
2229 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2230 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2231 u32 reg_offset;
2233 modearray = adev->gfx.config.tile_mode_array;
2234 mod2array = adev->gfx.config.macrotile_mode_array;
2236 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2237 modearray[reg_offset] = 0;
2239 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2240 mod2array[reg_offset] = 0;
2242 switch (adev->asic_type) {
2243 case CHIP_TOPAZ:
2244 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2245 PIPE_CONFIG(ADDR_SURF_P2) |
2246 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2247 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2248 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2249 PIPE_CONFIG(ADDR_SURF_P2) |
2250 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2251 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2252 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2253 PIPE_CONFIG(ADDR_SURF_P2) |
2254 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2255 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2256 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2257 PIPE_CONFIG(ADDR_SURF_P2) |
2258 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2259 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2260 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2261 PIPE_CONFIG(ADDR_SURF_P2) |
2262 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2263 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2264 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2265 PIPE_CONFIG(ADDR_SURF_P2) |
2266 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2267 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2268 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2269 PIPE_CONFIG(ADDR_SURF_P2) |
2270 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2271 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2272 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2273 PIPE_CONFIG(ADDR_SURF_P2));
2274 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2275 PIPE_CONFIG(ADDR_SURF_P2) |
2276 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2278 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2279 PIPE_CONFIG(ADDR_SURF_P2) |
2280 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2283 PIPE_CONFIG(ADDR_SURF_P2) |
2284 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2286 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2287 PIPE_CONFIG(ADDR_SURF_P2) |
2288 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2290 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291 PIPE_CONFIG(ADDR_SURF_P2) |
2292 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2294 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2295 PIPE_CONFIG(ADDR_SURF_P2) |
2296 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2298 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2299 PIPE_CONFIG(ADDR_SURF_P2) |
2300 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2302 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2303 PIPE_CONFIG(ADDR_SURF_P2) |
2304 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2307 PIPE_CONFIG(ADDR_SURF_P2) |
2308 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2310 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2311 PIPE_CONFIG(ADDR_SURF_P2) |
2312 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2314 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2315 PIPE_CONFIG(ADDR_SURF_P2) |
2316 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2318 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2319 PIPE_CONFIG(ADDR_SURF_P2) |
2320 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2322 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2323 PIPE_CONFIG(ADDR_SURF_P2) |
2324 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2326 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2327 PIPE_CONFIG(ADDR_SURF_P2) |
2328 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2330 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2331 PIPE_CONFIG(ADDR_SURF_P2) |
2332 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2333 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2334 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2335 PIPE_CONFIG(ADDR_SURF_P2) |
2336 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2337 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2338 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2339 PIPE_CONFIG(ADDR_SURF_P2) |
2340 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2341 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2342 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2343 PIPE_CONFIG(ADDR_SURF_P2) |
2344 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2345 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2347 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2348 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2350 NUM_BANKS(ADDR_SURF_8_BANK));
2351 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354 NUM_BANKS(ADDR_SURF_8_BANK));
2355 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358 NUM_BANKS(ADDR_SURF_8_BANK));
2359 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2362 NUM_BANKS(ADDR_SURF_8_BANK));
2363 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2366 NUM_BANKS(ADDR_SURF_8_BANK));
2367 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2370 NUM_BANKS(ADDR_SURF_8_BANK));
2371 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2374 NUM_BANKS(ADDR_SURF_8_BANK));
2375 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2376 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2377 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2378 NUM_BANKS(ADDR_SURF_16_BANK));
2379 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2382 NUM_BANKS(ADDR_SURF_16_BANK));
2383 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2386 NUM_BANKS(ADDR_SURF_16_BANK));
2387 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2388 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2389 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2390 NUM_BANKS(ADDR_SURF_16_BANK));
2391 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2393 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2394 NUM_BANKS(ADDR_SURF_16_BANK));
2395 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2396 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2397 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2398 NUM_BANKS(ADDR_SURF_16_BANK));
2399 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2400 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2401 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2402 NUM_BANKS(ADDR_SURF_8_BANK));
2404 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2405 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2406 reg_offset != 23)
2407 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2409 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2410 if (reg_offset != 7)
2411 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2413 break;
2414 case CHIP_FIJI:
2415 case CHIP_VEGAM:
2416 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2419 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2420 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2421 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2423 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2424 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2425 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2427 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2428 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2431 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2432 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2435 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2436 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2437 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2438 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2439 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2440 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2442 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2443 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2444 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2445 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2446 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2447 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2448 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2449 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2450 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2451 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2452 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2453 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2454 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2455 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2457 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2458 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2459 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2460 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2461 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2462 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2463 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2464 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2465 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2466 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2467 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2468 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2469 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2470 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2471 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2472 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2473 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2474 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2475 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2476 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2477 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2478 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2479 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2480 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2481 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2482 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2483 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2484 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2485 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2486 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2487 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2488 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2489 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2490 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2491 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2492 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2493 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2494 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2495 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2496 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2497 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2498 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2499 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2500 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2501 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2502 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2503 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2504 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2505 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2506 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2507 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2508 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2509 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2510 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2511 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2512 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2513 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2514 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2515 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2516 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2517 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2518 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2519 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2520 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2521 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2522 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2523 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2524 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2525 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2526 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2527 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2528 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2529 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2530 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2531 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2532 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2533 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2534 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2535 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2536 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2537 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2539 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2542 NUM_BANKS(ADDR_SURF_8_BANK));
2543 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2545 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2546 NUM_BANKS(ADDR_SURF_8_BANK));
2547 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2549 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2550 NUM_BANKS(ADDR_SURF_8_BANK));
2551 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2553 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2554 NUM_BANKS(ADDR_SURF_8_BANK));
2555 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2557 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2558 NUM_BANKS(ADDR_SURF_8_BANK));
2559 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2561 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2562 NUM_BANKS(ADDR_SURF_8_BANK));
2563 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2566 NUM_BANKS(ADDR_SURF_8_BANK));
2567 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2568 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2569 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2570 NUM_BANKS(ADDR_SURF_8_BANK));
2571 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2572 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2573 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2574 NUM_BANKS(ADDR_SURF_8_BANK));
2575 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2577 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2578 NUM_BANKS(ADDR_SURF_8_BANK));
2579 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2581 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2582 NUM_BANKS(ADDR_SURF_8_BANK));
2583 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2585 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2586 NUM_BANKS(ADDR_SURF_8_BANK));
2587 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2589 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2590 NUM_BANKS(ADDR_SURF_8_BANK));
2591 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2593 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2594 NUM_BANKS(ADDR_SURF_4_BANK));
2596 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2597 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2599 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2600 if (reg_offset != 7)
2601 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2603 break;
2604 case CHIP_TONGA:
2605 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2608 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2609 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2610 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2612 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2613 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2614 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2616 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2617 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2620 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2621 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2623 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2624 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2625 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2626 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2627 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2628 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2629 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2630 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2632 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2633 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2634 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2635 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2636 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2637 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2638 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2639 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2640 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2642 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2643 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2645 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2646 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2647 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2648 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2649 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2650 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2651 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2652 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2653 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2654 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2655 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2656 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2657 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2658 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2659 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2661 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2662 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2663 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2664 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2665 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2666 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2667 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2668 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2669 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2670 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2671 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2672 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2673 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2674 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2675 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2676 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2677 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2678 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2679 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2680 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2681 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2682 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2683 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2684 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2685 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2686 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2687 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2688 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2689 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2690 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2691 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2692 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2693 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2694 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2695 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2696 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2697 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2698 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2699 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2700 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2701 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2702 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2703 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2704 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2705 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2706 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2707 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2708 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2709 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2710 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2711 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2712 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2713 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2714 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2716 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2717 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2718 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2720 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2721 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2722 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2723 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2726 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2728 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2729 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2730 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2731 NUM_BANKS(ADDR_SURF_16_BANK));
2732 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2734 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2735 NUM_BANKS(ADDR_SURF_16_BANK));
2736 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2738 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2739 NUM_BANKS(ADDR_SURF_16_BANK));
2740 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2742 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2743 NUM_BANKS(ADDR_SURF_16_BANK));
2744 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2746 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2747 NUM_BANKS(ADDR_SURF_16_BANK));
2748 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2749 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2750 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2751 NUM_BANKS(ADDR_SURF_16_BANK));
2752 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2753 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2754 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2755 NUM_BANKS(ADDR_SURF_16_BANK));
2756 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2758 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2759 NUM_BANKS(ADDR_SURF_16_BANK));
2760 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2761 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2762 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2763 NUM_BANKS(ADDR_SURF_16_BANK));
2764 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2765 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2766 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2767 NUM_BANKS(ADDR_SURF_16_BANK));
2768 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2770 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2771 NUM_BANKS(ADDR_SURF_16_BANK));
2772 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2774 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2775 NUM_BANKS(ADDR_SURF_8_BANK));
2776 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2778 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2779 NUM_BANKS(ADDR_SURF_4_BANK));
2780 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2782 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2783 NUM_BANKS(ADDR_SURF_4_BANK));
2785 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2786 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2788 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2789 if (reg_offset != 7)
2790 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2792 break;
2793 case CHIP_POLARIS11:
2794 case CHIP_POLARIS12:
2795 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2798 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2799 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2800 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2802 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2803 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2806 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2807 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2808 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2809 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2810 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2811 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2812 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2814 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2815 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2816 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2818 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2819 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2820 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2822 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2823 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2824 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2825 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2826 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2827 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2828 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2829 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2830 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2831 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2832 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2833 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2834 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2835 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2836 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2837 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2838 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2840 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2841 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2842 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2843 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2844 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2845 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2846 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2847 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2848 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2849 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2850 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2851 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2852 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2853 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2854 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2855 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2856 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2857 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2858 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2860 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2861 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2862 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2863 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2864 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2865 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2866 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2867 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2868 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2869 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2870 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2871 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2872 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2873 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2874 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2875 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2876 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2877 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2878 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2879 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2880 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2881 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2882 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2883 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2884 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2885 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2886 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2887 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2888 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2889 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2890 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2891 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2892 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2893 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2894 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2895 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2896 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2897 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2898 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2899 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2900 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2901 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2904 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2905 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2907 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2908 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2909 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2910 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2911 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2912 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2913 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2914 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2915 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2916 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2918 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2919 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2920 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2921 NUM_BANKS(ADDR_SURF_16_BANK));
2923 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2924 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2925 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2926 NUM_BANKS(ADDR_SURF_16_BANK));
2928 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2929 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2930 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2931 NUM_BANKS(ADDR_SURF_16_BANK));
2933 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2934 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2935 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2936 NUM_BANKS(ADDR_SURF_16_BANK));
2938 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2941 NUM_BANKS(ADDR_SURF_16_BANK));
2943 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2944 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2945 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2946 NUM_BANKS(ADDR_SURF_16_BANK));
2948 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2949 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2950 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2951 NUM_BANKS(ADDR_SURF_16_BANK));
2953 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2954 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2955 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2956 NUM_BANKS(ADDR_SURF_16_BANK));
2958 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2959 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2960 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2961 NUM_BANKS(ADDR_SURF_16_BANK));
2963 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2964 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2965 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966 NUM_BANKS(ADDR_SURF_16_BANK));
2968 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971 NUM_BANKS(ADDR_SURF_16_BANK));
2973 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2974 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2975 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2976 NUM_BANKS(ADDR_SURF_16_BANK));
2978 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2979 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2980 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2981 NUM_BANKS(ADDR_SURF_8_BANK));
2983 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2985 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2986 NUM_BANKS(ADDR_SURF_4_BANK));
2988 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2989 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2991 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2992 if (reg_offset != 7)
2993 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2995 break;
2996 case CHIP_POLARIS10:
2997 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3000 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3001 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3002 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3004 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3005 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3006 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3007 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3008 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3009 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3010 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3011 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3012 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3013 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3014 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3015 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3016 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3017 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3018 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3019 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3020 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3021 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3022 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3023 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3024 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3025 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3026 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3027 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3028 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3029 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3030 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3031 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3032 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3033 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3034 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3035 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3036 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3037 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3038 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3039 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3040 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3041 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3042 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3043 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3044 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3045 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3046 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3047 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3048 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3049 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3050 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3051 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3052 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3053 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3054 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3055 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3056 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3057 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3058 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3059 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3060 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3061 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3062 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3063 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3064 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3065 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3066 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3067 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3068 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3069 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3070 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3071 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3072 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3073 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3074 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3075 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3076 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3077 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3078 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3079 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3080 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3081 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3082 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3083 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3084 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3085 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3086 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3087 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3088 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3089 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3090 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3091 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3092 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3093 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3094 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3095 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3096 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3097 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3099 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3100 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3101 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3102 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3103 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3104 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3105 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3106 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3107 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3108 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3109 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3110 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3111 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3112 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3113 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3114 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3115 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3116 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3117 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3118 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3120 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3121 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3122 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3123 NUM_BANKS(ADDR_SURF_16_BANK));
3125 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3126 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3127 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3128 NUM_BANKS(ADDR_SURF_16_BANK));
3130 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3131 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3132 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3133 NUM_BANKS(ADDR_SURF_16_BANK));
3135 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3136 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3137 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3138 NUM_BANKS(ADDR_SURF_16_BANK));
3140 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3141 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3142 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3143 NUM_BANKS(ADDR_SURF_16_BANK));
3145 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3146 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3147 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3148 NUM_BANKS(ADDR_SURF_16_BANK));
3150 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3151 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3152 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3153 NUM_BANKS(ADDR_SURF_16_BANK));
3155 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3156 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3157 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3158 NUM_BANKS(ADDR_SURF_16_BANK));
3160 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3161 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3162 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3163 NUM_BANKS(ADDR_SURF_16_BANK));
3165 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3166 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3167 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3168 NUM_BANKS(ADDR_SURF_16_BANK));
3170 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3171 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3172 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3173 NUM_BANKS(ADDR_SURF_16_BANK));
3175 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3176 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3177 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3178 NUM_BANKS(ADDR_SURF_8_BANK));
3180 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3182 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3183 NUM_BANKS(ADDR_SURF_4_BANK));
3185 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3186 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3187 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3188 NUM_BANKS(ADDR_SURF_4_BANK));
3190 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3191 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3193 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3194 if (reg_offset != 7)
3195 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3197 break;
3198 case CHIP_STONEY:
3199 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3200 PIPE_CONFIG(ADDR_SURF_P2) |
3201 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3202 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3203 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3204 PIPE_CONFIG(ADDR_SURF_P2) |
3205 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3206 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3207 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3208 PIPE_CONFIG(ADDR_SURF_P2) |
3209 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3210 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3211 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3212 PIPE_CONFIG(ADDR_SURF_P2) |
3213 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3214 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3215 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3216 PIPE_CONFIG(ADDR_SURF_P2) |
3217 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3218 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3219 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3220 PIPE_CONFIG(ADDR_SURF_P2) |
3221 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3222 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3223 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3224 PIPE_CONFIG(ADDR_SURF_P2) |
3225 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3226 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3227 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3228 PIPE_CONFIG(ADDR_SURF_P2));
3229 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3230 PIPE_CONFIG(ADDR_SURF_P2) |
3231 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3232 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3233 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3234 PIPE_CONFIG(ADDR_SURF_P2) |
3235 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3236 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3237 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238 PIPE_CONFIG(ADDR_SURF_P2) |
3239 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3240 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3241 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3242 PIPE_CONFIG(ADDR_SURF_P2) |
3243 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3244 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3245 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3246 PIPE_CONFIG(ADDR_SURF_P2) |
3247 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3248 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3249 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3250 PIPE_CONFIG(ADDR_SURF_P2) |
3251 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3252 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3253 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3254 PIPE_CONFIG(ADDR_SURF_P2) |
3255 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3256 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3257 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3258 PIPE_CONFIG(ADDR_SURF_P2) |
3259 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3260 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3261 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3262 PIPE_CONFIG(ADDR_SURF_P2) |
3263 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3264 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3265 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3266 PIPE_CONFIG(ADDR_SURF_P2) |
3267 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3268 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3269 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3270 PIPE_CONFIG(ADDR_SURF_P2) |
3271 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3272 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3273 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3274 PIPE_CONFIG(ADDR_SURF_P2) |
3275 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3276 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3277 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3278 PIPE_CONFIG(ADDR_SURF_P2) |
3279 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3280 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3281 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3282 PIPE_CONFIG(ADDR_SURF_P2) |
3283 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3284 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3285 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3286 PIPE_CONFIG(ADDR_SURF_P2) |
3287 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3288 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3289 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3290 PIPE_CONFIG(ADDR_SURF_P2) |
3291 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3292 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3293 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3294 PIPE_CONFIG(ADDR_SURF_P2) |
3295 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3296 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3297 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3298 PIPE_CONFIG(ADDR_SURF_P2) |
3299 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3300 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3302 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3303 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3304 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3305 NUM_BANKS(ADDR_SURF_8_BANK));
3306 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3307 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3308 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3309 NUM_BANKS(ADDR_SURF_8_BANK));
3310 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3311 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3312 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3313 NUM_BANKS(ADDR_SURF_8_BANK));
3314 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3315 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3316 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3317 NUM_BANKS(ADDR_SURF_8_BANK));
3318 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3319 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3320 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3321 NUM_BANKS(ADDR_SURF_8_BANK));
3322 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3324 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3325 NUM_BANKS(ADDR_SURF_8_BANK));
3326 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3327 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3328 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3329 NUM_BANKS(ADDR_SURF_8_BANK));
3330 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3331 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3332 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3333 NUM_BANKS(ADDR_SURF_16_BANK));
3334 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3335 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3336 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3337 NUM_BANKS(ADDR_SURF_16_BANK));
3338 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3339 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3340 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3341 NUM_BANKS(ADDR_SURF_16_BANK));
3342 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3343 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3344 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3345 NUM_BANKS(ADDR_SURF_16_BANK));
3346 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3347 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3348 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3349 NUM_BANKS(ADDR_SURF_16_BANK));
3350 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3351 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3352 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3353 NUM_BANKS(ADDR_SURF_16_BANK));
3354 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3355 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3356 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3357 NUM_BANKS(ADDR_SURF_8_BANK));
3359 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3360 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3361 reg_offset != 23)
3362 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3364 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3365 if (reg_offset != 7)
3366 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3368 break;
3369 default:
3370 dev_warn(adev->dev,
3371 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3372 adev->asic_type);
3374 case CHIP_CARRIZO:
3375 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3376 PIPE_CONFIG(ADDR_SURF_P2) |
3377 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3378 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3379 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3380 PIPE_CONFIG(ADDR_SURF_P2) |
3381 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3382 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3383 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3384 PIPE_CONFIG(ADDR_SURF_P2) |
3385 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3386 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3387 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3388 PIPE_CONFIG(ADDR_SURF_P2) |
3389 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3390 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3391 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3392 PIPE_CONFIG(ADDR_SURF_P2) |
3393 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3394 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3395 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3396 PIPE_CONFIG(ADDR_SURF_P2) |
3397 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3398 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3399 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3400 PIPE_CONFIG(ADDR_SURF_P2) |
3401 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3402 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3403 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3404 PIPE_CONFIG(ADDR_SURF_P2));
3405 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3406 PIPE_CONFIG(ADDR_SURF_P2) |
3407 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3408 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3409 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3410 PIPE_CONFIG(ADDR_SURF_P2) |
3411 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3412 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3413 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3414 PIPE_CONFIG(ADDR_SURF_P2) |
3415 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3416 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3417 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3418 PIPE_CONFIG(ADDR_SURF_P2) |
3419 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3420 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3421 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3422 PIPE_CONFIG(ADDR_SURF_P2) |
3423 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3424 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3425 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3426 PIPE_CONFIG(ADDR_SURF_P2) |
3427 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3428 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3429 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3430 PIPE_CONFIG(ADDR_SURF_P2) |
3431 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3432 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3433 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3434 PIPE_CONFIG(ADDR_SURF_P2) |
3435 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3436 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3437 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3438 PIPE_CONFIG(ADDR_SURF_P2) |
3439 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3440 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3441 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3442 PIPE_CONFIG(ADDR_SURF_P2) |
3443 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3444 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3445 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3446 PIPE_CONFIG(ADDR_SURF_P2) |
3447 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3448 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3449 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3450 PIPE_CONFIG(ADDR_SURF_P2) |
3451 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3452 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3453 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3454 PIPE_CONFIG(ADDR_SURF_P2) |
3455 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3456 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3457 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3458 PIPE_CONFIG(ADDR_SURF_P2) |
3459 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3460 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3461 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3462 PIPE_CONFIG(ADDR_SURF_P2) |
3463 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3464 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3465 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3466 PIPE_CONFIG(ADDR_SURF_P2) |
3467 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3468 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3469 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3470 PIPE_CONFIG(ADDR_SURF_P2) |
3471 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3472 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3473 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3474 PIPE_CONFIG(ADDR_SURF_P2) |
3475 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3476 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3478 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3479 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3480 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3481 NUM_BANKS(ADDR_SURF_8_BANK));
3482 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3483 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3484 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3485 NUM_BANKS(ADDR_SURF_8_BANK));
3486 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3487 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3488 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3489 NUM_BANKS(ADDR_SURF_8_BANK));
3490 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3491 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3492 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3493 NUM_BANKS(ADDR_SURF_8_BANK));
3494 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3495 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3496 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3497 NUM_BANKS(ADDR_SURF_8_BANK));
3498 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3499 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3500 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3501 NUM_BANKS(ADDR_SURF_8_BANK));
3502 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3503 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3504 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3505 NUM_BANKS(ADDR_SURF_8_BANK));
3506 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3507 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3508 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3509 NUM_BANKS(ADDR_SURF_16_BANK));
3510 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3511 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3512 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3513 NUM_BANKS(ADDR_SURF_16_BANK));
3514 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3515 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3516 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3517 NUM_BANKS(ADDR_SURF_16_BANK));
3518 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3519 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3520 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3521 NUM_BANKS(ADDR_SURF_16_BANK));
3522 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3523 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3524 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3525 NUM_BANKS(ADDR_SURF_16_BANK));
3526 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3527 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3528 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3529 NUM_BANKS(ADDR_SURF_16_BANK));
3530 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3531 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3532 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3533 NUM_BANKS(ADDR_SURF_8_BANK));
3535 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3536 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3537 reg_offset != 23)
3538 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3540 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3541 if (reg_offset != 7)
3542 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3544 break;
3548 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3549 u32 se_num, u32 sh_num, u32 instance)
3551 u32 data;
3553 if (instance == 0xffffffff)
3554 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3555 else
3556 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3558 if (se_num == 0xffffffff)
3559 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3560 else
3561 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3563 if (sh_num == 0xffffffff)
3564 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3565 else
3566 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3568 WREG32(mmGRBM_GFX_INDEX, data);
3571 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3572 u32 me, u32 pipe, u32 q)
3574 vi_srbm_select(adev, me, pipe, q, 0);
3577 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3579 u32 data, mask;
3581 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3582 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3584 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3586 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3587 adev->gfx.config.max_sh_per_se);
3589 return (~data) & mask;
3592 static void
3593 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3595 switch (adev->asic_type) {
3596 case CHIP_FIJI:
3597 case CHIP_VEGAM:
3598 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3599 RB_XSEL2(1) | PKR_MAP(2) |
3600 PKR_XSEL(1) | PKR_YSEL(1) |
3601 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3602 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3603 SE_PAIR_YSEL(2);
3604 break;
3605 case CHIP_TONGA:
3606 case CHIP_POLARIS10:
3607 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3608 SE_XSEL(1) | SE_YSEL(1);
3609 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3610 SE_PAIR_YSEL(2);
3611 break;
3612 case CHIP_TOPAZ:
3613 case CHIP_CARRIZO:
3614 *rconf |= RB_MAP_PKR0(2);
3615 *rconf1 |= 0x0;
3616 break;
3617 case CHIP_POLARIS11:
3618 case CHIP_POLARIS12:
3619 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3620 SE_XSEL(1) | SE_YSEL(1);
3621 *rconf1 |= 0x0;
3622 break;
3623 case CHIP_STONEY:
3624 *rconf |= 0x0;
3625 *rconf1 |= 0x0;
3626 break;
3627 default:
3628 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3629 break;
3633 static void
3634 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3635 u32 raster_config, u32 raster_config_1,
3636 unsigned rb_mask, unsigned num_rb)
3638 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3639 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3640 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3641 unsigned rb_per_se = num_rb / num_se;
3642 unsigned se_mask[4];
3643 unsigned se;
3645 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3646 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3647 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3648 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3650 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3651 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3652 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3654 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3655 (!se_mask[2] && !se_mask[3]))) {
3656 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3658 if (!se_mask[0] && !se_mask[1]) {
3659 raster_config_1 |=
3660 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3661 } else {
3662 raster_config_1 |=
3663 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3667 for (se = 0; se < num_se; se++) {
3668 unsigned raster_config_se = raster_config;
3669 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3670 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3671 int idx = (se / 2) * 2;
3673 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3674 raster_config_se &= ~SE_MAP_MASK;
3676 if (!se_mask[idx]) {
3677 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3678 } else {
3679 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3683 pkr0_mask &= rb_mask;
3684 pkr1_mask &= rb_mask;
3685 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3686 raster_config_se &= ~PKR_MAP_MASK;
3688 if (!pkr0_mask) {
3689 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3690 } else {
3691 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3695 if (rb_per_se >= 2) {
3696 unsigned rb0_mask = 1 << (se * rb_per_se);
3697 unsigned rb1_mask = rb0_mask << 1;
3699 rb0_mask &= rb_mask;
3700 rb1_mask &= rb_mask;
3701 if (!rb0_mask || !rb1_mask) {
3702 raster_config_se &= ~RB_MAP_PKR0_MASK;
3704 if (!rb0_mask) {
3705 raster_config_se |=
3706 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3707 } else {
3708 raster_config_se |=
3709 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3713 if (rb_per_se > 2) {
3714 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3715 rb1_mask = rb0_mask << 1;
3716 rb0_mask &= rb_mask;
3717 rb1_mask &= rb_mask;
3718 if (!rb0_mask || !rb1_mask) {
3719 raster_config_se &= ~RB_MAP_PKR1_MASK;
3721 if (!rb0_mask) {
3722 raster_config_se |=
3723 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3724 } else {
3725 raster_config_se |=
3726 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3732 /* GRBM_GFX_INDEX has a different offset on VI */
3733 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3734 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3735 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3738 /* GRBM_GFX_INDEX has a different offset on VI */
3739 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3742 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3744 int i, j;
3745 u32 data;
3746 u32 raster_config = 0, raster_config_1 = 0;
3747 u32 active_rbs = 0;
3748 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3749 adev->gfx.config.max_sh_per_se;
3750 unsigned num_rb_pipes;
3752 mutex_lock(&adev->grbm_idx_mutex);
3753 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3754 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3755 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3756 data = gfx_v8_0_get_rb_active_bitmap(adev);
3757 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3758 rb_bitmap_width_per_sh);
3761 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3763 adev->gfx.config.backend_enable_mask = active_rbs;
3764 adev->gfx.config.num_rbs = hweight32(active_rbs);
3766 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3767 adev->gfx.config.max_shader_engines, 16);
3769 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3771 if (!adev->gfx.config.backend_enable_mask ||
3772 adev->gfx.config.num_rbs >= num_rb_pipes) {
3773 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3774 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3775 } else {
3776 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3777 adev->gfx.config.backend_enable_mask,
3778 num_rb_pipes);
3781 /* cache the values for userspace */
3782 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3783 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3784 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3785 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3786 RREG32(mmCC_RB_BACKEND_DISABLE);
3787 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3788 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3789 adev->gfx.config.rb_config[i][j].raster_config =
3790 RREG32(mmPA_SC_RASTER_CONFIG);
3791 adev->gfx.config.rb_config[i][j].raster_config_1 =
3792 RREG32(mmPA_SC_RASTER_CONFIG_1);
3795 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3796 mutex_unlock(&adev->grbm_idx_mutex);
3800 * gfx_v8_0_init_compute_vmid - gart enable
3802 * @adev: amdgpu_device pointer
3804 * Initialize compute vmid sh_mem registers
3807 #define DEFAULT_SH_MEM_BASES (0x6000)
3808 #define FIRST_COMPUTE_VMID (8)
3809 #define LAST_COMPUTE_VMID (16)
3810 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3812 int i;
3813 uint32_t sh_mem_config;
3814 uint32_t sh_mem_bases;
3817 * Configure apertures:
3818 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3819 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3820 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3822 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3824 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3825 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3826 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3827 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3828 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3829 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3831 mutex_lock(&adev->srbm_mutex);
3832 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3833 vi_srbm_select(adev, 0, 0, 0, i);
3834 /* CP and shaders */
3835 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3836 WREG32(mmSH_MEM_APE1_BASE, 1);
3837 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3838 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3840 vi_srbm_select(adev, 0, 0, 0, 0);
3841 mutex_unlock(&adev->srbm_mutex);
3844 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3846 switch (adev->asic_type) {
3847 default:
3848 adev->gfx.config.double_offchip_lds_buf = 1;
3849 break;
3850 case CHIP_CARRIZO:
3851 case CHIP_STONEY:
3852 adev->gfx.config.double_offchip_lds_buf = 0;
3853 break;
3857 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3859 u32 tmp, sh_static_mem_cfg;
3860 int i;
3862 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3863 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3864 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3865 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3867 gfx_v8_0_tiling_mode_table_init(adev);
3868 gfx_v8_0_setup_rb(adev);
3869 gfx_v8_0_get_cu_info(adev);
3870 gfx_v8_0_config_init(adev);
3872 /* XXX SH_MEM regs */
3873 /* where to put LDS, scratch, GPUVM in FSA64 space */
3874 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3875 SWIZZLE_ENABLE, 1);
3876 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3877 ELEMENT_SIZE, 1);
3878 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3879 INDEX_STRIDE, 3);
3880 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3882 mutex_lock(&adev->srbm_mutex);
3883 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3884 vi_srbm_select(adev, 0, 0, 0, i);
3885 /* CP and shaders */
3886 if (i == 0) {
3887 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3888 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3889 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3890 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3891 WREG32(mmSH_MEM_CONFIG, tmp);
3892 WREG32(mmSH_MEM_BASES, 0);
3893 } else {
3894 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3895 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3896 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3897 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3898 WREG32(mmSH_MEM_CONFIG, tmp);
3899 tmp = adev->gmc.shared_aperture_start >> 48;
3900 WREG32(mmSH_MEM_BASES, tmp);
3903 WREG32(mmSH_MEM_APE1_BASE, 1);
3904 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3906 vi_srbm_select(adev, 0, 0, 0, 0);
3907 mutex_unlock(&adev->srbm_mutex);
3909 gfx_v8_0_init_compute_vmid(adev);
3911 mutex_lock(&adev->grbm_idx_mutex);
3913 * making sure that the following register writes will be broadcasted
3914 * to all the shaders
3916 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3918 WREG32(mmPA_SC_FIFO_SIZE,
3919 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3920 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3921 (adev->gfx.config.sc_prim_fifo_size_backend <<
3922 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3923 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3924 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3925 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3926 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3928 tmp = RREG32(mmSPI_ARB_PRIORITY);
3929 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3930 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3931 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3932 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3933 WREG32(mmSPI_ARB_PRIORITY, tmp);
3935 mutex_unlock(&adev->grbm_idx_mutex);
3939 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3941 u32 i, j, k;
3942 u32 mask;
3944 mutex_lock(&adev->grbm_idx_mutex);
3945 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3946 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3947 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3948 for (k = 0; k < adev->usec_timeout; k++) {
3949 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3950 break;
3951 udelay(1);
3953 if (k == adev->usec_timeout) {
3954 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3955 0xffffffff, 0xffffffff);
3956 mutex_unlock(&adev->grbm_idx_mutex);
3957 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3958 i, j);
3959 return;
3963 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3964 mutex_unlock(&adev->grbm_idx_mutex);
3966 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3967 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3968 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3969 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3970 for (k = 0; k < adev->usec_timeout; k++) {
3971 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3972 break;
3973 udelay(1);
3977 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3978 bool enable)
3980 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3982 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3983 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3984 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3985 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3987 WREG32(mmCP_INT_CNTL_RING0, tmp);
3990 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3992 /* csib */
3993 WREG32(mmRLC_CSIB_ADDR_HI,
3994 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3995 WREG32(mmRLC_CSIB_ADDR_LO,
3996 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3997 WREG32(mmRLC_CSIB_LENGTH,
3998 adev->gfx.rlc.clear_state_size);
/**
 * gfx_v8_0_parse_ind_reg_list - index the indirect part of a register list
 *
 * @register_list_format: list to scan; modified in place
 * @ind_offset: position in the list at which scanning starts
 * @list_size: number of elements in @register_list_format
 * @unique_indices: table collecting each distinct index value found
 * @indices_count: in/out count of used slots in @unique_indices
 * @max_indices: capacity of @unique_indices
 * @ind_start_offsets: table collecting the start position of each entry
 * @offset_count: in/out count of used slots in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets
 *
 * The list is walked in steps of three elements.  An element equal to
 * 0xFFFFFFFF ends the current entry; the position following it is recorded
 * as the start of a new entry.  Otherwise the third element of the group is
 * looked up in @unique_indices (appending it if it is new) and is replaced
 * in the list by its slot number in that table.
 *
 * BUG()s as soon as either table's count reaches its capacity.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int slot;
	bool entry_start = true;

	while (ind_offset < list_size) {
		if (entry_start) {
			entry_start = false;
			ind_start_offsets[*offset_count] = ind_offset;
			(*offset_count)++;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			/* end-of-entry marker: next element opens a new entry */
			entry_start = true;
			ind_offset++;
			continue;
		}

		/* skip ahead to the element carrying the index value */
		ind_offset += 2;

		/* look for the matching indice */
		slot = 0;
		while (slot < *indices_count &&
		       unique_indices[slot] != register_list_format[ind_offset])
			slot++;

		if (slot >= *indices_count) {
			/* first time this value is seen: append it */
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			slot = *indices_count;
			(*indices_count)++;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = slot;
		ind_offset++;
	}
}
4051 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4053 int i, temp, data;
4054 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4055 int indices_count = 0;
4056 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4057 int offset_count = 0;
4059 int list_size;
4060 unsigned int *register_list_format =
4061 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
4062 if (!register_list_format)
4063 return -ENOMEM;
4064 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4065 adev->gfx.rlc.reg_list_format_size_bytes);
4067 gfx_v8_0_parse_ind_reg_list(register_list_format,
4068 RLC_FormatDirectRegListLength,
4069 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4070 unique_indices,
4071 &indices_count,
4072 ARRAY_SIZE(unique_indices),
4073 indirect_start_offsets,
4074 &offset_count,
4075 ARRAY_SIZE(indirect_start_offsets));
4077 /* save and restore list */
4078 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4080 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4081 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4082 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4084 /* indirect list */
4085 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4086 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4087 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4089 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4090 list_size = list_size >> 1;
4091 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4092 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4094 /* starting offsets starts */
4095 WREG32(mmRLC_GPM_SCRATCH_ADDR,
4096 adev->gfx.rlc.starting_offsets_start);
4097 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4098 WREG32(mmRLC_GPM_SCRATCH_DATA,
4099 indirect_start_offsets[i]);
4101 /* unique indices */
4102 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4103 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4104 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4105 if (unique_indices[i] != 0) {
4106 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4107 WREG32(data + i, unique_indices[i] >> 20);
4110 kfree(register_list_format);
4112 return 0;
4115 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4117 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4120 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4122 uint32_t data;
4124 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4126 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4127 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4128 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4129 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4130 WREG32(mmRLC_PG_DELAY, data);
4132 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4133 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4137 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4138 bool enable)
4140 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4143 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4144 bool enable)
4146 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4149 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4151 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4154 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4156 if ((adev->asic_type == CHIP_CARRIZO) ||
4157 (adev->asic_type == CHIP_STONEY)) {
4158 gfx_v8_0_init_csb(adev);
4159 gfx_v8_0_init_save_restore_list(adev);
4160 gfx_v8_0_enable_save_restore_machine(adev);
4161 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4162 gfx_v8_0_init_power_gating(adev);
4163 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4164 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4165 (adev->asic_type == CHIP_POLARIS12) ||
4166 (adev->asic_type == CHIP_VEGAM)) {
4167 gfx_v8_0_init_csb(adev);
4168 gfx_v8_0_init_save_restore_list(adev);
4169 gfx_v8_0_enable_save_restore_machine(adev);
4170 gfx_v8_0_init_power_gating(adev);
4175 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4177 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4179 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4180 gfx_v8_0_wait_for_rlc_serdes(adev);
4183 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4185 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4186 udelay(50);
4188 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4189 udelay(50);
4192 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4194 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4196 /* carrizo do enable cp interrupt after cp inited */
4197 if (!(adev->flags & AMD_IS_APU))
4198 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4200 udelay(50);
4203 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4205 const struct rlc_firmware_header_v2_0 *hdr;
4206 const __le32 *fw_data;
4207 unsigned i, fw_size;
4209 if (!adev->gfx.rlc_fw)
4210 return -EINVAL;
4212 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4213 amdgpu_ucode_print_rlc_hdr(&hdr->header);
4215 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4216 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4217 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4219 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4220 for (i = 0; i < fw_size; i++)
4221 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4222 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4224 return 0;
4227 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4229 int r;
4230 u32 tmp;
4232 gfx_v8_0_rlc_stop(adev);
4234 /* disable CG */
4235 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4236 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4237 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4238 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4239 if (adev->asic_type == CHIP_POLARIS11 ||
4240 adev->asic_type == CHIP_POLARIS10 ||
4241 adev->asic_type == CHIP_POLARIS12 ||
4242 adev->asic_type == CHIP_VEGAM) {
4243 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4244 tmp &= ~0x3;
4245 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4248 /* disable PG */
4249 WREG32(mmRLC_PG_CNTL, 0);
4251 gfx_v8_0_rlc_reset(adev);
4252 gfx_v8_0_init_pg(adev);
4255 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4256 /* legacy rlc firmware loading */
4257 r = gfx_v8_0_rlc_load_microcode(adev);
4258 if (r)
4259 return r;
4262 gfx_v8_0_rlc_start(adev);
4264 return 0;
4267 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4269 int i;
4270 u32 tmp = RREG32(mmCP_ME_CNTL);
4272 if (enable) {
4273 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4274 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4275 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4276 } else {
4277 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4278 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4279 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4280 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4281 adev->gfx.gfx_ring[i].ready = false;
4283 WREG32(mmCP_ME_CNTL, tmp);
4284 udelay(50);
4287 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4289 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4290 const struct gfx_firmware_header_v1_0 *ce_hdr;
4291 const struct gfx_firmware_header_v1_0 *me_hdr;
4292 const __le32 *fw_data;
4293 unsigned i, fw_size;
4295 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4296 return -EINVAL;
4298 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4299 adev->gfx.pfp_fw->data;
4300 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4301 adev->gfx.ce_fw->data;
4302 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4303 adev->gfx.me_fw->data;
4305 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4306 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4307 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4309 gfx_v8_0_cp_gfx_enable(adev, false);
4311 /* PFP */
4312 fw_data = (const __le32 *)
4313 (adev->gfx.pfp_fw->data +
4314 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4315 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4316 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4317 for (i = 0; i < fw_size; i++)
4318 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4319 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4321 /* CE */
4322 fw_data = (const __le32 *)
4323 (adev->gfx.ce_fw->data +
4324 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4325 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4326 WREG32(mmCP_CE_UCODE_ADDR, 0);
4327 for (i = 0; i < fw_size; i++)
4328 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4329 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4331 /* ME */
4332 fw_data = (const __le32 *)
4333 (adev->gfx.me_fw->data +
4334 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4335 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4336 WREG32(mmCP_ME_RAM_WADDR, 0);
4337 for (i = 0; i < fw_size; i++)
4338 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4339 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4341 return 0;
4344 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4346 u32 count = 0;
4347 const struct cs_section_def *sect = NULL;
4348 const struct cs_extent_def *ext = NULL;
4350 /* begin clear state */
4351 count += 2;
4352 /* context control state */
4353 count += 3;
4355 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4356 for (ext = sect->section; ext->extent != NULL; ++ext) {
4357 if (sect->id == SECT_CONTEXT)
4358 count += 2 + ext->reg_count;
4359 else
4360 return 0;
4363 /* pa_sc_raster_config/pa_sc_raster_config1 */
4364 count += 4;
4365 /* end clear state */
4366 count += 2;
4367 /* clear state */
4368 count += 2;
4370 return count;
4373 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4375 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4376 const struct cs_section_def *sect = NULL;
4377 const struct cs_extent_def *ext = NULL;
4378 int r, i;
4380 /* init the CP */
4381 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4382 WREG32(mmCP_ENDIAN_SWAP, 0);
4383 WREG32(mmCP_DEVICE_ID, 1);
4385 gfx_v8_0_cp_gfx_enable(adev, true);
4387 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4388 if (r) {
4389 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4390 return r;
4393 /* clear state buffer */
4394 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4395 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4397 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4398 amdgpu_ring_write(ring, 0x80000000);
4399 amdgpu_ring_write(ring, 0x80000000);
4401 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4402 for (ext = sect->section; ext->extent != NULL; ++ext) {
4403 if (sect->id == SECT_CONTEXT) {
4404 amdgpu_ring_write(ring,
4405 PACKET3(PACKET3_SET_CONTEXT_REG,
4406 ext->reg_count));
4407 amdgpu_ring_write(ring,
4408 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4409 for (i = 0; i < ext->reg_count; i++)
4410 amdgpu_ring_write(ring, ext->extent[i]);
4415 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4416 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4417 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4418 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4420 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4421 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4423 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4424 amdgpu_ring_write(ring, 0);
4426 /* init the CE partitions */
4427 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4428 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4429 amdgpu_ring_write(ring, 0x8000);
4430 amdgpu_ring_write(ring, 0x8000);
4432 amdgpu_ring_commit(ring);
4434 return 0;
4436 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4438 u32 tmp;
4439 /* no gfx doorbells on iceland */
4440 if (adev->asic_type == CHIP_TOPAZ)
4441 return;
4443 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4445 if (ring->use_doorbell) {
4446 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4447 DOORBELL_OFFSET, ring->doorbell_index);
4448 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4449 DOORBELL_HIT, 0);
4450 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4451 DOORBELL_EN, 1);
4452 } else {
4453 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4456 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4458 if (adev->flags & AMD_IS_APU)
4459 return;
4461 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4462 DOORBELL_RANGE_LOWER,
4463 AMDGPU_DOORBELL_GFX_RING0);
4464 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4466 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4467 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4470 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4472 struct amdgpu_ring *ring;
4473 u32 tmp;
4474 u32 rb_bufsz;
4475 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4476 int r;
4478 /* Set the write pointer delay */
4479 WREG32(mmCP_RB_WPTR_DELAY, 0);
4481 /* set the RB to use vmid 0 */
4482 WREG32(mmCP_RB_VMID, 0);
4484 /* Set ring buffer size */
4485 ring = &adev->gfx.gfx_ring[0];
4486 rb_bufsz = order_base_2(ring->ring_size / 8);
4487 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4488 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4489 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4490 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4491 #ifdef __BIG_ENDIAN
4492 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4493 #endif
4494 WREG32(mmCP_RB0_CNTL, tmp);
4496 /* Initialize the ring buffer's read and write pointers */
4497 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4498 ring->wptr = 0;
4499 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4501 /* set the wb address wether it's enabled or not */
4502 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4503 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4504 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4506 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4507 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4508 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4509 mdelay(1);
4510 WREG32(mmCP_RB0_CNTL, tmp);
4512 rb_addr = ring->gpu_addr >> 8;
4513 WREG32(mmCP_RB0_BASE, rb_addr);
4514 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4516 gfx_v8_0_set_cpg_door_bell(adev, ring);
4517 /* start the ring */
4518 amdgpu_ring_clear_ring(ring);
4519 gfx_v8_0_cp_gfx_start(adev);
4520 ring->ready = true;
4521 r = amdgpu_ring_test_ring(ring);
4522 if (r)
4523 ring->ready = false;
4525 return r;
4528 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4530 int i;
4532 if (enable) {
4533 WREG32(mmCP_MEC_CNTL, 0);
4534 } else {
4535 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4536 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4537 adev->gfx.compute_ring[i].ready = false;
4538 adev->gfx.kiq.ring.ready = false;
4540 udelay(50);
4543 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4545 const struct gfx_firmware_header_v1_0 *mec_hdr;
4546 const __le32 *fw_data;
4547 unsigned i, fw_size;
4549 if (!adev->gfx.mec_fw)
4550 return -EINVAL;
4552 gfx_v8_0_cp_compute_enable(adev, false);
4554 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4555 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4557 fw_data = (const __le32 *)
4558 (adev->gfx.mec_fw->data +
4559 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4560 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4562 /* MEC1 */
4563 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4564 for (i = 0; i < fw_size; i++)
4565 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4566 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4568 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4569 if (adev->gfx.mec2_fw) {
4570 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4572 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4573 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4575 fw_data = (const __le32 *)
4576 (adev->gfx.mec2_fw->data +
4577 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4578 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4580 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4581 for (i = 0; i < fw_size; i++)
4582 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4583 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4586 return 0;
4589 /* KIQ functions */
4590 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4592 uint32_t tmp;
4593 struct amdgpu_device *adev = ring->adev;
4595 /* tell RLC which is KIQ queue */
4596 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4597 tmp &= 0xffffff00;
4598 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4599 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4600 tmp |= 0x80;
4601 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4604 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4606 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4607 uint32_t scratch, tmp = 0;
4608 uint64_t queue_mask = 0;
4609 int r, i;
4611 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4612 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4613 continue;
4615 /* This situation may be hit in the future if a new HW
4616 * generation exposes more than 64 queues. If so, the
4617 * definition of queue_mask needs updating */
4618 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4619 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4620 break;
4623 queue_mask |= (1ull << i);
4626 r = amdgpu_gfx_scratch_get(adev, &scratch);
4627 if (r) {
4628 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4629 return r;
4631 WREG32(scratch, 0xCAFEDEAD);
4633 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4634 if (r) {
4635 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4636 amdgpu_gfx_scratch_free(adev, scratch);
4637 return r;
4639 /* set resources */
4640 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4641 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4642 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4643 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4644 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4645 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4646 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4647 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4648 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4649 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4650 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4651 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4653 /* map queues */
4654 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4655 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4656 amdgpu_ring_write(kiq_ring,
4657 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4658 amdgpu_ring_write(kiq_ring,
4659 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4660 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4661 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4662 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4663 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4664 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4665 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4666 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4668 /* write to scratch for completion */
4669 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4670 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4671 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4672 amdgpu_ring_commit(kiq_ring);
4674 for (i = 0; i < adev->usec_timeout; i++) {
4675 tmp = RREG32(scratch);
4676 if (tmp == 0xDEADBEEF)
4677 break;
4678 DRM_UDELAY(1);
4680 if (i >= adev->usec_timeout) {
4681 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4682 scratch, tmp);
4683 r = -EINVAL;
4685 amdgpu_gfx_scratch_free(adev, scratch);
4687 return r;
4690 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4692 int i, r = 0;
4694 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4695 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4696 for (i = 0; i < adev->usec_timeout; i++) {
4697 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4698 break;
4699 udelay(1);
4701 if (i == adev->usec_timeout)
4702 r = -ETIMEDOUT;
4704 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4705 WREG32(mmCP_HQD_PQ_RPTR, 0);
4706 WREG32(mmCP_HQD_PQ_WPTR, 0);
4708 return r;
4711 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4713 struct amdgpu_device *adev = ring->adev;
4714 struct vi_mqd *mqd = ring->mqd_ptr;
4715 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4716 uint32_t tmp;
4718 mqd->header = 0xC0310800;
4719 mqd->compute_pipelinestat_enable = 0x00000001;
4720 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4721 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4722 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4723 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4724 mqd->compute_misc_reserved = 0x00000003;
4725 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4726 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4727 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4728 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4729 eop_base_addr = ring->eop_gpu_addr >> 8;
4730 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4731 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4733 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4734 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4735 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4736 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4738 mqd->cp_hqd_eop_control = tmp;
4740 /* enable doorbell? */
4741 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4742 CP_HQD_PQ_DOORBELL_CONTROL,
4743 DOORBELL_EN,
4744 ring->use_doorbell ? 1 : 0);
4746 mqd->cp_hqd_pq_doorbell_control = tmp;
4748 /* set the pointer to the MQD */
4749 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4750 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4752 /* set MQD vmid to 0 */
4753 tmp = RREG32(mmCP_MQD_CONTROL);
4754 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4755 mqd->cp_mqd_control = tmp;
4757 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4758 hqd_gpu_addr = ring->gpu_addr >> 8;
4759 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4760 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4762 /* set up the HQD, this is similar to CP_RB0_CNTL */
4763 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4764 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4765 (order_base_2(ring->ring_size / 4) - 1));
4766 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4767 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4768 #ifdef __BIG_ENDIAN
4769 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4770 #endif
4771 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4772 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4773 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4774 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4775 mqd->cp_hqd_pq_control = tmp;
4777 /* set the wb address whether it's enabled or not */
4778 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4779 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4780 mqd->cp_hqd_pq_rptr_report_addr_hi =
4781 upper_32_bits(wb_gpu_addr) & 0xffff;
4783 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4784 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4785 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4786 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4788 tmp = 0;
4789 /* enable the doorbell if requested */
4790 if (ring->use_doorbell) {
4791 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4792 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4793 DOORBELL_OFFSET, ring->doorbell_index);
4795 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4796 DOORBELL_EN, 1);
4797 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4798 DOORBELL_SOURCE, 0);
4799 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4800 DOORBELL_HIT, 0);
4803 mqd->cp_hqd_pq_doorbell_control = tmp;
4805 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4806 ring->wptr = 0;
4807 mqd->cp_hqd_pq_wptr = ring->wptr;
4808 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4810 /* set the vmid for the queue */
4811 mqd->cp_hqd_vmid = 0;
4813 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4814 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4815 mqd->cp_hqd_persistent_state = tmp;
4817 /* set MTYPE */
4818 tmp = RREG32(mmCP_HQD_IB_CONTROL);
4819 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4820 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4821 mqd->cp_hqd_ib_control = tmp;
4823 tmp = RREG32(mmCP_HQD_IQ_TIMER);
4824 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4825 mqd->cp_hqd_iq_timer = tmp;
4827 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4828 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4829 mqd->cp_hqd_ctx_save_control = tmp;
4831 /* defaults */
4832 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4833 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4834 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4835 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4836 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4837 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4838 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4839 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4840 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4841 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4842 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4843 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4844 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4845 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4846 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4848 /* activate the queue */
4849 mqd->cp_hqd_active = 1;
4851 return 0;
4854 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4855 struct vi_mqd *mqd)
4857 uint32_t mqd_reg;
4858 uint32_t *mqd_data;
4860 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4861 mqd_data = &mqd->cp_mqd_base_addr_lo;
4863 /* disable wptr polling */
4864 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4866 /* program all HQD registers */
4867 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4868 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4870 /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4871 * This is safe since EOP RPTR==WPTR for any inactive HQD
4872 * on ASICs that do not support context-save.
4873 * EOP writes/reads can start anywhere in the ring.
4875 if (adev->asic_type != CHIP_TONGA) {
4876 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4877 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4878 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4881 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4882 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4884 /* activate the HQD */
4885 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4886 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4888 return 0;
4891 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4893 struct amdgpu_device *adev = ring->adev;
4894 struct vi_mqd *mqd = ring->mqd_ptr;
4895 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4897 gfx_v8_0_kiq_setting(ring);
4899 if (adev->in_gpu_reset) { /* for GPU_RESET case */
4900 /* reset MQD to a clean status */
4901 if (adev->gfx.mec.mqd_backup[mqd_idx])
4902 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4904 /* reset ring buffer */
4905 ring->wptr = 0;
4906 amdgpu_ring_clear_ring(ring);
4907 mutex_lock(&adev->srbm_mutex);
4908 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4909 gfx_v8_0_mqd_commit(adev, mqd);
4910 vi_srbm_select(adev, 0, 0, 0, 0);
4911 mutex_unlock(&adev->srbm_mutex);
4912 } else {
4913 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4914 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4915 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4916 mutex_lock(&adev->srbm_mutex);
4917 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4918 gfx_v8_0_mqd_init(ring);
4919 gfx_v8_0_mqd_commit(adev, mqd);
4920 vi_srbm_select(adev, 0, 0, 0, 0);
4921 mutex_unlock(&adev->srbm_mutex);
4923 if (adev->gfx.mec.mqd_backup[mqd_idx])
4924 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4927 return 0;
4930 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4932 struct amdgpu_device *adev = ring->adev;
4933 struct vi_mqd *mqd = ring->mqd_ptr;
4934 int mqd_idx = ring - &adev->gfx.compute_ring[0];
4936 if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
4937 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4938 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4939 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4940 mutex_lock(&adev->srbm_mutex);
4941 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4942 gfx_v8_0_mqd_init(ring);
4943 vi_srbm_select(adev, 0, 0, 0, 0);
4944 mutex_unlock(&adev->srbm_mutex);
4946 if (adev->gfx.mec.mqd_backup[mqd_idx])
4947 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4948 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4949 /* reset MQD to a clean status */
4950 if (adev->gfx.mec.mqd_backup[mqd_idx])
4951 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4952 /* reset ring buffer */
4953 ring->wptr = 0;
4954 amdgpu_ring_clear_ring(ring);
4955 } else {
4956 amdgpu_ring_clear_ring(ring);
4958 return 0;
4961 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4963 if (adev->asic_type > CHIP_TONGA) {
4964 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4965 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4967 /* enable doorbells */
4968 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4971 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4973 struct amdgpu_ring *ring = NULL;
4974 int r = 0, i;
4976 gfx_v8_0_cp_compute_enable(adev, true);
4978 ring = &adev->gfx.kiq.ring;
4980 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4981 if (unlikely(r != 0))
4982 goto done;
4984 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4985 if (!r) {
4986 r = gfx_v8_0_kiq_init_queue(ring);
4987 amdgpu_bo_kunmap(ring->mqd_obj);
4988 ring->mqd_ptr = NULL;
4990 amdgpu_bo_unreserve(ring->mqd_obj);
4991 if (r)
4992 goto done;
4994 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4995 ring = &adev->gfx.compute_ring[i];
4997 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4998 if (unlikely(r != 0))
4999 goto done;
5000 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
5001 if (!r) {
5002 r = gfx_v8_0_kcq_init_queue(ring);
5003 amdgpu_bo_kunmap(ring->mqd_obj);
5004 ring->mqd_ptr = NULL;
5006 amdgpu_bo_unreserve(ring->mqd_obj);
5007 if (r)
5008 goto done;
5011 gfx_v8_0_set_mec_doorbell_range(adev);
5013 r = gfx_v8_0_kiq_kcq_enable(adev);
5014 if (r)
5015 goto done;
5017 /* Test KIQ */
5018 ring = &adev->gfx.kiq.ring;
5019 ring->ready = true;
5020 r = amdgpu_ring_test_ring(ring);
5021 if (r) {
5022 ring->ready = false;
5023 goto done;
5026 /* Test KCQs */
5027 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5028 ring = &adev->gfx.compute_ring[i];
5029 ring->ready = true;
5030 r = amdgpu_ring_test_ring(ring);
5031 if (r)
5032 ring->ready = false;
5035 done:
5036 return r;
5039 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5041 int r;
5043 if (!(adev->flags & AMD_IS_APU))
5044 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5046 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
5047 /* legacy firmware loading */
5048 r = gfx_v8_0_cp_gfx_load_microcode(adev);
5049 if (r)
5050 return r;
5052 r = gfx_v8_0_cp_compute_load_microcode(adev);
5053 if (r)
5054 return r;
5057 r = gfx_v8_0_cp_gfx_resume(adev);
5058 if (r)
5059 return r;
5061 r = gfx_v8_0_kiq_resume(adev);
5062 if (r)
5063 return r;
5065 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5067 return 0;
5070 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5072 gfx_v8_0_cp_gfx_enable(adev, enable);
5073 gfx_v8_0_cp_compute_enable(adev, enable);
/*
 * IP-block hw_init hook: apply golden registers, run the GPU init, then
 * resume the RLC followed by the CP.
 *
 * @handle: the amdgpu_device, passed as an opaque pointer
 *
 * Returns 0 on success, or the error from the RLC or CP resume.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int err;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	err = gfx_v8_0_rlc_resume(adev);
	if (err)
		return err;

	return gfx_v8_0_cp_resume(adev);
}
5093 static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring)
5095 struct amdgpu_device *adev = kiq_ring->adev;
5096 uint32_t scratch, tmp = 0;
5097 int r, i;
5099 r = amdgpu_gfx_scratch_get(adev, &scratch);
5100 if (r) {
5101 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
5102 return r;
5104 WREG32(scratch, 0xCAFEDEAD);
5106 r = amdgpu_ring_alloc(kiq_ring, 10);
5107 if (r) {
5108 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
5109 amdgpu_gfx_scratch_free(adev, scratch);
5110 return r;
5113 /* unmap queues */
5114 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
5115 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
5116 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
5117 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
5118 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
5119 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
5120 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
5121 amdgpu_ring_write(kiq_ring, 0);
5122 amdgpu_ring_write(kiq_ring, 0);
5123 amdgpu_ring_write(kiq_ring, 0);
5124 /* write to scratch for completion */
5125 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
5126 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
5127 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
5128 amdgpu_ring_commit(kiq_ring);
5130 for (i = 0; i < adev->usec_timeout; i++) {
5131 tmp = RREG32(scratch);
5132 if (tmp == 0xDEADBEEF)
5133 break;
5134 DRM_UDELAY(1);
5136 if (i >= adev->usec_timeout) {
5137 DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
5138 r = -EINVAL;
5140 amdgpu_gfx_scratch_free(adev, scratch);
5141 return r;
5144 static int gfx_v8_0_hw_fini(void *handle)
5146 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5147 int i;
5149 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5150 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5152 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
5154 amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
5156 /* disable KCQ to avoid CPC touch memory not valid anymore */
5157 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5158 gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
5160 if (amdgpu_sriov_vf(adev)) {
5161 pr_debug("For SRIOV client, shouldn't do anything.\n");
5162 return 0;
5164 gfx_v8_0_cp_enable(adev, false);
5165 gfx_v8_0_rlc_stop(adev);
5167 amdgpu_device_ip_set_powergating_state(adev,
5168 AMD_IP_BLOCK_TYPE_GFX,
5169 AMD_PG_STATE_UNGATE);
5171 return 0;
5174 static int gfx_v8_0_suspend(void *handle)
5176 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5177 adev->gfx.in_suspend = true;
5178 return gfx_v8_0_hw_fini(adev);
5181 static int gfx_v8_0_resume(void *handle)
5183 int r;
5184 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5186 r = gfx_v8_0_hw_init(adev);
5187 adev->gfx.in_suspend = false;
5188 return r;
5191 static bool gfx_v8_0_is_idle(void *handle)
5193 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5195 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5196 return false;
5197 else
5198 return true;
5201 static int gfx_v8_0_wait_for_idle(void *handle)
5203 unsigned i;
5204 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5206 for (i = 0; i < adev->usec_timeout; i++) {
5207 if (gfx_v8_0_is_idle(handle))
5208 return 0;
5210 udelay(1);
5212 return -ETIMEDOUT;
5215 static bool gfx_v8_0_check_soft_reset(void *handle)
5217 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5218 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5219 u32 tmp;
5221 /* GRBM_STATUS */
5222 tmp = RREG32(mmGRBM_STATUS);
5223 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5224 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5225 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5226 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5227 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5228 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5229 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5230 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5231 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5232 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5233 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5234 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5235 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5238 /* GRBM_STATUS2 */
5239 tmp = RREG32(mmGRBM_STATUS2);
5240 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5241 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5242 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5244 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5245 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5246 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5247 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5248 SOFT_RESET_CPF, 1);
5249 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5250 SOFT_RESET_CPC, 1);
5251 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5252 SOFT_RESET_CPG, 1);
5253 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5254 SOFT_RESET_GRBM, 1);
5257 /* SRBM_STATUS */
5258 tmp = RREG32(mmSRBM_STATUS);
5259 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5260 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5261 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5262 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5263 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5264 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5266 if (grbm_soft_reset || srbm_soft_reset) {
5267 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5268 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5269 return true;
5270 } else {
5271 adev->gfx.grbm_soft_reset = 0;
5272 adev->gfx.srbm_soft_reset = 0;
5273 return false;
5277 static int gfx_v8_0_pre_soft_reset(void *handle)
5279 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5280 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5282 if ((!adev->gfx.grbm_soft_reset) &&
5283 (!adev->gfx.srbm_soft_reset))
5284 return 0;
5286 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5287 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5289 /* stop the rlc */
5290 gfx_v8_0_rlc_stop(adev);
5292 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5293 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5294 /* Disable GFX parsing/prefetching */
5295 gfx_v8_0_cp_gfx_enable(adev, false);
5297 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5298 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5299 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5300 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5301 int i;
5303 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5304 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5306 mutex_lock(&adev->srbm_mutex);
5307 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5308 gfx_v8_0_deactivate_hqd(adev, 2);
5309 vi_srbm_select(adev, 0, 0, 0, 0);
5310 mutex_unlock(&adev->srbm_mutex);
5312 /* Disable MEC parsing/prefetching */
5313 gfx_v8_0_cp_compute_enable(adev, false);
5316 return 0;
5319 static int gfx_v8_0_soft_reset(void *handle)
5321 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5322 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5323 u32 tmp;
5325 if ((!adev->gfx.grbm_soft_reset) &&
5326 (!adev->gfx.srbm_soft_reset))
5327 return 0;
5329 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5330 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5332 if (grbm_soft_reset || srbm_soft_reset) {
5333 tmp = RREG32(mmGMCON_DEBUG);
5334 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5335 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5336 WREG32(mmGMCON_DEBUG, tmp);
5337 udelay(50);
5340 if (grbm_soft_reset) {
5341 tmp = RREG32(mmGRBM_SOFT_RESET);
5342 tmp |= grbm_soft_reset;
5343 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5344 WREG32(mmGRBM_SOFT_RESET, tmp);
5345 tmp = RREG32(mmGRBM_SOFT_RESET);
5347 udelay(50);
5349 tmp &= ~grbm_soft_reset;
5350 WREG32(mmGRBM_SOFT_RESET, tmp);
5351 tmp = RREG32(mmGRBM_SOFT_RESET);
5354 if (srbm_soft_reset) {
5355 tmp = RREG32(mmSRBM_SOFT_RESET);
5356 tmp |= srbm_soft_reset;
5357 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5358 WREG32(mmSRBM_SOFT_RESET, tmp);
5359 tmp = RREG32(mmSRBM_SOFT_RESET);
5361 udelay(50);
5363 tmp &= ~srbm_soft_reset;
5364 WREG32(mmSRBM_SOFT_RESET, tmp);
5365 tmp = RREG32(mmSRBM_SOFT_RESET);
5368 if (grbm_soft_reset || srbm_soft_reset) {
5369 tmp = RREG32(mmGMCON_DEBUG);
5370 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5371 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5372 WREG32(mmGMCON_DEBUG, tmp);
5375 /* Wait a little for things to settle down */
5376 udelay(50);
5378 return 0;
5381 static int gfx_v8_0_post_soft_reset(void *handle)
5383 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5384 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5386 if ((!adev->gfx.grbm_soft_reset) &&
5387 (!adev->gfx.srbm_soft_reset))
5388 return 0;
5390 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5391 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5393 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5394 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5395 gfx_v8_0_cp_gfx_resume(adev);
5397 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5398 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5399 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5400 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5401 int i;
5403 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5404 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5406 mutex_lock(&adev->srbm_mutex);
5407 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5408 gfx_v8_0_deactivate_hqd(adev, 2);
5409 vi_srbm_select(adev, 0, 0, 0, 0);
5410 mutex_unlock(&adev->srbm_mutex);
5412 gfx_v8_0_kiq_resume(adev);
5414 gfx_v8_0_rlc_start(adev);
5416 return 0;
5420 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5422 * @adev: amdgpu_device pointer
5424 * Fetches a GPU clock counter snapshot.
5425 * Returns the 64 bit clock counter snapshot.
5427 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5429 uint64_t clock;
5431 mutex_lock(&adev->gfx.gpu_clock_mutex);
5432 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5433 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5434 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5435 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5436 return clock;
5439 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5440 uint32_t vmid,
5441 uint32_t gds_base, uint32_t gds_size,
5442 uint32_t gws_base, uint32_t gws_size,
5443 uint32_t oa_base, uint32_t oa_size)
5445 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5446 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5448 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5449 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5451 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5452 oa_size = oa_size >> AMDGPU_OA_SHIFT;
5454 /* GDS Base */
5455 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5456 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5457 WRITE_DATA_DST_SEL(0)));
5458 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5459 amdgpu_ring_write(ring, 0);
5460 amdgpu_ring_write(ring, gds_base);
5462 /* GDS Size */
5463 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5464 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5465 WRITE_DATA_DST_SEL(0)));
5466 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5467 amdgpu_ring_write(ring, 0);
5468 amdgpu_ring_write(ring, gds_size);
5470 /* GWS */
5471 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5472 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5473 WRITE_DATA_DST_SEL(0)));
5474 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5475 amdgpu_ring_write(ring, 0);
5476 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5478 /* OA */
5479 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5480 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5481 WRITE_DATA_DST_SEL(0)));
5482 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5483 amdgpu_ring_write(ring, 0);
5484 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5487 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5489 WREG32(mmSQ_IND_INDEX,
5490 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5491 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5492 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5493 (SQ_IND_INDEX__FORCE_READ_MASK));
5494 return RREG32(mmSQ_IND_DATA);
5497 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5498 uint32_t wave, uint32_t thread,
5499 uint32_t regno, uint32_t num, uint32_t *out)
5501 WREG32(mmSQ_IND_INDEX,
5502 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5503 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5504 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5505 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5506 (SQ_IND_INDEX__FORCE_READ_MASK) |
5507 (SQ_IND_INDEX__AUTO_INCR_MASK));
5508 while (num--)
5509 *(out++) = RREG32(mmSQ_IND_DATA);
5512 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5514 /* type 0 wave data */
5515 dst[(*no_fields)++] = 0;
5516 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5517 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5518 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5519 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5520 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5521 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5522 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5523 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5524 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5525 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5526 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5527 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5528 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5529 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5530 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5531 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5532 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5533 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5536 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5537 uint32_t wave, uint32_t start,
5538 uint32_t size, uint32_t *dst)
5540 wave_read_regs(
5541 adev, simd, wave, 0,
5542 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5546 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5547 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5548 .select_se_sh = &gfx_v8_0_select_se_sh,
5549 .read_wave_data = &gfx_v8_0_read_wave_data,
5550 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5551 .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5554 static int gfx_v8_0_early_init(void *handle)
5556 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5558 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5559 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5560 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5561 gfx_v8_0_set_ring_funcs(adev);
5562 gfx_v8_0_set_irq_funcs(adev);
5563 gfx_v8_0_set_gds_init(adev);
5564 gfx_v8_0_set_rlc_funcs(adev);
5566 return 0;
5569 static int gfx_v8_0_late_init(void *handle)
5571 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5572 int r;
5574 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5575 if (r)
5576 return r;
5578 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5579 if (r)
5580 return r;
5582 /* requires IBs so do in late init after IB pool is initialized */
5583 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5584 if (r)
5585 return r;
5587 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5588 if (r) {
5589 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5590 return r;
5593 r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5594 if (r) {
5595 DRM_ERROR(
5596 "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5598 return r;
5601 return 0;
5604 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5605 bool enable)
5607 if (((adev->asic_type == CHIP_POLARIS11) ||
5608 (adev->asic_type == CHIP_POLARIS12) ||
5609 (adev->asic_type == CHIP_VEGAM)) &&
5610 adev->powerplay.pp_funcs->set_powergating_by_smu)
5611 /* Send msg to SMU via Powerplay */
5612 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5614 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5617 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5618 bool enable)
5620 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5623 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5624 bool enable)
5626 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5629 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5630 bool enable)
5632 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5635 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5636 bool enable)
5638 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5640 /* Read any GFX register to wake up GFX. */
5641 if (!enable)
5642 RREG32(mmDB_RENDER_CONTROL);
5645 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5646 bool enable)
5648 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5649 cz_enable_gfx_cg_power_gating(adev, true);
5650 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5651 cz_enable_gfx_pipeline_power_gating(adev, true);
5652 } else {
5653 cz_enable_gfx_cg_power_gating(adev, false);
5654 cz_enable_gfx_pipeline_power_gating(adev, false);
5658 static int gfx_v8_0_set_powergating_state(void *handle,
5659 enum amd_powergating_state state)
5661 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5662 bool enable = (state == AMD_PG_STATE_GATE);
5664 if (amdgpu_sriov_vf(adev))
5665 return 0;
5667 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5668 AMD_PG_SUPPORT_RLC_SMU_HS |
5669 AMD_PG_SUPPORT_CP |
5670 AMD_PG_SUPPORT_GFX_DMG))
5671 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5672 switch (adev->asic_type) {
5673 case CHIP_CARRIZO:
5674 case CHIP_STONEY:
5676 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5677 cz_enable_sck_slow_down_on_power_up(adev, true);
5678 cz_enable_sck_slow_down_on_power_down(adev, true);
5679 } else {
5680 cz_enable_sck_slow_down_on_power_up(adev, false);
5681 cz_enable_sck_slow_down_on_power_down(adev, false);
5683 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5684 cz_enable_cp_power_gating(adev, true);
5685 else
5686 cz_enable_cp_power_gating(adev, false);
5688 cz_update_gfx_cg_power_gating(adev, enable);
5690 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5691 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5692 else
5693 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5695 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5696 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5697 else
5698 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5699 break;
5700 case CHIP_POLARIS11:
5701 case CHIP_POLARIS12:
5702 case CHIP_VEGAM:
5703 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5704 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5705 else
5706 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5708 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5709 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5710 else
5711 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5713 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5714 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5715 else
5716 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5717 break;
5718 default:
5719 break;
5721 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5722 AMD_PG_SUPPORT_RLC_SMU_HS |
5723 AMD_PG_SUPPORT_CP |
5724 AMD_PG_SUPPORT_GFX_DMG))
5725 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5726 return 0;
5729 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5731 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5732 int data;
5734 if (amdgpu_sriov_vf(adev))
5735 *flags = 0;
5737 /* AMD_CG_SUPPORT_GFX_MGCG */
5738 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5739 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5740 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5742 /* AMD_CG_SUPPORT_GFX_CGLG */
5743 data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5744 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5745 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5747 /* AMD_CG_SUPPORT_GFX_CGLS */
5748 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5749 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5751 /* AMD_CG_SUPPORT_GFX_CGTS */
5752 data = RREG32(mmCGTS_SM_CTRL_REG);
5753 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5754 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5756 /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5757 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5758 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5760 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5761 data = RREG32(mmRLC_MEM_SLP_CNTL);
5762 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5763 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5765 /* AMD_CG_SUPPORT_GFX_CP_LS */
5766 data = RREG32(mmCP_MEM_SLP_CNTL);
5767 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5768 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5771 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5772 uint32_t reg_addr, uint32_t cmd)
5774 uint32_t data;
5776 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5778 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5779 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5781 data = RREG32(mmRLC_SERDES_WR_CTRL);
5782 if (adev->asic_type == CHIP_STONEY)
5783 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5784 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5785 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5786 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5787 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5788 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5789 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5790 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5791 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5792 else
5793 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5794 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5795 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5796 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5797 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5798 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5799 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5800 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5801 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5802 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5803 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5804 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5805 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5806 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5807 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5809 WREG32(mmRLC_SERDES_WR_CTRL, data);
/* RLC safe-mode message codes */
#define MSG_ENTER_RLC_SAFE_MODE		1
#define MSG_EXIT_RLC_SAFE_MODE		0

/* RLC_GPR_REG2 layout: REQ is bit 0, MESSAGE is bits 1..4 */
#define RLC_GPR_REG2__REQ_MASK		0x00000001
#define RLC_GPR_REG2__REQ__SHIFT	0
#define RLC_GPR_REG2__MESSAGE__SHIFT	0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK	0x0000001e
5819 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5821 u32 data;
5822 unsigned i;
5824 data = RREG32(mmRLC_CNTL);
5825 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5826 return;
5828 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5829 data |= RLC_SAFE_MODE__CMD_MASK;
5830 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5831 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5832 WREG32(mmRLC_SAFE_MODE, data);
5834 for (i = 0; i < adev->usec_timeout; i++) {
5835 if ((RREG32(mmRLC_GPM_STAT) &
5836 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5837 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5838 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5839 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5840 break;
5841 udelay(1);
5844 for (i = 0; i < adev->usec_timeout; i++) {
5845 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5846 break;
5847 udelay(1);
5849 adev->gfx.rlc.in_safe_mode = true;
5853 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5855 u32 data = 0;
5856 unsigned i;
5858 data = RREG32(mmRLC_CNTL);
5859 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5860 return;
5862 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5863 if (adev->gfx.rlc.in_safe_mode) {
5864 data |= RLC_SAFE_MODE__CMD_MASK;
5865 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5866 WREG32(mmRLC_SAFE_MODE, data);
5867 adev->gfx.rlc.in_safe_mode = false;
5871 for (i = 0; i < adev->usec_timeout; i++) {
5872 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5873 break;
5874 udelay(1);
5878 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5879 .enter_safe_mode = iceland_enter_rlc_safe_mode,
5880 .exit_safe_mode = iceland_exit_rlc_safe_mode
5883 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5884 bool enable)
5886 uint32_t temp, data;
5888 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5890 /* It is disabled by HW by default */
5891 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5892 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5893 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5894 /* 1 - RLC memory Light sleep */
5895 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5897 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5898 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5901 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5902 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5903 if (adev->flags & AMD_IS_APU)
5904 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5905 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5906 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5907 else
5908 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5909 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5910 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5911 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5913 if (temp != data)
5914 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5916 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5917 gfx_v8_0_wait_for_rlc_serdes(adev);
5919 /* 5 - clear mgcg override */
5920 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5922 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5923 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5924 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5925 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5926 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5927 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5928 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5929 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5930 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5931 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5932 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5933 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5934 if (temp != data)
5935 WREG32(mmCGTS_SM_CTRL_REG, data);
5937 udelay(50);
5939 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5940 gfx_v8_0_wait_for_rlc_serdes(adev);
5941 } else {
5942 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5943 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5944 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5945 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5946 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5947 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5948 if (temp != data)
5949 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5951 /* 2 - disable MGLS in RLC */
5952 data = RREG32(mmRLC_MEM_SLP_CNTL);
5953 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5954 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5955 WREG32(mmRLC_MEM_SLP_CNTL, data);
5958 /* 3 - disable MGLS in CP */
5959 data = RREG32(mmCP_MEM_SLP_CNTL);
5960 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5961 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5962 WREG32(mmCP_MEM_SLP_CNTL, data);
5965 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5966 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5967 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5968 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5969 if (temp != data)
5970 WREG32(mmCGTS_SM_CTRL_REG, data);
5972 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5973 gfx_v8_0_wait_for_rlc_serdes(adev);
5975 /* 6 - set mgcg override */
5976 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5978 udelay(50);
5980 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5981 gfx_v8_0_wait_for_rlc_serdes(adev);
5984 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5987 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5988 bool enable)
5990 uint32_t temp, temp1, data, data1;
5992 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5994 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5996 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5997 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5998 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5999 if (temp1 != data1)
6000 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6002 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6003 gfx_v8_0_wait_for_rlc_serdes(adev);
6005 /* 2 - clear cgcg override */
6006 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6008 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6009 gfx_v8_0_wait_for_rlc_serdes(adev);
6011 /* 3 - write cmd to set CGLS */
6012 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
6014 /* 4 - enable cgcg */
6015 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
6017 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6018 /* enable cgls*/
6019 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6021 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6022 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
6024 if (temp1 != data1)
6025 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6026 } else {
6027 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6030 if (temp != data)
6031 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6033 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
6034 * Cmp_busy/GFX_Idle interrupts
6036 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6037 } else {
6038 /* disable cntx_empty_int_enable & GFX Idle interrupt */
6039 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
6041 /* TEST CGCG */
6042 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6043 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
6044 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
6045 if (temp1 != data1)
6046 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6048 /* read gfx register to wake up cgcg */
6049 RREG32(mmCB_CGTT_SCLK_CTRL);
6050 RREG32(mmCB_CGTT_SCLK_CTRL);
6051 RREG32(mmCB_CGTT_SCLK_CTRL);
6052 RREG32(mmCB_CGTT_SCLK_CTRL);
6054 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6055 gfx_v8_0_wait_for_rlc_serdes(adev);
6057 /* write cmd to Set CGCG Override */
6058 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6060 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6061 gfx_v8_0_wait_for_rlc_serdes(adev);
6063 /* write cmd to Clear CGLS */
6064 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
6066 /* disable cgcg, cgls should be disabled too. */
6067 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
6068 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
6069 if (temp != data)
6070 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6071 /* enable interrupts again for PG */
6072 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6075 gfx_v8_0_wait_for_rlc_serdes(adev);
6077 adev->gfx.rlc.funcs->exit_safe_mode(adev);
6079 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6080 bool enable)
6082 if (enable) {
6083 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6084 * === MGCG + MGLS + TS(CG/LS) ===
6086 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6087 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6088 } else {
6089 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6090 * === CGCG + CGLS ===
6092 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6093 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6095 return 0;
6098 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6099 enum amd_clockgating_state state)
6101 uint32_t msg_id, pp_state = 0;
6102 uint32_t pp_support_state = 0;
6104 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6105 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6106 pp_support_state = PP_STATE_SUPPORT_LS;
6107 pp_state = PP_STATE_LS;
6109 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6110 pp_support_state |= PP_STATE_SUPPORT_CG;
6111 pp_state |= PP_STATE_CG;
6113 if (state == AMD_CG_STATE_UNGATE)
6114 pp_state = 0;
6116 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6117 PP_BLOCK_GFX_CG,
6118 pp_support_state,
6119 pp_state);
6120 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6121 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6124 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6125 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6126 pp_support_state = PP_STATE_SUPPORT_LS;
6127 pp_state = PP_STATE_LS;
6130 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6131 pp_support_state |= PP_STATE_SUPPORT_CG;
6132 pp_state |= PP_STATE_CG;
6135 if (state == AMD_CG_STATE_UNGATE)
6136 pp_state = 0;
6138 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6139 PP_BLOCK_GFX_MG,
6140 pp_support_state,
6141 pp_state);
6142 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6143 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6146 return 0;
6149 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6150 enum amd_clockgating_state state)
6153 uint32_t msg_id, pp_state = 0;
6154 uint32_t pp_support_state = 0;
6156 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6157 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6158 pp_support_state = PP_STATE_SUPPORT_LS;
6159 pp_state = PP_STATE_LS;
6161 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6162 pp_support_state |= PP_STATE_SUPPORT_CG;
6163 pp_state |= PP_STATE_CG;
6165 if (state == AMD_CG_STATE_UNGATE)
6166 pp_state = 0;
6168 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6169 PP_BLOCK_GFX_CG,
6170 pp_support_state,
6171 pp_state);
6172 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6173 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6176 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6177 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6178 pp_support_state = PP_STATE_SUPPORT_LS;
6179 pp_state = PP_STATE_LS;
6181 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6182 pp_support_state |= PP_STATE_SUPPORT_CG;
6183 pp_state |= PP_STATE_CG;
6185 if (state == AMD_CG_STATE_UNGATE)
6186 pp_state = 0;
6188 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6189 PP_BLOCK_GFX_3D,
6190 pp_support_state,
6191 pp_state);
6192 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6193 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6196 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6197 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6198 pp_support_state = PP_STATE_SUPPORT_LS;
6199 pp_state = PP_STATE_LS;
6202 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6203 pp_support_state |= PP_STATE_SUPPORT_CG;
6204 pp_state |= PP_STATE_CG;
6207 if (state == AMD_CG_STATE_UNGATE)
6208 pp_state = 0;
6210 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6211 PP_BLOCK_GFX_MG,
6212 pp_support_state,
6213 pp_state);
6214 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6215 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6218 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6219 pp_support_state = PP_STATE_SUPPORT_LS;
6221 if (state == AMD_CG_STATE_UNGATE)
6222 pp_state = 0;
6223 else
6224 pp_state = PP_STATE_LS;
6226 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6227 PP_BLOCK_GFX_RLC,
6228 pp_support_state,
6229 pp_state);
6230 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6231 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6234 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6235 pp_support_state = PP_STATE_SUPPORT_LS;
6237 if (state == AMD_CG_STATE_UNGATE)
6238 pp_state = 0;
6239 else
6240 pp_state = PP_STATE_LS;
6241 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6242 PP_BLOCK_GFX_CP,
6243 pp_support_state,
6244 pp_state);
6245 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6246 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6249 return 0;
6252 static int gfx_v8_0_set_clockgating_state(void *handle,
6253 enum amd_clockgating_state state)
6255 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6257 if (amdgpu_sriov_vf(adev))
6258 return 0;
6260 switch (adev->asic_type) {
6261 case CHIP_FIJI:
6262 case CHIP_CARRIZO:
6263 case CHIP_STONEY:
6264 gfx_v8_0_update_gfx_clock_gating(adev,
6265 state == AMD_CG_STATE_GATE);
6266 break;
6267 case CHIP_TONGA:
6268 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6269 break;
6270 case CHIP_POLARIS10:
6271 case CHIP_POLARIS11:
6272 case CHIP_POLARIS12:
6273 case CHIP_VEGAM:
6274 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6275 break;
6276 default:
6277 break;
6279 return 0;
6282 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6284 return ring->adev->wb.wb[ring->rptr_offs];
6287 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6289 struct amdgpu_device *adev = ring->adev;
6291 if (ring->use_doorbell)
6292 /* XXX check if swapping is necessary on BE */
6293 return ring->adev->wb.wb[ring->wptr_offs];
6294 else
6295 return RREG32(mmCP_RB0_WPTR);
6298 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6300 struct amdgpu_device *adev = ring->adev;
6302 if (ring->use_doorbell) {
6303 /* XXX check if swapping is necessary on BE */
6304 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6305 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6306 } else {
6307 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6308 (void)RREG32(mmCP_RB0_WPTR);
6312 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6314 u32 ref_and_mask, reg_mem_engine;
6316 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6317 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6318 switch (ring->me) {
6319 case 1:
6320 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6321 break;
6322 case 2:
6323 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6324 break;
6325 default:
6326 return;
6328 reg_mem_engine = 0;
6329 } else {
6330 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6331 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6334 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6335 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6336 WAIT_REG_MEM_FUNCTION(3) | /* == */
6337 reg_mem_engine));
6338 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6339 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6340 amdgpu_ring_write(ring, ref_and_mask);
6341 amdgpu_ring_write(ring, ref_and_mask);
6342 amdgpu_ring_write(ring, 0x20); /* poll interval */
6345 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6347 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6348 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6349 EVENT_INDEX(4));
6351 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6352 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6353 EVENT_INDEX(0));
6356 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6357 struct amdgpu_ib *ib,
6358 unsigned vmid, bool ctx_switch)
6360 u32 header, control = 0;
6362 if (ib->flags & AMDGPU_IB_FLAG_CE)
6363 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6364 else
6365 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6367 control |= ib->length_dw | (vmid << 24);
6369 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6370 control |= INDIRECT_BUFFER_PRE_ENB(1);
6372 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6373 gfx_v8_0_ring_emit_de_meta(ring);
6376 amdgpu_ring_write(ring, header);
6377 amdgpu_ring_write(ring,
6378 #ifdef __BIG_ENDIAN
6379 (2 << 0) |
6380 #endif
6381 (ib->gpu_addr & 0xFFFFFFFC));
6382 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6383 amdgpu_ring_write(ring, control);
6386 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6387 struct amdgpu_ib *ib,
6388 unsigned vmid, bool ctx_switch)
6390 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6392 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6393 amdgpu_ring_write(ring,
6394 #ifdef __BIG_ENDIAN
6395 (2 << 0) |
6396 #endif
6397 (ib->gpu_addr & 0xFFFFFFFC));
6398 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6399 amdgpu_ring_write(ring, control);
6402 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6403 u64 seq, unsigned flags)
6405 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6406 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6408 /* Workaround for cache flush problems. First send a dummy EOP
6409 * event down the pipe with seq one below.
6411 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6412 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6413 EOP_TC_ACTION_EN |
6414 EOP_TC_WB_ACTION_EN |
6415 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6416 EVENT_INDEX(5)));
6417 amdgpu_ring_write(ring, addr & 0xfffffffc);
6418 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6419 DATA_SEL(1) | INT_SEL(0));
6420 amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6421 amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6423 /* Then send the real EOP event down the pipe:
6424 * EVENT_WRITE_EOP - flush caches, send int */
6425 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6426 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6427 EOP_TC_ACTION_EN |
6428 EOP_TC_WB_ACTION_EN |
6429 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6430 EVENT_INDEX(5)));
6431 amdgpu_ring_write(ring, addr & 0xfffffffc);
6432 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6433 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6434 amdgpu_ring_write(ring, lower_32_bits(seq));
6435 amdgpu_ring_write(ring, upper_32_bits(seq));
6439 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6441 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6442 uint32_t seq = ring->fence_drv.sync_seq;
6443 uint64_t addr = ring->fence_drv.gpu_addr;
6445 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6446 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6447 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6448 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6449 amdgpu_ring_write(ring, addr & 0xfffffffc);
6450 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6451 amdgpu_ring_write(ring, seq);
6452 amdgpu_ring_write(ring, 0xffffffff);
6453 amdgpu_ring_write(ring, 4); /* poll interval */
6456 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6457 unsigned vmid, uint64_t pd_addr)
6459 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6461 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6463 /* wait for the invalidate to complete */
6464 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6465 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6466 WAIT_REG_MEM_FUNCTION(0) | /* always */
6467 WAIT_REG_MEM_ENGINE(0))); /* me */
6468 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6469 amdgpu_ring_write(ring, 0);
6470 amdgpu_ring_write(ring, 0); /* ref */
6471 amdgpu_ring_write(ring, 0); /* mask */
6472 amdgpu_ring_write(ring, 0x20); /* poll interval */
6474 /* compute doesn't have PFP */
6475 if (usepfp) {
6476 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6477 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6478 amdgpu_ring_write(ring, 0x0);
6482 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6484 return ring->adev->wb.wb[ring->wptr_offs];
6487 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6489 struct amdgpu_device *adev = ring->adev;
6491 /* XXX check if swapping is necessary on BE */
6492 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6493 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6496 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6497 bool acquire)
6499 struct amdgpu_device *adev = ring->adev;
6500 int pipe_num, tmp, reg;
6501 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6503 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6505 /* first me only has 2 entries, GFX and HP3D */
6506 if (ring->me > 0)
6507 pipe_num -= 2;
6509 reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6510 tmp = RREG32(reg);
6511 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6512 WREG32(reg, tmp);
6515 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6516 struct amdgpu_ring *ring,
6517 bool acquire)
6519 int i, pipe;
6520 bool reserve;
6521 struct amdgpu_ring *iring;
6523 mutex_lock(&adev->gfx.pipe_reserve_mutex);
6524 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6525 if (acquire)
6526 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6527 else
6528 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6530 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6531 /* Clear all reservations - everyone reacquires all resources */
6532 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6533 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6534 true);
6536 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6537 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6538 true);
6539 } else {
6540 /* Lower all pipes without a current reservation */
6541 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6542 iring = &adev->gfx.gfx_ring[i];
6543 pipe = amdgpu_gfx_queue_to_bit(adev,
6544 iring->me,
6545 iring->pipe,
6547 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6548 gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6551 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6552 iring = &adev->gfx.compute_ring[i];
6553 pipe = amdgpu_gfx_queue_to_bit(adev,
6554 iring->me,
6555 iring->pipe,
6557 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6558 gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6562 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6565 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6566 struct amdgpu_ring *ring,
6567 bool acquire)
6569 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6570 uint32_t queue_priority = acquire ? 0xf : 0x0;
6572 mutex_lock(&adev->srbm_mutex);
6573 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6575 WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6576 WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6578 vi_srbm_select(adev, 0, 0, 0, 0);
6579 mutex_unlock(&adev->srbm_mutex);
6581 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6582 enum drm_sched_priority priority)
6584 struct amdgpu_device *adev = ring->adev;
6585 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6587 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6588 return;
6590 gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6591 gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6594 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6595 u64 addr, u64 seq,
6596 unsigned flags)
6598 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6599 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6601 /* RELEASE_MEM - flush caches, send int */
6602 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6603 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6604 EOP_TC_ACTION_EN |
6605 EOP_TC_WB_ACTION_EN |
6606 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6607 EVENT_INDEX(5)));
6608 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6609 amdgpu_ring_write(ring, addr & 0xfffffffc);
6610 amdgpu_ring_write(ring, upper_32_bits(addr));
6611 amdgpu_ring_write(ring, lower_32_bits(seq));
6612 amdgpu_ring_write(ring, upper_32_bits(seq));
6615 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6616 u64 seq, unsigned int flags)
6618 /* we only allocate 32bit for each seq wb address */
6619 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6621 /* write fence seq to the "addr" */
6622 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6623 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6624 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6625 amdgpu_ring_write(ring, lower_32_bits(addr));
6626 amdgpu_ring_write(ring, upper_32_bits(addr));
6627 amdgpu_ring_write(ring, lower_32_bits(seq));
6629 if (flags & AMDGPU_FENCE_FLAG_INT) {
6630 /* set register to trigger INT */
6631 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6632 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6633 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6634 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6635 amdgpu_ring_write(ring, 0);
6636 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6640 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6642 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6643 amdgpu_ring_write(ring, 0);
6646 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6648 uint32_t dw2 = 0;
6650 if (amdgpu_sriov_vf(ring->adev))
6651 gfx_v8_0_ring_emit_ce_meta(ring);
6653 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6654 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6655 gfx_v8_0_ring_emit_vgt_flush(ring);
6656 /* set load_global_config & load_global_uconfig */
6657 dw2 |= 0x8001;
6658 /* set load_cs_sh_regs */
6659 dw2 |= 0x01000000;
6660 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6661 dw2 |= 0x10002;
6663 /* set load_ce_ram if preamble presented */
6664 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6665 dw2 |= 0x10000000;
6666 } else {
6667 /* still load_ce_ram if this is the first time preamble presented
6668 * although there is no context switch happens.
6670 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6671 dw2 |= 0x10000000;
6674 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6675 amdgpu_ring_write(ring, dw2);
6676 amdgpu_ring_write(ring, 0);
6679 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6681 unsigned ret;
6683 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6684 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6685 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6686 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6687 ret = ring->wptr & ring->buf_mask;
6688 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6689 return ret;
6692 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6694 unsigned cur;
6696 BUG_ON(offset > ring->buf_mask);
6697 BUG_ON(ring->ring[offset] != 0x55aa55aa);
6699 cur = (ring->wptr & ring->buf_mask) - 1;
6700 if (likely(cur > offset))
6701 ring->ring[offset] = cur - offset;
6702 else
6703 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6706 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6708 struct amdgpu_device *adev = ring->adev;
6710 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6711 amdgpu_ring_write(ring, 0 | /* src: register*/
6712 (5 << 8) | /* dst: memory */
6713 (1 << 20)); /* write confirm */
6714 amdgpu_ring_write(ring, reg);
6715 amdgpu_ring_write(ring, 0);
6716 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6717 adev->virt.reg_val_offs * 4));
6718 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6719 adev->virt.reg_val_offs * 4));
6722 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6723 uint32_t val)
6725 uint32_t cmd;
6727 switch (ring->funcs->type) {
6728 case AMDGPU_RING_TYPE_GFX:
6729 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6730 break;
6731 case AMDGPU_RING_TYPE_KIQ:
6732 cmd = 1 << 16; /* no inc addr */
6733 break;
6734 default:
6735 cmd = WR_CONFIRM;
6736 break;
6739 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6740 amdgpu_ring_write(ring, cmd);
6741 amdgpu_ring_write(ring, reg);
6742 amdgpu_ring_write(ring, 0);
6743 amdgpu_ring_write(ring, val);
6746 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6747 enum amdgpu_interrupt_state state)
6749 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6750 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6753 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6754 int me, int pipe,
6755 enum amdgpu_interrupt_state state)
6757 u32 mec_int_cntl, mec_int_cntl_reg;
6760 * amdgpu controls only the first MEC. That's why this function only
6761 * handles the setting of interrupts for this specific MEC. All other
6762 * pipes' interrupts are set by amdkfd.
6765 if (me == 1) {
6766 switch (pipe) {
6767 case 0:
6768 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6769 break;
6770 case 1:
6771 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6772 break;
6773 case 2:
6774 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6775 break;
6776 case 3:
6777 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6778 break;
6779 default:
6780 DRM_DEBUG("invalid pipe %d\n", pipe);
6781 return;
6783 } else {
6784 DRM_DEBUG("invalid me %d\n", me);
6785 return;
6788 switch (state) {
6789 case AMDGPU_IRQ_STATE_DISABLE:
6790 mec_int_cntl = RREG32(mec_int_cntl_reg);
6791 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6792 WREG32(mec_int_cntl_reg, mec_int_cntl);
6793 break;
6794 case AMDGPU_IRQ_STATE_ENABLE:
6795 mec_int_cntl = RREG32(mec_int_cntl_reg);
6796 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6797 WREG32(mec_int_cntl_reg, mec_int_cntl);
6798 break;
6799 default:
6800 break;
6804 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6805 struct amdgpu_irq_src *source,
6806 unsigned type,
6807 enum amdgpu_interrupt_state state)
6809 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6810 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6812 return 0;
6815 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6816 struct amdgpu_irq_src *source,
6817 unsigned type,
6818 enum amdgpu_interrupt_state state)
6820 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6821 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6823 return 0;
6826 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6827 struct amdgpu_irq_src *src,
6828 unsigned type,
6829 enum amdgpu_interrupt_state state)
6831 switch (type) {
6832 case AMDGPU_CP_IRQ_GFX_EOP:
6833 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6834 break;
6835 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6836 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6837 break;
6838 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6839 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6840 break;
6841 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6842 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6843 break;
6844 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6845 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6846 break;
6847 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6848 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6849 break;
6850 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6851 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6852 break;
6853 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6854 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6855 break;
6856 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6857 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6858 break;
6859 default:
6860 break;
6862 return 0;
6865 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6866 struct amdgpu_irq_src *source,
6867 unsigned int type,
6868 enum amdgpu_interrupt_state state)
6870 int enable_flag;
6872 switch (state) {
6873 case AMDGPU_IRQ_STATE_DISABLE:
6874 enable_flag = 0;
6875 break;
6877 case AMDGPU_IRQ_STATE_ENABLE:
6878 enable_flag = 1;
6879 break;
6881 default:
6882 return -EINVAL;
6885 WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6886 WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6887 WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6888 WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6889 WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6890 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6891 enable_flag);
6892 WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6893 enable_flag);
6894 WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6895 enable_flag);
6896 WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6897 enable_flag);
6898 WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6899 enable_flag);
6900 WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6901 enable_flag);
6902 WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6903 enable_flag);
6904 WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6905 enable_flag);
6907 return 0;
6910 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6911 struct amdgpu_irq_src *source,
6912 unsigned int type,
6913 enum amdgpu_interrupt_state state)
6915 int enable_flag;
6917 switch (state) {
6918 case AMDGPU_IRQ_STATE_DISABLE:
6919 enable_flag = 1;
6920 break;
6922 case AMDGPU_IRQ_STATE_ENABLE:
6923 enable_flag = 0;
6924 break;
6926 default:
6927 return -EINVAL;
6930 WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6931 enable_flag);
6933 return 0;
6936 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6937 struct amdgpu_irq_src *source,
6938 struct amdgpu_iv_entry *entry)
6940 int i;
6941 u8 me_id, pipe_id, queue_id;
6942 struct amdgpu_ring *ring;
6944 DRM_DEBUG("IH: CP EOP\n");
6945 me_id = (entry->ring_id & 0x0c) >> 2;
6946 pipe_id = (entry->ring_id & 0x03) >> 0;
6947 queue_id = (entry->ring_id & 0x70) >> 4;
6949 switch (me_id) {
6950 case 0:
6951 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6952 break;
6953 case 1:
6954 case 2:
6955 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6956 ring = &adev->gfx.compute_ring[i];
6957 /* Per-queue interrupt is supported for MEC starting from VI.
6958 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6960 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6961 amdgpu_fence_process(ring);
6963 break;
6965 return 0;
6968 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6969 struct amdgpu_irq_src *source,
6970 struct amdgpu_iv_entry *entry)
6972 DRM_ERROR("Illegal register access in command stream\n");
6973 schedule_work(&adev->reset_work);
6974 return 0;
6977 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6978 struct amdgpu_irq_src *source,
6979 struct amdgpu_iv_entry *entry)
6981 DRM_ERROR("Illegal instruction in command stream\n");
6982 schedule_work(&adev->reset_work);
6983 return 0;
/*
 * Interrupt handler for CP EDC/ECC errors: the event is only logged.
 * None of the arguments are used.  Always returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* terminate the message so it forms a complete log line on its own */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}

6994 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6996 u32 enc, se_id, sh_id, cu_id;
6997 char type[20];
6998 int sq_edc_source = -1;
7000 enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
7001 se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
7003 switch (enc) {
7004 case 0:
7005 DRM_INFO("SQ general purpose intr detected:"
7006 "se_id %d, immed_overflow %d, host_reg_overflow %d,"
7007 "host_cmd_overflow %d, cmd_timestamp %d,"
7008 "reg_timestamp %d, thread_trace_buff_full %d,"
7009 "wlt %d, thread_trace %d.\n",
7010 se_id,
7011 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
7012 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
7013 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
7014 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
7015 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
7016 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
7017 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
7018 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
7020 break;
7021 case 1:
7022 case 2:
7024 cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
7025 sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
7028 * This function can be called either directly from ISR
7029 * or from BH in which case we can access SQ_EDC_INFO
7030 * instance
7032 if (in_task()) {
7033 mutex_lock(&adev->grbm_idx_mutex);
7034 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
7036 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
7038 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7039 mutex_unlock(&adev->grbm_idx_mutex);
7042 if (enc == 1)
7043 sprintf(type, "instruction intr");
7044 else
7045 sprintf(type, "EDC/ECC error");
7047 DRM_INFO(
7048 "SQ %s detected: "
7049 "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
7050 "trap %s, sq_ed_info.source %s.\n",
7051 type, se_id, sh_id, cu_id,
7052 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
7053 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
7054 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
7055 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
7056 (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
7058 break;
7059 default:
7060 DRM_ERROR("SQ invalid encoding type\n.");
7064 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
7067 struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
7068 struct sq_work *sq_work = container_of(work, struct sq_work, work);
7070 gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
7073 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
7074 struct amdgpu_irq_src *source,
7075 struct amdgpu_iv_entry *entry)
7077 unsigned ih_data = entry->src_data[0];
7080 * Try to submit work so SQ_EDC_INFO can be accessed from
7081 * BH. If previous work submission hasn't finished yet
7082 * just print whatever info is possible directly from the ISR.
7084 if (work_pending(&adev->gfx.sq_work.work)) {
7085 gfx_v8_0_parse_sq_irq(adev, ih_data);
7086 } else {
7087 adev->gfx.sq_work.ih_data = ih_data;
7088 schedule_work(&adev->gfx.sq_work.work);
7091 return 0;
7094 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
7095 struct amdgpu_irq_src *src,
7096 unsigned int type,
7097 enum amdgpu_interrupt_state state)
7099 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7101 switch (type) {
7102 case AMDGPU_CP_KIQ_IRQ_DRIVER0:
7103 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
7104 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7105 if (ring->me == 1)
7106 WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
7107 ring->pipe,
7108 GENERIC2_INT_ENABLE,
7109 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7110 else
7111 WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
7112 ring->pipe,
7113 GENERIC2_INT_ENABLE,
7114 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7115 break;
7116 default:
7117 BUG(); /* kiq only support GENERIC2_INT now */
7118 break;
7120 return 0;
7123 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
7124 struct amdgpu_irq_src *source,
7125 struct amdgpu_iv_entry *entry)
7127 u8 me_id, pipe_id, queue_id;
7128 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7130 me_id = (entry->ring_id & 0x0c) >> 2;
7131 pipe_id = (entry->ring_id & 0x03) >> 0;
7132 queue_id = (entry->ring_id & 0x70) >> 4;
7133 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
7134 me_id, pipe_id, queue_id);
7136 amdgpu_fence_process(ring);
7137 return 0;
7140 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
7141 .name = "gfx_v8_0",
7142 .early_init = gfx_v8_0_early_init,
7143 .late_init = gfx_v8_0_late_init,
7144 .sw_init = gfx_v8_0_sw_init,
7145 .sw_fini = gfx_v8_0_sw_fini,
7146 .hw_init = gfx_v8_0_hw_init,
7147 .hw_fini = gfx_v8_0_hw_fini,
7148 .suspend = gfx_v8_0_suspend,
7149 .resume = gfx_v8_0_resume,
7150 .is_idle = gfx_v8_0_is_idle,
7151 .wait_for_idle = gfx_v8_0_wait_for_idle,
7152 .check_soft_reset = gfx_v8_0_check_soft_reset,
7153 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
7154 .soft_reset = gfx_v8_0_soft_reset,
7155 .post_soft_reset = gfx_v8_0_post_soft_reset,
7156 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
7157 .set_powergating_state = gfx_v8_0_set_powergating_state,
7158 .get_clockgating_state = gfx_v8_0_get_clockgating_state,
7161 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
7162 .type = AMDGPU_RING_TYPE_GFX,
7163 .align_mask = 0xff,
7164 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7165 .support_64bit_ptrs = false,
7166 .get_rptr = gfx_v8_0_ring_get_rptr,
7167 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
7168 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
7169 .emit_frame_size = /* maximum 215dw if count 16 IBs in */
7170 5 + /* COND_EXEC */
7171 7 + /* PIPELINE_SYNC */
7172 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
7173 12 + /* FENCE for VM_FLUSH */
7174 20 + /* GDS switch */
7175 4 + /* double SWITCH_BUFFER,
7176 the first COND_EXEC jump to the place just
7177 prior to this double SWITCH_BUFFER */
7178 5 + /* COND_EXEC */
7179 7 + /* HDP_flush */
7180 4 + /* VGT_flush */
7181 14 + /* CE_META */
7182 31 + /* DE_META */
7183 3 + /* CNTX_CTRL */
7184 5 + /* HDP_INVL */
7185 12 + 12 + /* FENCE x2 */
7186 2, /* SWITCH_BUFFER */
7187 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
7188 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
7189 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
7190 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7191 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7192 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7193 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7194 .test_ring = gfx_v8_0_ring_test_ring,
7195 .test_ib = gfx_v8_0_ring_test_ib,
7196 .insert_nop = amdgpu_ring_insert_nop,
7197 .pad_ib = amdgpu_ring_generic_pad_ib,
7198 .emit_switch_buffer = gfx_v8_ring_emit_sb,
7199 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
7200 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
7201 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
7202 .emit_wreg = gfx_v8_0_ring_emit_wreg,
7205 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
7206 .type = AMDGPU_RING_TYPE_COMPUTE,
7207 .align_mask = 0xff,
7208 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7209 .support_64bit_ptrs = false,
7210 .get_rptr = gfx_v8_0_ring_get_rptr,
7211 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7212 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7213 .emit_frame_size =
7214 20 + /* gfx_v8_0_ring_emit_gds_switch */
7215 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7216 5 + /* hdp_invalidate */
7217 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7218 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
7219 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
7220 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
7221 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
7222 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
7223 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7224 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7225 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7226 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7227 .test_ring = gfx_v8_0_ring_test_ring,
7228 .test_ib = gfx_v8_0_ring_test_ib,
7229 .insert_nop = amdgpu_ring_insert_nop,
7230 .pad_ib = amdgpu_ring_generic_pad_ib,
7231 .set_priority = gfx_v8_0_ring_set_priority_compute,
7232 .emit_wreg = gfx_v8_0_ring_emit_wreg,
7235 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7236 .type = AMDGPU_RING_TYPE_KIQ,
7237 .align_mask = 0xff,
7238 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7239 .support_64bit_ptrs = false,
7240 .get_rptr = gfx_v8_0_ring_get_rptr,
7241 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7242 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7243 .emit_frame_size =
7244 20 + /* gfx_v8_0_ring_emit_gds_switch */
7245 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7246 5 + /* hdp_invalidate */
7247 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7248 17 + /* gfx_v8_0_ring_emit_vm_flush */
7249 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7250 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
7251 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
7252 .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7253 .test_ring = gfx_v8_0_ring_test_ring,
7254 .test_ib = gfx_v8_0_ring_test_ib,
7255 .insert_nop = amdgpu_ring_insert_nop,
7256 .pad_ib = amdgpu_ring_generic_pad_ib,
7257 .emit_rreg = gfx_v8_0_ring_emit_rreg,
7258 .emit_wreg = gfx_v8_0_ring_emit_wreg,
7261 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7263 int i;
7265 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7267 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7268 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7270 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7271 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7274 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7275 .set = gfx_v8_0_set_eop_interrupt_state,
7276 .process = gfx_v8_0_eop_irq,
7279 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7280 .set = gfx_v8_0_set_priv_reg_fault_state,
7281 .process = gfx_v8_0_priv_reg_irq,
7284 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7285 .set = gfx_v8_0_set_priv_inst_fault_state,
7286 .process = gfx_v8_0_priv_inst_irq,
7289 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7290 .set = gfx_v8_0_kiq_set_interrupt_state,
7291 .process = gfx_v8_0_kiq_irq,
7294 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7295 .set = gfx_v8_0_set_cp_ecc_int_state,
7296 .process = gfx_v8_0_cp_ecc_error_irq,
7299 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7300 .set = gfx_v8_0_set_sq_int_state,
7301 .process = gfx_v8_0_sq_irq,
7304 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7306 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7307 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7309 adev->gfx.priv_reg_irq.num_types = 1;
7310 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7312 adev->gfx.priv_inst_irq.num_types = 1;
7313 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7315 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7316 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
7318 adev->gfx.cp_ecc_error_irq.num_types = 1;
7319 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7321 adev->gfx.sq_irq.num_types = 1;
7322 adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7325 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7327 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7330 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7332 /* init asci gds info */
7333 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7334 adev->gds.gws.total_size = 64;
7335 adev->gds.oa.total_size = 16;
7337 if (adev->gds.mem.total_size == 64 * 1024) {
7338 adev->gds.mem.gfx_partition_size = 4096;
7339 adev->gds.mem.cs_partition_size = 4096;
7341 adev->gds.gws.gfx_partition_size = 4;
7342 adev->gds.gws.cs_partition_size = 4;
7344 adev->gds.oa.gfx_partition_size = 4;
7345 adev->gds.oa.cs_partition_size = 1;
7346 } else {
7347 adev->gds.mem.gfx_partition_size = 1024;
7348 adev->gds.mem.cs_partition_size = 1024;
7350 adev->gds.gws.gfx_partition_size = 16;
7351 adev->gds.gws.cs_partition_size = 16;
7353 adev->gds.oa.gfx_partition_size = 4;
7354 adev->gds.oa.cs_partition_size = 4;
7358 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7359 u32 bitmap)
7361 u32 data;
7363 if (!bitmap)
7364 return;
7366 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7367 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7369 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7372 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7374 u32 data, mask;
7376 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7377 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7379 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7381 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7384 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7386 int i, j, k, counter, active_cu_number = 0;
7387 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7388 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7389 unsigned disable_masks[4 * 2];
7390 u32 ao_cu_num;
7392 memset(cu_info, 0, sizeof(*cu_info));
7394 if (adev->flags & AMD_IS_APU)
7395 ao_cu_num = 2;
7396 else
7397 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7399 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7401 mutex_lock(&adev->grbm_idx_mutex);
7402 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7403 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7404 mask = 1;
7405 ao_bitmap = 0;
7406 counter = 0;
7407 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7408 if (i < 4 && j < 2)
7409 gfx_v8_0_set_user_cu_inactive_bitmap(
7410 adev, disable_masks[i * 2 + j]);
7411 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7412 cu_info->bitmap[i][j] = bitmap;
7414 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7415 if (bitmap & mask) {
7416 if (counter < ao_cu_num)
7417 ao_bitmap |= mask;
7418 counter ++;
7420 mask <<= 1;
7422 active_cu_number += counter;
7423 if (i < 2 && j < 2)
7424 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7425 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7428 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7429 mutex_unlock(&adev->grbm_idx_mutex);
7431 cu_info->number = active_cu_number;
7432 cu_info->ao_cu_mask = ao_cu_mask;
7433 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7434 cu_info->max_waves_per_simd = 10;
7435 cu_info->max_scratch_slots_per_cu = 32;
7436 cu_info->wave_front_size = 64;
7437 cu_info->lds_size = 64;
7440 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7442 .type = AMD_IP_BLOCK_TYPE_GFX,
7443 .major = 8,
7444 .minor = 0,
7445 .rev = 0,
7446 .funcs = &gfx_v8_0_ip_funcs,
7449 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7451 .type = AMD_IP_BLOCK_TYPE_GFX,
7452 .major = 8,
7453 .minor = 1,
7454 .rev = 0,
7455 .funcs = &gfx_v8_0_ip_funcs,
7458 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7460 uint64_t ce_payload_addr;
7461 int cnt_ce;
7462 union {
7463 struct vi_ce_ib_state regular;
7464 struct vi_ce_ib_state_chained_ib chained;
7465 } ce_payload = {};
7467 if (ring->adev->virt.chained_ib_support) {
7468 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7469 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7470 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7471 } else {
7472 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7473 offsetof(struct vi_gfx_meta_data, ce_payload);
7474 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7477 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7478 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7479 WRITE_DATA_DST_SEL(8) |
7480 WR_CONFIRM) |
7481 WRITE_DATA_CACHE_POLICY(0));
7482 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7483 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7484 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7487 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7489 uint64_t de_payload_addr, gds_addr, csa_addr;
7490 int cnt_de;
7491 union {
7492 struct vi_de_ib_state regular;
7493 struct vi_de_ib_state_chained_ib chained;
7494 } de_payload = {};
7496 csa_addr = amdgpu_csa_vaddr(ring->adev);
7497 gds_addr = csa_addr + 4096;
7498 if (ring->adev->virt.chained_ib_support) {
7499 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7500 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7501 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7502 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7503 } else {
7504 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7505 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7506 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7507 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7510 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7511 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7512 WRITE_DATA_DST_SEL(8) |
7513 WR_CONFIRM) |
7514 WRITE_DATA_CACHE_POLICY(0));
7515 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7516 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7517 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);