/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"

#include "vega10/soc15ip.h"
#include "vega10/GC/gc_9_0_offset.h"
#include "vega10/GC/gc_9_0_sh_mask.h"
#include "vega10/vega10_enum.h"
#include "vega10/HDP/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_NUM_COMPUTE_RINGS 8
#define RLCG_UCODE_LOADING_START_ADDRESS 0x2000

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID1), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID1)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID2), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID2)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID3), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID3)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID4), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID4)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID5), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID5)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID6), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID6)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID7), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID7)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID8), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID8)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID9), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID9)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID10), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID10)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID11), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID11)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID12), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID12)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID13), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID13)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID14), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID14)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID15), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID15)}
};
static const u32 golden_settings_gc_9_0[] =
{
	SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00ffeff, 0x00000400,
	SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024,
	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001,
	SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000,
	SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000,
	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x4a2c0e68,
	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0xb5d3f197,
	SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff
};
static const u32 golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0x0000f000, 0x00012107,
	SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000,
	SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x2a114042,
	SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x2a114042,
	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0x00008000, 0x00048000,
	SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000,
	SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800,
	SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1), 0x0000000f, 0x00000007
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_gc_9_0,
						 (const u32)ARRAY_SIZE(golden_settings_gc_9_0));
		amdgpu_program_register_sequence(adev,
						 golden_settings_gc_9_0_vg10,
						 (const u32)ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	default:
		break;
	}
}
static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
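
/*
 * Emit a PACKET3_WRITE_DATA that writes 'val' into the register at dword
 * offset 'reg' from the CP engine selected by 'eng_sel'; 'wc' asks the CP to
 * confirm the write before continuing.
 */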
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
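
/*
 * Emit a PACKET3_WAIT_REG_MEM that makes the selected CP engine poll a
 * register or memory dword until (value & mask) == ref, re-checking at the
 * poll interval given by 'inv'.
 */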
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		err = 0;
		adev->gfx.mec2_fw = NULL;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
			info->fw = adev->gfx.mec2_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}
static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
	if (adev->gfx.mec.mec_fw_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve mec firmware bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

		amdgpu_bo_unref(&adev->gfx.mec.mec_fw_obj);
		adev->gfx.mec.mec_fw_obj = NULL;
	}
}
#define MEC_HPD_SIZE 2048

static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	const __le32 *fw_data;
	unsigned fw_size;
	u32 *fw;
	const struct gfx_firmware_header_v1_0 *mec_hdr;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v9_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	if (adev->gfx.mec.mec_fw_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     mec_hdr->header.ucode_size_bytes,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.mec_fw_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false);
	if (unlikely(r != 0)) {
		gfx_v9_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.mec_fw_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.mec_fw_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin mec firmware bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.mec_fw_obj, (void **)&fw);
	if (r) {
		dev_warn(adev->dev, "(%d) map firmware bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}
	memcpy(fw, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

	return 0;
}
static void gfx_v9_0_kiq_fini(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}
static int gfx_v9_0_kiq_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	memset(hpd, 0, MEC_HPD_SIZE);

	r = amdgpu_bo_reserve(kiq->eop_obj, true);
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
	amdgpu_bo_kunmap(kiq->eop_obj);
	amdgpu_bo_unreserve(kiq->eop_obj);

	return 0;
}
static int gfx_v9_0_kiq_init_ring(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  struct amdgpu_irq_src *irq)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	int r;

	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
	if (r)
		return r;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
	if (adev->gfx.mec2_fw) {
		ring->me = 2;
		ring->pipe = 0;
	} else {
		ring->me = 1;
		ring->pipe = 1;
	}

	ring->eop_gpu_addr = kiq->eop_gpu_addr;
	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
	r = amdgpu_ring_init(adev, ring, 1024,
			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
	if (r)
		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

	return r;
}
static void gfx_v9_0_kiq_free_ring(struct amdgpu_ring *ring,
				   struct amdgpu_irq_src *irq)
{
	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
	amdgpu_ring_fini(ring);
}
/* create MQD for each compute queue */
static int gfx_v9_0_compute_mqd_sw_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r, i;

	/* create MQD for KIQ */
	ring = &adev->gfx.kiq.ring;
	if (!ring->mqd_obj) {
		r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
					    &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
		if (r) {
			dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
			return r;
		}

		/*TODO: prepare MQD backup */
	}

	/* create MQD for each KCQ */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		if (!ring->mqd_obj) {
			r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE,
						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
						    &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
			if (r) {
				dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
				return r;
			}

			/* TODO: prepare MQD backup */
		}
	}

	return 0;
}
static void gfx_v9_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int i;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
	}

	ring = &adev->gfx.kiq.ring;
	amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
}
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}
static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}
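
/*
 * Dump the architectural state of one wave (status, PC, EXEC mask, HW id,
 * instruction dwords, GPR/LDS allocation, trap/IB status, M0) via the SQ
 * indirect-register helpers above; the leading '1' tags the record as
 * "type 1" wave data for the caller.
 */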
static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 1 wave data */
	dst[(*no_fields)++] = 1;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}
static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v9_0_select_se_sh,
	.read_wave_data = &gfx_v9_0_read_wave_data,
	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
};
static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;

	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		adev->gfx.config.gs_vgt_table_depth = 32;
		adev->gfx.config.gs_prim_buffer_depth = 1792;
		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		BUG();
		break;
	}

	adev->gfx.config.gb_addr_config = gb_addr_config;

	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, NUM_PIPES);

	adev->gfx.config.max_tile_pipes =
		adev->gfx.config.gb_addr_config_fields.num_pipes;

	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, NUM_BANKS);
	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, NUM_RB_PER_SE);
	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
}
static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
				   struct amdgpu_ngg_buf *ngg_buf,
				   int size_se,
				   int default_size_se)
{
	int r;

	if (size_se < 0) {
		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
		return -EINVAL;
	}
	size_se = size_se ? size_se : default_size_se;

	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				    &ngg_buf->bo,
				    &ngg_buf->gpu_addr,
				    NULL);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
		return r;
	}
	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);

	return r;
}
static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < NGG_BUF_MAX; i++)
		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
				      &adev->gfx.ngg.buf[i].gpu_addr,
				      NULL);

	memset(&adev->gfx.ngg.buf[0], 0,
	       sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);

	adev->gfx.ngg.init = false;

	return 0;
}
static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
{
	int r;

	if (!amdgpu_ngg || adev->gfx.ngg.init == true)
		return 0;

	/* GDS reserve memory: 64 bytes alignment */
	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
	adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
	adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
	adev->gfx.ngg.gds_reserve_addr = amdgpu_gds_reg_offset[0].mem_base;
	adev->gfx.ngg.gds_reserve_addr += adev->gds.mem.gfx_partition_size;

	/* Primitive Buffer */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
				    amdgpu_prim_buf_per_se,
				    64 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
		goto err;
	}

	/* Position Buffer */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
				    amdgpu_pos_buf_per_se,
				    256 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Position Buffer\n");
		goto err;
	}

	/* Control Sideband */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
				    amdgpu_cntl_sb_buf_per_se,
				    256);
	if (r) {
		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
		goto err;
	}

	/* Parameter Cache, not created by default */
	if (amdgpu_param_buf_per_se <= 0)
		goto out;

	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
				    amdgpu_param_buf_per_se,
				    512 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Parameter Cache\n");
		goto err;
	}

out:
	adev->gfx.ngg.init = true;
	return 0;
err:
	gfx_v9_0_ngg_fini(adev);
	return r;
}
static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	int r;
	u32 data = 0;
	u32 size;
	u32 base;

	/* Program buffer size */
	size = adev->gfx.ngg.buf[NGG_PRIM].size / 256;
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, size);

	size = adev->gfx.ngg.buf[NGG_POS].size / 256;
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, size);

	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);

	data = 0;
	size = adev->gfx.ngg.buf[NGG_CNTL].size / 256;
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, size);

	size = adev->gfx.ngg.buf[NGG_PARAM].size / 1024;
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, size);

	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);

	/* Program buffer base address */
	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);

	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);

	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);

	/* Clear GDS reserved memory */
	r = amdgpu_ring_alloc(ring, 17);
	if (r) {
		DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}

	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   amdgpu_gds_reg_offset[0].mem_size,
				   (adev->gds.mem.total_size +
				    adev->gfx.ngg.gds_reserve_size) >>
				   AMDGPU_GDS_SHIFT);

	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
				PACKET3_DMA_DATA_SRC_SEL(2)));
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_size);

	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   amdgpu_gds_reg_offset[0].mem_size, 0);

	amdgpu_ring_commit(ring);

	return 0;
}
static int gfx_v9_0_sw_init(void *handle)
{
	int i, r;
	unsigned irq_type;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v9_0_scratch_init(adev);

	r = gfx_v9_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v9_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1;
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = (AMDGPU_DOORBELL64_MEC_RING0 + i) << 1;
		ring->me = 1; /* first MEC */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->gfx.eop_irq, irq_type);
		if (r)
			return r;
	}

	if (amdgpu_sriov_vf(adev)) {
		r = gfx_v9_0_kiq_init(adev);
		if (r) {
			DRM_ERROR("Failed to init KIQ BOs!\n");
			return r;
		}

		kiq = &adev->gfx.kiq;
		r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
		if (r)
			return r;

		/* create MQD for all compute queues as wel as KIQ for SRIOV case */
		r = gfx_v9_0_compute_mqd_sw_init(adev);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	gfx_v9_0_gpu_early_init(adev);

	r = gfx_v9_0_ngg_init(adev);
	if (r)
		return r;

	return 0;
}
static int gfx_v9_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	if (amdgpu_sriov_vf(adev)) {
		gfx_v9_0_compute_mqd_sw_fini(adev);
		gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
		gfx_v9_0_kiq_fini(adev);
	}

	gfx_v9_0_mec_fini(adev);
	gfx_v9_0_ngg_fini(adev);

	return 0;
}
static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	/* TODO */
}
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
{
	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);

	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	} else if (se_num == 0xffffffff) {
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	} else if (sh_num == 0xffffffff) {
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
	} else {
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
	}
	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
}
static u32 gfx_v9_0_create_bitmask(u32 bit_width)
{
	return (u32)((1ULL << bit_width) - 1);
}
static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);

	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_backends_per_se /
				       adev->gfx.config.max_sh_per_se);

	return (~data) & mask;
}
static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v9_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);
}
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:		0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:	0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:	0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		soc15_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
		WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
	}
	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);

	gfx_v9_0_tiling_mode_table_init(adev);

	gfx_v9_0_setup_rb(adev);
	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		soc15_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		tmp = 0;
		tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
				    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
		WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);
	}
	soc15_grbm_select(adev, 0, 0, 0, 0);

	mutex_unlock(&adev->srbm_mutex);

	gfx_v9_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE,
		     (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		     (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		     (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		     (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);
}
static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
}
void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
{
	u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
	WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp);

	gfx_v9_0_enable_gui_idle_interrupt(adev, false);

	gfx_v9_0_wait_for_rlc_serdes(adev);
}
static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
}
static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
{
#ifdef AMDGPU_RLC_DEBUG_RETRY
	u32 rlc_ucode_ver;
#endif

	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v9_0_enable_gui_idle_interrupt(adev, true);

#ifdef AMDGPU_RLC_DEBUG_RETRY
	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
	if (rlc_ucode_ver == 0x108) {
		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
			 rlc_ucode_ver, adev->gfx.rlc_fw_version);
		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
		 * default is 0x9C4 to create a 100us interval */
		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
		 * to disable the page fault retry interrupts, default is
		 * 0x100 (256) */
		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
	}
#endif
}
static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
			RLCG_UCODE_LOADING_START_ADDRESS);
	for (i = 0; i < fw_size; i++)
		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	if (amdgpu_sriov_vf(adev))
		return 0;

	gfx_v9_0_rlc_stop(adev);

	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);

	WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0);

	gfx_v9_0_rlc_reset(adev);

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
		/* legacy rlc firmware loading */
		r = gfx_v9_0_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v9_0_rlc_start(adev);

	return 0;
}
static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);

	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
	if (!enable) {
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
}
static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	gfx_v9_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}
static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);

	gfx_v9_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's write pointers */
	ring->wptr = 0;
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));

	/* set the wb address wether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));

	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}
	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			DOORBELL_RANGE_LOWER, ring->doorbell_index);
	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);

	/* start the ring */
	gfx_v9_0_cp_gfx_start(adev);

	return 0;
}
static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
	} else {
		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
		adev->gfx.kiq.ring.ready = false;
	}
}
static int gfx_v9_0_cp_compute_start(struct amdgpu_device *adev)
{
	gfx_v9_0_cp_compute_enable(adev, true);

	return 0;
}
static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i;
	u32 tmp;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v9_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	tmp = 0;
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);

	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));

	/* MEC1 */
	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
		     mec_hdr->jt_offset);
	for (i = 0; i < mec_hdr->jt_size; i++)
		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
			     le32_to_cpup(fw_data + mec_hdr->jt_offset + i));

	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
		     adev->gfx.mec_fw_version);
	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */

	return 0;
}
static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj) {
			r = amdgpu_bo_reserve(ring->mqd_obj, true);
			if (unlikely(r != 0))
				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);

			amdgpu_bo_unpin(ring->mqd_obj);
			amdgpu_bo_unreserve(ring->mqd_obj);

			amdgpu_bo_unref(&ring->mqd_obj);
			ring->mqd_obj = NULL;
		}
	}
}
static int gfx_v9_0_init_queue(struct amdgpu_ring *ring);

static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (gfx_v9_0_init_queue(ring))
			dev_warn(adev->dev, "compute queue %d init failed!\n", i);
	}

	r = gfx_v9_0_cp_compute_start(adev);
	if (r)
		return r;

	return 0;
}
static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
}
static void gfx_v9_0_kiq_enable(struct amdgpu_ring *ring)
{
	amdgpu_ring_alloc(ring, 8);
	/* set resources */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(ring, 0x000000FF);	/* queue mask lo */
	amdgpu_ring_write(ring, 0);	/* queue mask hi */
	amdgpu_ring_write(ring, 0);	/* gws mask lo */
	amdgpu_ring_write(ring, 0);	/* gws mask hi */
	amdgpu_ring_write(ring, 0);	/* oac mask */
	amdgpu_ring_write(ring, 0);	/* gds heap base:0, gds heap size:0 */
	amdgpu_ring_commit(ring);
}
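
/*
 * Build a MAP_QUEUES packet on the KIQ ring so the CP maps the given
 * compute queue: the packet carries the queue/pipe/ME selection, the
 * doorbell offset and the GPU addresses of the MQD and of the wptr
 * write-back location.
 */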
static void gfx_v9_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr, wptr_addr;

	mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	amdgpu_ring_alloc(kiq_ring, 8);

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			  (0 << 4) | /* Queue_Sel */
			  (0 << 8) | /* VMID */
			  (ring->queue << 13) |
			  (ring->pipe << 16) |
			  ((ring->me == 1 ? 0 : 1) << 18) |
			  (0 << 21) | /* queue_type: normal compute queue */
			  (1 << 24) | /* alloc format: all_on_one_pipe */
			  (0 << 26) | /* engine_sel: compute */
			  (1 << 29)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	amdgpu_ring_commit(kiq_ring);
}
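
/*
 * Fill the memory queue descriptor (MQD) for a compute queue: EOP buffer
 * address and size, doorbell control, MQD and ring buffer base addresses,
 * rptr/wptr write-back addresses and the HQD control word. The values are
 * committed to the HQD registers later, either directly or via the KIQ.
 */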
static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;

	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* disable the queue if it's active */
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MIN_IB_AVAIL_SIZE */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	mqd->cp_hqd_ib_control = tmp;

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
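
/* Program the HQD registers directly from the MQD contents prepared above. */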
static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	int j;

	/* disable wptr polling */
	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);

	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);

	/* enable doorbell? */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
	       mqd->cp_hqd_pq_doorbell_control);

	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
		       mqd->cp_hqd_dequeue_request);
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo);
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi);
	}

	/* set the pointer to the MQD */
	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, mqd->cp_mqd_control);

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

	/* set the wb address whether it's enabled or not */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
	       mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
			(AMDGPU_DOORBELL64_KIQ * 2) << 2);
		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
			(AMDGPU_DOORBELL64_USERQUEUE_END * 2) << 2);
	}

	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
	       mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi);

	/* set the vmid for the queue */
	WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE,
	       mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

	if (ring->use_doorbell)
		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);

	return 0;
}
static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct v9_mqd *mqd = ring->mqd_ptr;
	bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	if (is_kiq)
		gfx_v9_0_kiq_setting(&kiq->ring);
	else
		mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->gfx.in_reset) {
		memset((void *)mqd, 0, sizeof(*mqd));
		mutex_lock(&adev->srbm_mutex);
		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v9_0_mqd_init(ring);
		if (is_kiq)
			gfx_v9_0_kiq_init_register(ring);
		soc15_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else { /* for GPU_RESET case */
		/* reset MQD to a clean status */

		/* reset ring buffer */

		if (is_kiq) {
			mutex_lock(&adev->srbm_mutex);
			soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v9_0_kiq_init_register(ring);
			soc15_grbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
	}

	if (is_kiq)
		gfx_v9_0_kiq_enable(ring);
	else
		gfx_v9_0_map_queue_enable(&kiq->ring, ring);

	return 0;
}
static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v9_0_cp_compute_enable(adev, true);

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
	if (!r) {
		r = gfx_v9_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
		if (!r) {
			r = gfx_v9_0_kiq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

done:
	return r;
}
static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *ring;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v9_0_enable_gui_idle_interrupt(adev, false);

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
		/* legacy firmware loading */
		r = gfx_v9_0_cp_gfx_load_microcode(adev);
		if (r)
			return r;

		r = gfx_v9_0_cp_compute_load_microcode(adev);
		if (r)
			return r;
	}

	r = gfx_v9_0_cp_gfx_resume(adev);
	if (r)
		return r;

	if (amdgpu_sriov_vf(adev))
		r = gfx_v9_0_kiq_resume(adev);
	else
		r = gfx_v9_0_cp_compute_resume(adev);
	if (r)
		return r;

	ring = &adev->gfx.gfx_ring[0];
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	if (amdgpu_sriov_vf(adev)) {
		ring = &adev->gfx.kiq.ring;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	gfx_v9_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}

static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v9_0_cp_gfx_enable(adev, enable);
	gfx_v9_0_cp_compute_enable(adev, enable);
}
static int gfx_v9_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v9_0_init_golden_registers(adev);

	gfx_v9_0_gpu_init(adev);

	r = gfx_v9_0_rlc_resume(adev);
	if (r)
		return r;

	r = gfx_v9_0_cp_resume(adev);
	if (r)
		return r;

	r = gfx_v9_0_ngg_en(adev);

	return r;
}
static int gfx_v9_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v9_0_cp_enable(adev, false);
	gfx_v9_0_rlc_stop(adev);
	gfx_v9_0_cp_compute_fini(adev);

	return 0;
}
static int gfx_v9_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v9_0_hw_fini(adev);
}

static int gfx_v9_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v9_0_hw_init(adev);
}
static bool gfx_v9_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
				GRBM_STATUS, GUI_ACTIVE))
		return false;
	else
		return true;
}
static int gfx_v9_0_wait_for_idle(void *handle)
{
	unsigned i;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		/* read MC_STATUS */
		tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) &
			GRBM_STATUS__GUI_ACTIVE_MASK;

		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}
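
/*
 * Check GRBM_STATUS/GRBM_STATUS2 for busy blocks and, if anything is stuck,
 * halt the RLC and CP and pulse the corresponding GRBM soft-reset bits.
 */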
static int gfx_v9_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS */
	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (grbm_soft_reset) {
		/* stop the rlc */
		gfx_v9_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v9_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v9_0_cp_compute_enable(adev, false);

		if (grbm_soft_reset) {
			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
		}

		/* Wait a little for things to settle down */
		udelay(50);
	}
	return 0;
}
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   amdgpu_gds_reg_offset[vmid].mem_base,
				   gds_base);

	/* GDS Size */
	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   amdgpu_gds_reg_offset[vmid].mem_size,
				   gds_size);

	/* GWS */
	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   amdgpu_gds_reg_offset[vmid].gws,
				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA */
	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   amdgpu_gds_reg_offset[vmid].oa,
				   (1 << (oa_size + oa_base)) - (1 << oa_base));
}
static int gfx_v9_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX9_NUM_COMPUTE_RINGS;
	gfx_v9_0_set_ring_funcs(adev);
	gfx_v9_0_set_irq_funcs(adev);
	gfx_v9_0_set_gds_init(adev);
	gfx_v9_0_set_rlc_funcs(adev);

	return 0;
}
static int gfx_v9_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	return 0;
}
static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	uint32_t rlc_setting, data;
	unsigned i;

	if (adev->gfx.rlc.in_safe_mode)
		return;

	/* if RLC is not enabled, do nothing */
	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags &
	    (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
	     AMD_CG_SUPPORT_GFX_3D_CGCG)) {
		data = RLC_SAFE_MODE__CMD_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);

		/* wait for RLC_SAFE_MODE */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	uint32_t rlc_setting, data;

	if (!adev->gfx.rlc.in_safe_mode)
		return;

	/* if RLC is not enabled, do nothing */
	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags &
	    (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/*
		 * Try to exit safe mode only if it is already in safe
		 * mode.
		 */
		data = RLC_SAFE_MODE__CMD_MASK;
		WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
		adev->gfx.rlc.in_safe_mode = false;
	}
}
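
/*
 * The three helpers below toggle medium grain, GFX 3D and coarse grain
 * clock gating by adjusting the RLC MGCG override and the CGCG/CGLS FSM
 * registers. The 3D and coarse grain variants bracket the updates with the
 * RLC safe mode enter/exit callbacks.
 */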
static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						       bool enable)
{
	uint32_t data, def;

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
		data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);

		/* only for Vega10 & Raven1 */
		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;

		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* MGLS is a global flag to control all MGLS in GFX */
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			/* 2 - RLC memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
				if (def != data)
					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
			}
			/* 3 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
				if (def != data)
					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
			}
		}
	} else {
		/* 1 - MGCG_OVERRIDE */
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
		}
	}
}
static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	uint32_t data, def;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* Enable 3D CGCG/CGLS */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
		/* write cmd to clear cgcg/cgls ov */
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
		/* unset CGCG override */
		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
		/* update CGCG and CGLS override bits */
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
		/* enable 3Dcgcg FSM(0x0020003f) */
		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
		data = (0x2000 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);

		/* set IDLE_POLL_COUNT(0x00900100) */
		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
		if (def != data)
			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
	} else {
		/* Disable CGCG/CGLS */
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
		/* disable cgcg, cgls should be disabled */
		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
		/* disable cgcg and cgls in FSM */
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						       bool enable)
{
	uint32_t def, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
		/* unset CGCG override */
		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
		else
			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
		/* update CGCG and CGLS override bits */
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* enable cgcg FSM(0x0020003F) */
		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
		data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
			RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);

		/* set IDLE_POLL_COUNT(0x00900100) */
		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
		if (def != data)
			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
	} else {
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
		/* reset CGCG/CGLS bits */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		/* disable cgcg and cgls in FSM */
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS
		 * ===  MGCG + MGLS ===
		 */
		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
		/* ===  CGCG /CGLS for GFX 3D Only === */
		gfx_v9_0_update_3d_clock_gating(adev, enable);
		/* ===  CGCG + CGLS === */
		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS
		 * ===  CGCG + CGLS ===
		 */
		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
		/* ===  CGCG /CGLS for GFX 3D Only === */
		gfx_v9_0_update_3d_clock_gating(adev, enable);
		/* ===  MGCG + MGLS === */
		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}
static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
	.enter_safe_mode = gfx_v9_0_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v9_0_exit_rlc_safe_mode
};
static int gfx_v9_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	return 0;
}

static int gfx_v9_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		gfx_v9_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE ? true : false);
		break;
	default:
		break;
	}
	return 0;
}
static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;

	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
}
static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
}

static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u64 wptr;

	/* XXX check if swapping is necessary on BE */
	if (ring->use_doorbell) {
		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
	} else {
		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
	}

	return wptr;
}

static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
		WDOORBELL64(ring->doorbell_index, ring->wptr);
	} else {
		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
	}
}
static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;
	struct nbio_hdp_flush_reg *nbio_hf_reg;

	if (ring->adev->asic_type == CHIP_VEGA10)
		nbio_hf_reg = &nbio_v6_1_hdp_flush_reg;

	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
		switch (ring->me) {
		case 1:
			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
			break;
		case 2:
			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
		reg_mem_engine = 1; /* pfp */
	}

	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
			      nbio_hf_reg->hdp_flush_req_offset,
			      nbio_hf_reg->hdp_flush_done_offset,
			      ref_and_mask, ref_and_mask, 0x20);
}
static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	gfx_v9_0_write_data_to_reg(ring, 0, true,
				   SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0), 1);
}
static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT))
		control |= INDIRECT_BUFFER_PRE_ENB(1);

	amdgpu_ring_write(ring, header);
	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
	amdgpu_ring_write(ring,
		lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, control);
}
#define	INDIRECT_BUFFER_VALID	(1 << 23)

static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
	amdgpu_ring_write(ring,
		lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, control);
}
static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
				     u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EOP_TC_MD_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));

	/*
	 * the address should be Qword aligned if 64bit write, Dword
	 * aligned if only send 32bit data low (discard data high)
	 */
	if (write64bit)
		BUG_ON(addr & 0x7);
	else
		BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
	amdgpu_ring_write(ring, 0);
}
static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
			      lower_32_bits(addr), upper_32_bits(addr),
			      seq, 0xffffffff, 4);
}
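
/*
 * Emit a VM flush: write the page directory base for this vm_id into the
 * hub CTX0 page table base registers, kick the invalidation engine and
 * wait for its ack bit, then (on gfx rings) sync PFP to ME.
 */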
static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
	unsigned eng = ring->vm_inv_eng;

	pd_addr = pd_addr | 0x1; /* valid bit */
	/* now only use physical base address of PDE and valid */
	BUG_ON(pd_addr & 0xFFFF00000000003EULL);

	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
				   hub->ctx0_ptb_addr_lo32 + (2 * vm_id),
				   lower_32_bits(pd_addr));

	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
				   hub->ctx0_ptb_addr_hi32 + (2 * vm_id),
				   upper_32_bits(pd_addr));

	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
				   hub->vm_inv_eng0_req + eng, req);

	/* wait for the invalidate to complete */
	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
			      eng, 0, 1 << vm_id, 1 << vm_id, 0x20);

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
}

static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	u64 wptr;

	/* XXX check if swapping is necessary on BE */
	if (ring->use_doorbell)
		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
	else
		BUG();
	return wptr;
}

static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	if (ring->use_doorbell) {
		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
		WDOORBELL64(ring->doorbell_index, ring->wptr);
	} else {
		BUG(); /* only DOORBELL method supported on gfx9 now */
	}
}
static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	static struct v9_ce_ib_state ce_payload = {0};
	uint64_t csa_addr;
	int cnt;

	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
}
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	static struct v9_de_ib_state de_payload = {0};
	uint64_t csa_addr, gds_addr;
	int cnt;

	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
	gds_addr = csa_addr + 4096;
	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);

	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
}
static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v9_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v9_0_ring_emit_de_meta(ring);
}
static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       TIME_STAMP_INT_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
	default:
		break;
	}
}
static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						      int me, int pipe,
						      enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
	 * handles the setting of interrupts for this specific pipe. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 0);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 1);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       PRIV_REG_INT_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
	default:
		break;
	}

	return 0;
}

static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       PRIV_INSTR_INT_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
	default:
		break;
	}

	return 0;
}
static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	uint32_t tmp, target;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	if (ring->me == 1)
		target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
	else
		target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL);
	target += ring->pipe;

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		if (state == AMDGPU_IRQ_STATE_DISABLE) {
			tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
					    GENERIC2_INT_ENABLE, 0);
			WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);

			tmp = RREG32(target);
			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
					    GENERIC2_INT_ENABLE, 0);
			WREG32(target, tmp);
		} else {
			tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
					    GENERIC2_INT_ENABLE, 1);
			WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);

			tmp = RREG32(target);
			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
					    GENERIC2_INT_ENABLE, 1);
			WREG32(target, tmp);
		}
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}
static int gfx_v9_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		   me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}
const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.early_init = gfx_v9_0_early_init,
	.late_init = gfx_v9_0_late_init,
	.sw_init = gfx_v9_0_sw_init,
	.sw_fini = gfx_v9_0_sw_fini,
	.hw_init = gfx_v9_0_hw_init,
	.hw_fini = gfx_v9_0_hw_fini,
	.suspend = gfx_v9_0_suspend,
	.resume = gfx_v9_0_resume,
	.is_idle = gfx_v9_0_is_idle,
	.wait_for_idle = gfx_v9_0_wait_for_idle,
	.soft_reset = gfx_v9_0_soft_reset,
	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
	.set_powergating_state = gfx_v9_0_set_powergating_state,
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		7 + /* PIPELINE_SYNC */
		8 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		     * the first COND_EXEC jump to the place just
		     * prior to this double SWITCH_BUFFER
		     */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		24 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		24 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
};
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}
static const struct amdgpu_irq_src_funcs gfx_v9_0_kiq_irq_funcs = {
	.set = gfx_v9_0_kiq_set_interrupt_state,
	.process = gfx_v9_0_kiq_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v9_0_kiq_irq_funcs;
}
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;

	if (!adev || !cu_info)
		return -EINVAL;

	memset(cu_info, 0, sizeof(*cu_info));

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < 16; k++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;

	return 0;
}
3700 static int gfx_v9_0_init_queue(struct amdgpu_ring
*ring
)
3704 bool use_doorbell
= true;
3711 struct amdgpu_device
*adev
;
3714 if (ring
->mqd_obj
== NULL
) {
3715 r
= amdgpu_bo_create(adev
,
3716 sizeof(struct v9_mqd
),
3718 AMDGPU_GEM_DOMAIN_GTT
, 0, NULL
,
3719 NULL
, &ring
->mqd_obj
);
3721 dev_warn(adev
->dev
, "(%d) create MQD bo failed\n", r
);
3726 r
= amdgpu_bo_reserve(ring
->mqd_obj
, false);
3727 if (unlikely(r
!= 0)) {
3728 gfx_v9_0_cp_compute_fini(adev
);
3732 r
= amdgpu_bo_pin(ring
->mqd_obj
, AMDGPU_GEM_DOMAIN_GTT
,
3735 dev_warn(adev
->dev
, "(%d) pin MQD bo failed\n", r
);
3736 gfx_v9_0_cp_compute_fini(adev
);
3739 r
= amdgpu_bo_kmap(ring
->mqd_obj
, (void **)&buf
);
3741 dev_warn(adev
->dev
, "(%d) map MQD bo failed\n", r
);
3742 gfx_v9_0_cp_compute_fini(adev
);
3746 /* init the mqd struct */
3747 memset(buf
, 0, sizeof(struct v9_mqd
));
3749 mqd
= (struct v9_mqd
*)buf
;
3750 mqd
->header
= 0xC0310800;
3751 mqd
->compute_pipelinestat_enable
= 0x00000001;
3752 mqd
->compute_static_thread_mgmt_se0
= 0xffffffff;
3753 mqd
->compute_static_thread_mgmt_se1
= 0xffffffff;
3754 mqd
->compute_static_thread_mgmt_se2
= 0xffffffff;
3755 mqd
->compute_static_thread_mgmt_se3
= 0xffffffff;
3756 mqd
->compute_misc_reserved
= 0x00000003;
3757 mutex_lock(&adev
->srbm_mutex
);
3758 soc15_grbm_select(adev
, ring
->me
,
	/* disable wptr polling */
	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* write the EOP addr */
	BUG_ON(ring->me != 1 || ring->pipe != 0); /* can't handle other cases eop address */
	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
		       (ring->queue * MEC_HPD_SIZE);

	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, lower_32_bits(eop_gpu_addr));
	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr);
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			    (order_base_2(MEC_HPD_SIZE / 4) - 1));
	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, tmp);
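
	/*
	 * EOP_SIZE is encoded as log2(size in dwords) - 1, which matches the
	 * "2^(EOP_SIZE+1) dwords" comment above: an EOP buffer of
	 * MEC_HPD_SIZE bytes holds MEC_HPD_SIZE / 4 dwords, and
	 * order_base_2() of that minus one is what the field expects.
	 */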
	/* enable doorbell? */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
	if (use_doorbell)
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	else
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);

	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
	mqd->cp_hqd_pq_doorbell_control = tmp;
	/* disable the queue if it's active */
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;
	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
			     mqd->cp_hqd_dequeue_request);
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo);
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi);
	}
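
	/*
	 * If the queue was active it has now been asked to dequeue and its
	 * request/rptr/wptr registers have been cleared, so the HQD should
	 * be idle before it is reprogrammed below.
	 */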
	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
	/* set MQD vmid to 0 */
	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, tmp);
	mqd->cp_mqd_control = tmp;
	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, tmp);
	mqd->cp_hqd_pq_control = tmp;
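
	/*
	 * QUEUE_SIZE uses the same log2 encoding as EOP_SIZE above: the ring
	 * buffer holds ring->ring_size / 4 dwords, and the field stores
	 * order_base_2() of that minus one.  RPTR_BLOCK_SIZE is likewise a
	 * log2 value, here derived from AMDGPU_GPU_PAGE_SIZE.
	 */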
	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		     mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		     mqd->cp_hqd_pq_rptr_report_addr_hi);
	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
		     mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		     mqd->cp_hqd_pq_wptr_poll_addr_hi);
	/* enable the doorbell if requested */
	if (use_doorbell) {
		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
			     (AMDGPU_DOORBELL64_KIQ * 2) << 2);
		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
			     (AMDGPU_DOORBELL64_MEC_RING7 * 2) << 2);
		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
		mqd->cp_hqd_pq_doorbell_control = tmp;
	} else {
		mqd->cp_hqd_pq_doorbell_control = 0;
	}
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
		     mqd->cp_hqd_pq_doorbell_control);
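
	/*
	 * The MEC doorbell aperture is expressed in bytes: each 64-bit
	 * doorbell slot is 8 bytes wide, which is presumably why the slot
	 * indices above are scaled by (index * 2) << 2 before being written
	 * to the DOORBELL_RANGE registers.
	 */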
	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi);
	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;
	WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, tmp);
	mqd->cp_hqd_persistent_state = tmp;
	/* activate the queue */
	mqd->cp_hqd_active = 1;
	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	amdgpu_bo_kunmap(ring->mqd_obj);
	amdgpu_bo_unreserve(ring->mqd_obj);

	if (use_doorbell)
		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);

	return 0;
}
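
/*
 * IP block registration entry for the GFX 9.0 block: amdgpu's IP framework
 * uses this version/funcs pairing to hook the gfx_v9_0_ip_funcs callbacks
 * (early_init, sw_init, hw_init, ...) into the device init sequence.
 */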
const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};