/*
 * Copyright 2014-2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/mmu_context.h>

#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "sdma0/sdma0_4_0_offset.h"
#include "sdma0/sdma0_4_0_sh_mask.h"
#include "sdma1/sdma1_4_0_offset.h"
#include "sdma1/sdma1_4_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "athub/athub_1_0_sh_mask.h"
#include "oss/osssys_4_0_offset.h"
#include "oss/osssys_4_0_sh_mask.h"
#include "soc15_common.h"
#include "v9_structs.h"
#include "mmhub_v1_0.h"
#include "gfxhub_v1_0.h"
enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES
};
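/* These values are written verbatim to CP_HQD_DEQUEUE_REQUEST by
 * kgd_gfx_v9_hqd_destroy() below to tell the CP how to preempt a queue:
 * drain the outstanding work on the pipe or reset the waves outright.
 */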
/* Because of REG_GET_FIELD() being used, we put this function in the
 * asic specific file.
 */
int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,
		struct tile_config *config)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	config->gb_addr_config = adev->gfx.config.gb_addr_config;

	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
	config->num_tile_configs =
			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	config->macro_tile_config_ptr =
			adev->gfx.config.macrotile_mode_array;
	config->num_macro_tile_configs =
			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);

	return 0;
}
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	mutex_lock(&adev->srbm_mutex);
	soc15_grbm_select(adev, mec, pipe, queue, vmid);
}
static void unlock_srbm(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
			uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, queue_id, 0);
}
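/* Build the single-bit queue mask that kgd_gfx_v9_hqd_load() writes to
 * CP_PQ_WPTR_POLL_CNTL1: each HQD owns one bit, indexed by
 * pipe_id * num_queue_per_pipe + queue_id.
 */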
static uint64_t get_queue_mask(struct amdgpu_device *adev,
			       uint32_t pipe_id, uint32_t queue_id)
{
	unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
			queue_id;

	return 1ull << bit;
}
static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}
void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	lock_srbm(kgd, 0, 0, 0, vmid);

	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
	/* APE1 no longer exists on GFX9 */

	unlock_srbm(kgd);
}
int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
	 * a mapping is in progress or because a mapping finished
	 * and the SW cleared it.
	 * So the protocol is to always wait & clear.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
			ATC_VMID0_PASID_MAPPING__VALID_MASK;

	/*
	 * need to do this twice, once for gfx and once for mmhub
	 * for ATC add 16 to VMID for mmhub, for IH different registers.
	 * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
	 */

	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
	       pasid_mapping);

	while (!(RREG32(SOC15_REG_OFFSET(
				ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << vmid)))
		cpu_relax();

	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << vmid);

	/* Mapping vmid to pasid also for IH block */
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
	       pasid_mapping);

	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
	       pasid_mapping);

	while (!(RREG32(SOC15_REG_OFFSET(
				ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << (vmid + 16))))
		cpu_relax();

	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << (vmid + 16));

	/* Mapping vmid to pasid also for IH block */
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
	       pasid_mapping);

	return 0;
}
/* TODO - RING0 form of field is obsolete, seems to date back to SI
 * but still works
 */
int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, 0, 0);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
		CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
		CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(kgd);

	return 0;
}
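/* SDMA RLC queue registers are laid out at a fixed stride: the register
 * block for RLC<n> starts n * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL)
 * dwords after the RLC0 block of the owning engine. The helper below
 * returns that per-queue base so callers can add the mmSDMA0_RLC0_*
 * offsets to it.
 */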
static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{
	uint32_t sdma_engine_reg_base[2] = {
		SOC15_REG_OFFSET(SDMA0, 0,
				 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
		SOC15_REG_OFFSET(SDMA1, 0,
				 mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL
	};
	uint32_t retval = sdma_engine_reg_base[engine_id]
		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
			queue_id, retval);

	return retval;
}
static inline struct v9_mqd *get_mqd(void *mqd)
{
	return (struct v9_mqd *)mqd;
}

static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v9_sdma_mqd *)mqd;
}
int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, hqd_base, data;

	m = get_mqd(mqd);

	acquire_queue(kgd, pipe_id, queue_id);

	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
	mqd_hqd = &m->cp_mqd_base_addr_lo;
	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);

	for (reg = hqd_base;
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);

	/* Activate doorbell logic before triggering WPTR poll. */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);

	if (wptr) {
		/* Don't read wptr with get_user because the user
		 * context may not be accessible (if this function
		 * runs in a work queue). Instead trigger a one-shot
		 * polling read from memory in the CP. This assumes
		 * that wptr is GPU-accessible in the queue's VMID via
		 * ATC or SVM. WPTR==RPTR before starting the poll so
		 * the CP starts fetching new commands from the right
		 * place.
		 *
		 * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
		 * tricky. Assume that the queue didn't overflow. The
		 * number of valid bits in the 32-bit RPTR depends on
		 * the queue size. The remaining bits are taken from
		 * the saved 64-bit WPTR. If the WPTR wrapped, add the
		 * queue size.
		 */
		uint32_t queue_size =
			2 << REG_GET_FIELD(m->cp_hqd_pq_control,
					   CP_HQD_PQ_CONTROL, QUEUE_SIZE);
		uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);

		if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
			guessed_wptr += queue_size;
		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;

		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
		       lower_32_bits(guessed_wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
		       upper_32_bits(guessed_wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
		       lower_32_bits((uintptr_t)wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
		       upper_32_bits((uintptr_t)wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
		       (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
	}

	/* Start the EOP fetcher */
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
	       REG_SET_FIELD(m->cp_hqd_eop_rptr,
			     CP_HQD_EOP_RPTR, INIT_FETCHER, 1));

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);

	release_queue(kgd);

	return 0;
}
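/* The HIQ is not programmed through the HQD registers directly. Instead a
 * PM4 MAP_QUEUES packet is submitted on the KIQ ring and the CP maps the
 * queue from the MQD on our behalf.
 */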
int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
			    uint32_t pipe_id, uint32_t queue_id,
			    uint32_t doorbell_off)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	struct v9_mqd *m;
	uint32_t mec, pipe;
	int r;

	m = get_mqd(mqd);

	acquire_queue(kgd, pipe_id, queue_id);

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
		 mec, pipe, queue_id);

	spin_lock(&adev->gfx.kiq.ring_lock);
	r = amdgpu_ring_alloc(kiq_ring, 7);
	if (r) {
		pr_err("Failed to alloc KIQ (%d).\n", r);
		goto out_unlock;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(queue_id) |
			  PACKET3_MAP_QUEUES_PIPE(pipe) |
			  PACKET3_MAP_QUEUES_ME((mec - 1)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
	amdgpu_ring_commit(kiq_ring);

out_unlock:
	spin_unlock(&adev->gfx.kiq.ring_lock);
	release_queue(kgd);

	return r;
}
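/* Dump the HQD register range for debugging. DUMP_REG records
 * (byte offset, value) pairs; the "<< 2" converts the dword register
 * offset to a byte address.
 */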
int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(kgd, pipe_id, queue_id);

	for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		DUMP_REG(reg);

	release_queue(kgd);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}
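/* Loading an SDMA queue: disable the ring buffer, wait for the SDMA RLC
 * context to go idle, restore doorbell/RPTR/WPTR state from the MQD, then
 * re-enable the ring buffer.
 */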
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
				m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
				m->sdmax_rlcx_rb_rptr_hi);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
			lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
			upper_32_bits(data64));
	} else {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
			m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
			m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
			engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}
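/* A queue slot counts as occupied when the HQD is active and its PQ base
 * matches the queue address (in 256-byte units) we are asked about.
 */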
bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(kgd, pipe_id, queue_id);
	act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
		   high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
			retval = true;
	}
	release_queue(kgd);
	return retval;
}
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}
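/* Preempt a user queue: issue a dequeue request of the requested type and
 * poll CP_HQD_ACTIVE until the HQD goes idle or the timeout expires.
 */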
int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	enum hqd_dequeue_request_type type;
	unsigned long end_jiffies;
	uint32_t temp;
	struct v9_mqd *m = get_mqd(mqd);

	if (adev->in_gpu_reset)
		return -EIO;

	acquire_queue(kgd, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out.\n");
			release_queue(kgd);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(kgd);
	return 0;
}
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);

	return 0;
}
bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t value;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
{
	return 0;
}

int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo)
{
	return 0;
}
int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);

	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SH_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SE_BROADCAST_WRITES, 1);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}
uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset)
{
	return 0;
}
static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,
			uint32_t vmid, uint64_t page_table_base)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID %u\n",
		       vmid);
		return;
	}

	mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);

	gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
}
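/* Interface exported to the KFD driver for GFXv9 parts; KFD calls back into
 * this file through the function pointers below.
 */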
const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
	.init_interrupts = kgd_gfx_v9_init_interrupts,
	.hqd_load = kgd_gfx_v9_hqd_load,
	.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_gfx_v9_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_gfx_v9_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_gfx_v9_address_watch_disable,
	.address_watch_execute = kgd_gfx_v9_address_watch_execute,
	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
	.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_info =
			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
	.get_tile_config = kgd_gfx_v9_get_tile_config,
	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
	.get_hive_id = amdgpu_amdkfd_get_hive_id,
};