/*
 * Copyright 2016-2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
24 #include <linux/printk.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
28 #include "kfd_mqd_manager.h"
29 #include "v9_structs.h"
30 #include "gc/gc_9_0_offset.h"
31 #include "gc/gc_9_0_sh_mask.h"
32 #include "sdma0/sdma0_4_0_sh_mask.h"
33 #include "amdgpu_amdkfd.h"
/*
 * get_mqd - view an opaque MQD pointer as a v9 compute MQD.
 * @mqd: untyped pointer to the MQD storage
 *
 * This is a plain cast: nothing is validated or copied.
 *
 * Return: @mqd retyped as struct v9_mqd *.
 */
static inline struct v9_mqd *get_mqd(void *mqd)
{
	return (struct v9_mqd *)mqd;
}
/*
 * get_sdma_mqd - view an opaque MQD pointer as a v9 SDMA MQD.
 * @mqd: untyped pointer to the MQD storage
 *
 * This is a plain cast: nothing is validated or copied.
 *
 * Return: @mqd retyped as struct v9_sdma_mqd *.
 */
static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v9_sdma_mqd *)mqd;
}
/*
 * update_cu_mask - program the per-shader-engine CU masks of an MQD.
 *
 * What the visible code does:
 *  - tests q->cu_mask_count against 0;
 *  - calls mqd_symmetrically_map_cu_mask() with q->cu_mask and
 *    q->cu_mask_count to fill the local se_mask[4];
 *  - stores se_mask[0..3] into compute_static_thread_mgmt_se0..se3 of m;
 *  - logs the four resulting values with pr_debug().
 *
 * NOTE(review): this listing shows neither the declaration/assignment of
 * m nor the statement guarded by the cu_mask_count == 0 test. Presumably
 * these are "m = get_mqd(mqd)" and an early return -- confirm against the
 * complete file.
 */
45 static void update_cu_mask(struct mqd_manager
*mm
, void *mqd
,
46 struct queue_properties
*q
)
49 uint32_t se_mask
[4] = {0}; /* 4 is the max # of SEs */
51 if (q
->cu_mask_count
== 0)
54 mqd_symmetrically_map_cu_mask(mm
,
55 q
->cu_mask
, q
->cu_mask_count
, se_mask
);
58 m
->compute_static_thread_mgmt_se0
= se_mask
[0];
59 m
->compute_static_thread_mgmt_se1
= se_mask
[1];
60 m
->compute_static_thread_mgmt_se2
= se_mask
[2];
61 m
->compute_static_thread_mgmt_se3
= se_mask
[3];
63 pr_debug("update cu mask to %#x %#x %#x %#x\n",
64 m
->compute_static_thread_mgmt_se0
,
65 m
->compute_static_thread_mgmt_se1
,
66 m
->compute_static_thread_mgmt_se2
,
67 m
->compute_static_thread_mgmt_se3
);
/*
 * init_mqd - allocate and initialise a v9 compute MQD.
 *
 * Allocation, as far as this listing shows:
 *  - when kfd->cwsr_enabled is set and q->type is KFD_QUEUE_TYPE_COMPUTE,
 *    a struct kfd_mem_obj is kzalloc'ed and amdgpu_amdkfd_alloc_gtt_mem()
 *    is asked for ALIGN(q->ctl_stack_size, PAGE_SIZE) +
 *    ALIGN(sizeof(struct v9_mqd), PAGE_SIZE) bytes, so the control stack
 *    can follow the MQD;
 *  - a kfd_gtt_sa_allocate() call for sizeof(struct v9_mqd) is also
 *    present (presumably the alternative, non-CWSR path -- the branch
 *    keyword itself is not visible here).
 *
 * Initialisation: the MQD is zeroed with memset() and then given its
 * defaults: header, pipeline-stat enable, all-ones CU masks, persistent
 * state, MQD control, MQD base address (from the allocation's gpu_addr),
 * quantum, and pipe/queue priority. For AQL-format queues
 * cp_hqd_aql_control is set. When mm->dev->cwsr_enabled is set and
 * q->ctx_save_restore_area_address is non-zero, the context-save base,
 * size and control-stack fields are filled from @q. Finally
 * mm->update_mqd(mm, m, q) is called.
 *
 * NOTE(review): no check of the kzalloc()/allocation results, no write
 * through @mqd or @gart_addr, and no return statement are visible in this
 * listing. Confirm in the complete file that failures are handled and
 * that a failed GTT allocation does not leak the kzalloc'ed kfd_mem_obj.
 * The condition guarding the compute_pgm_rsrc2 TRAP_PRESENT update is
 * likewise not visible.
 */
70 static int init_mqd(struct mqd_manager
*mm
, void **mqd
,
71 struct kfd_mem_obj
**mqd_mem_obj
, uint64_t *gart_addr
,
72 struct queue_properties
*q
)
77 struct kfd_dev
*kfd
= mm
->dev
;
79 /* From V9, for CWSR, the control stack is located on the next page
80 * boundary after the mqd, we will use the gtt allocation function
81 * instead of sub-allocation function.
83 if (kfd
->cwsr_enabled
&& (q
->type
== KFD_QUEUE_TYPE_COMPUTE
)) {
84 *mqd_mem_obj
= kzalloc(sizeof(struct kfd_mem_obj
), GFP_KERNEL
);
87 retval
= amdgpu_amdkfd_alloc_gtt_mem(kfd
->kgd
,
88 ALIGN(q
->ctl_stack_size
, PAGE_SIZE
) +
89 ALIGN(sizeof(struct v9_mqd
), PAGE_SIZE
),
90 &((*mqd_mem_obj
)->gtt_mem
),
91 &((*mqd_mem_obj
)->gpu_addr
),
92 (void *)&((*mqd_mem_obj
)->cpu_ptr
), true);
94 retval
= kfd_gtt_sa_allocate(mm
->dev
, sizeof(struct v9_mqd
),
99 m
= (struct v9_mqd
*) (*mqd_mem_obj
)->cpu_ptr
;
100 addr
= (*mqd_mem_obj
)->gpu_addr
;
102 memset(m
, 0, sizeof(struct v9_mqd
));
104 m
->header
= 0xC0310800;
105 m
->compute_pipelinestat_enable
= 1;
106 m
->compute_static_thread_mgmt_se0
= 0xFFFFFFFF;
107 m
->compute_static_thread_mgmt_se1
= 0xFFFFFFFF;
108 m
->compute_static_thread_mgmt_se2
= 0xFFFFFFFF;
109 m
->compute_static_thread_mgmt_se3
= 0xFFFFFFFF;
111 m
->cp_hqd_persistent_state
= CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK
|
112 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT
;
114 m
->cp_mqd_control
= 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT
;
116 m
->cp_mqd_base_addr_lo
= lower_32_bits(addr
);
117 m
->cp_mqd_base_addr_hi
= upper_32_bits(addr
);
119 m
->cp_hqd_quantum
= 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT
|
120 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT
|
121 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT
;
123 m
->cp_hqd_pipe_priority
= 1;
124 m
->cp_hqd_queue_priority
= 15;
126 if (q
->format
== KFD_QUEUE_FORMAT_AQL
) {
127 m
->cp_hqd_aql_control
=
128 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT
;
132 m
->compute_pgm_rsrc2
|=
133 (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT
);
136 if (mm
->dev
->cwsr_enabled
&& q
->ctx_save_restore_area_address
) {
137 m
->cp_hqd_persistent_state
|=
138 (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT
);
139 m
->cp_hqd_ctx_save_base_addr_lo
=
140 lower_32_bits(q
->ctx_save_restore_area_address
);
141 m
->cp_hqd_ctx_save_base_addr_hi
=
142 upper_32_bits(q
->ctx_save_restore_area_address
);
143 m
->cp_hqd_ctx_save_size
= q
->ctx_save_restore_area_size
;
144 m
->cp_hqd_cntl_stack_size
= q
->ctl_stack_size
;
145 m
->cp_hqd_cntl_stack_offset
= q
->ctl_stack_size
;
146 m
->cp_hqd_wg_state_offset
= q
->ctl_stack_size
;
152 retval
= mm
->update_mqd(mm
, m
, q
);
/*
 * load_mqd - hand a compute MQD to the hardware queue slot.
 *
 * Forwards to mm->dev->kfd2kgd->hqd_load() with the device's kgd handle,
 * @mqd, @pipe_id, @queue_id and the queue's user-space write pointer
 * (p->write_ptr cast to uint32_t __user *). The call's result is returned
 * to the caller.
 *
 * wptr_shift is 4 for AQL-format queues and 0 otherwise, reflecting the
 * different units the two formats use for the write pointer (see the
 * comment on its declaration).
 *
 * NOTE(review): the trailing arguments of the hqd_load() call are not
 * visible in this listing; presumably wptr_shift and @mms are among them
 * -- confirm against the complete file.
 */
157 static int load_mqd(struct mqd_manager
*mm
, void *mqd
,
158 uint32_t pipe_id
, uint32_t queue_id
,
159 struct queue_properties
*p
, struct mm_struct
*mms
)
161 /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
162 uint32_t wptr_shift
= (p
->format
== KFD_QUEUE_FORMAT_AQL
? 4 : 0);
164 return mm
->dev
->kfd2kgd
->hqd_load(mm
->dev
->kgd
, mqd
, pipe_id
, queue_id
,
165 (uint32_t __user
*)p
->write_ptr
,
/*
 * update_mqd - refresh a compute MQD from the queue properties in @q.
 *
 * Fields programmed by the visible code:
 *  - cp_hqd_pq_control: RPTR_BLOCK_SIZE of 5, OR'ed with
 *    order_base_2(q->queue_size / 4) - 1;
 *  - cp_hqd_pq_base_lo/hi: q->queue_address >> 8;
 *  - rptr report address: q->read_ptr; wptr poll address: q->write_ptr;
 *  - cp_hqd_pq_doorbell_control (the left-hand operand of the shift is
 *    not visible in this listing);
 *  - cp_hqd_ib_control: MIN_IB_AVAIL_SIZE of 3 and IB_EXE_DISABLE of 1;
 *  - cp_hqd_eop_control: order_base_2(q->eop_ring_buffer_size / 4) - 1,
 *    capped at 0xA (rationale in the text above that assignment);
 *  - cp_hqd_eop_base_addr_lo/hi: q->eop_ring_buffer_address >> 8;
 *  - cp_hqd_iq_timer = 0 and cp_hqd_vmid = q->vmid;
 *  - for AQL-format queues: NO_UPDATE_RPTR, SLOT_BASED_WPTR (2),
 *    QUEUE_FULL_EN and WPP_CLAMP_EN in cp_hqd_pq_control, plus
 *    DOORBELL_BIF_DROP in the doorbell control;
 *  - cp_hqd_ctx_save_control = 0 when mm->dev->cwsr_enabled is set and
 *    q->ctx_save_restore_area_address is non-zero.
 *
 * It then calls update_cu_mask() and recomputes q->is_active, which
 * requires at least queue_size > 0, queue_address != 0 and
 * queue_percent > 0.
 *
 * NOTE(review): the assignment of m, the final term(s) of the is_active
 * expression and the return statement are not visible here. The
 * "HW does not clamp ..." text has lost its comment delimiters in this
 * listing and should be restored as a comment.
 */
169 static int update_mqd(struct mqd_manager
*mm
, void *mqd
,
170 struct queue_properties
*q
)
176 m
->cp_hqd_pq_control
= 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT
;
177 m
->cp_hqd_pq_control
|= order_base_2(q
->queue_size
/ 4) - 1;
178 pr_debug("cp_hqd_pq_control 0x%x\n", m
->cp_hqd_pq_control
);
180 m
->cp_hqd_pq_base_lo
= lower_32_bits((uint64_t)q
->queue_address
>> 8);
181 m
->cp_hqd_pq_base_hi
= upper_32_bits((uint64_t)q
->queue_address
>> 8);
183 m
->cp_hqd_pq_rptr_report_addr_lo
= lower_32_bits((uint64_t)q
->read_ptr
);
184 m
->cp_hqd_pq_rptr_report_addr_hi
= upper_32_bits((uint64_t)q
->read_ptr
);
185 m
->cp_hqd_pq_wptr_poll_addr_lo
= lower_32_bits((uint64_t)q
->write_ptr
);
186 m
->cp_hqd_pq_wptr_poll_addr_hi
= upper_32_bits((uint64_t)q
->write_ptr
);
188 m
->cp_hqd_pq_doorbell_control
=
190 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT
;
191 pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
192 m
->cp_hqd_pq_doorbell_control
);
194 m
->cp_hqd_ib_control
=
195 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT
|
196 1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT
;
199 * HW does not clamp this field correctly. Maximum EOP queue size
200 * is constrained by per-SE EOP done signal count, which is 8-bit.
201 * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
202 * more than (EOP entry count - 1) so a queue size of 0x800 dwords
203 * is safe, giving a maximum field value of 0xA.
205 m
->cp_hqd_eop_control
= min(0xA,
206 order_base_2(q
->eop_ring_buffer_size
/ 4) - 1);
207 m
->cp_hqd_eop_base_addr_lo
=
208 lower_32_bits(q
->eop_ring_buffer_address
>> 8);
209 m
->cp_hqd_eop_base_addr_hi
=
210 upper_32_bits(q
->eop_ring_buffer_address
>> 8);
212 m
->cp_hqd_iq_timer
= 0;
214 m
->cp_hqd_vmid
= q
->vmid
;
216 if (q
->format
== KFD_QUEUE_FORMAT_AQL
) {
217 m
->cp_hqd_pq_control
|= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK
|
218 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT
|
219 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT
|
220 1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT
;
221 m
->cp_hqd_pq_doorbell_control
|= 1 <<
222 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT
;
224 if (mm
->dev
->cwsr_enabled
&& q
->ctx_save_restore_area_address
)
225 m
->cp_hqd_ctx_save_control
= 0;
227 update_cu_mask(mm
, mqd
, q
);
229 q
->is_active
= (q
->queue_size
> 0 &&
230 q
->queue_address
!= 0 &&
231 q
->queue_percent
> 0 &&
/*
 * destroy_mqd - preempt/tear down the hardware queue backing a compute MQD.
 *
 * Forwards to mm->dev->kfd2kgd->hqd_destroy() with the device's kgd
 * handle, @mqd, the preemption @type and @timeout, and returns that
 * call's result.
 *
 * NOTE(review): the end of the parameter list and the trailing call
 * arguments are not visible in this listing (presumably the pipe and
 * queue ids) -- confirm against the complete file.
 */
238 static int destroy_mqd(struct mqd_manager
*mm
, void *mqd
,
239 enum kfd_preempt_type type
,
240 unsigned int timeout
, uint32_t pipe_id
,
243 return mm
->dev
->kfd2kgd
->hqd_destroy
244 (mm
->dev
->kgd
, mqd
, type
, timeout
,
/*
 * uninit_mqd - release the memory backing a compute MQD.
 *
 * When mqd_mem_obj->gtt_mem is set, the buffer is released with
 * amdgpu_amdkfd_free_gtt_mem(). A kfd_gtt_sa_free() call is also present;
 * presumably it is the alternative path for sub-allocated MQDs, but the
 * branch structure between the two calls is not visible in this listing.
 *
 * NOTE(review): init_mqd() kzalloc's the kfd_mem_obj on its GTT path;
 * confirm in the complete file that this function kfree()s it on the
 * matching path.
 */
248 static void uninit_mqd(struct mqd_manager
*mm
, void *mqd
,
249 struct kfd_mem_obj
*mqd_mem_obj
)
251 struct kfd_dev
*kfd
= mm
->dev
;
253 if (mqd_mem_obj
->gtt_mem
) {
254 amdgpu_amdkfd_free_gtt_mem(kfd
->kgd
, mqd_mem_obj
->gtt_mem
);
257 kfd_gtt_sa_free(mm
->dev
, mqd_mem_obj
);
/*
 * is_occupied - ask the hardware layer whether a compute queue slot is in use.
 *
 * Forwards to mm->dev->kfd2kgd->hqd_is_occupied() with the device's kgd
 * handle and @queue_address, and returns that call's result. @mqd is not
 * among the arguments visible here.
 *
 * NOTE(review): the end of the parameter list and the trailing call
 * arguments are not visible in this listing -- confirm against the
 * complete file.
 */
261 static bool is_occupied(struct mqd_manager
*mm
, void *mqd
,
262 uint64_t queue_address
, uint32_t pipe_id
,
265 return mm
->dev
->kfd2kgd
->hqd_is_occupied(
266 mm
->dev
->kgd
, queue_address
,
/*
 * get_wave_state - report saved wave-state sizes and copy out the control stack.
 * @ctl_stack: user-space destination for the control stack
 * @ctl_stack_used_size: out, cp_hqd_cntl_stack_size - cp_hqd_cntl_stack_offset
 * @save_area_used_size: out, cp_hqd_wg_state_offset
 *
 * The control stack is read from @mqd + PAGE_SIZE, and
 * m->cp_hqd_cntl_stack_size bytes are copied to @ctl_stack with
 * copy_to_user(). Note that the number of bytes copied is the full stack
 * size, not the "used" size reported through @ctl_stack_used_size.
 *
 * NOTE(review): the declaration/assignment of m and the return statements
 * are not visible in this listing; presumably a failed copy_to_user()
 * yields -EFAULT and success yields 0 -- confirm against the complete file.
 */
270 static int get_wave_state(struct mqd_manager
*mm
, void *mqd
,
271 void __user
*ctl_stack
,
272 u32
*ctl_stack_used_size
,
273 u32
*save_area_used_size
)
277 /* Control stack is located one page after MQD. */
278 void *mqd_ctl_stack
= (void *)((uintptr_t)mqd
+ PAGE_SIZE
);
282 *ctl_stack_used_size
= m
->cp_hqd_cntl_stack_size
-
283 m
->cp_hqd_cntl_stack_offset
;
284 *save_area_used_size
= m
->cp_hqd_wg_state_offset
;
286 if (copy_to_user(ctl_stack
, mqd_ctl_stack
, m
->cp_hqd_cntl_stack_size
))
/*
 * init_mqd_hiq - initialise an MQD for the HIQ.
 *
 * Runs the common init_mqd() with the same arguments, then ORs the
 * PRIV_STATE and KMD_QUEUE bits into cp_hqd_pq_control.
 *
 * NOTE(review): how m is obtained, whether retval is checked before the
 * MQD is touched, and the return statement are not visible in this
 * listing -- confirm against the complete file.
 */
292 static int init_mqd_hiq(struct mqd_manager
*mm
, void **mqd
,
293 struct kfd_mem_obj
**mqd_mem_obj
, uint64_t *gart_addr
,
294 struct queue_properties
*q
)
297 int retval
= init_mqd(mm
, mqd
, mqd_mem_obj
, gart_addr
, q
);
304 m
->cp_hqd_pq_control
|= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT
|
305 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT
;
/*
 * update_mqd_hiq - refresh the HIQ MQD from @q.
 *
 * Runs the common update_mqd() and then stores q->vmid into cp_hqd_vmid.
 * As the existing TODO points out, update_mqd() performs the same
 * cp_hqd_vmid assignment, so the second store looks redundant.
 *
 * NOTE(review): how m is obtained, any retval check and the return
 * statement are not visible in this listing.
 */
310 static int update_mqd_hiq(struct mqd_manager
*mm
, void *mqd
,
311 struct queue_properties
*q
)
314 int retval
= update_mqd(mm
, mqd
, q
);
319 /* TODO: what's the point? update_mqd already does this. */
321 m
->cp_hqd_vmid
= q
->vmid
;
/*
 * init_mqd_sdma - allocate and initialise a v9 SDMA MQD.
 *
 * Sub-allocates sizeof(struct v9_sdma_mqd) bytes with
 * kfd_gtt_sa_allocate(), zeroes the MQD through the allocation's cpu_ptr,
 * stores the allocation's gpu_addr through @gart_addr and then calls
 * mm->update_mqd(mm, m, q) to fill in the queue-specific fields.
 *
 * NOTE(review): the allocation's error check, any write through @mqd,
 * whether the *gart_addr store is guarded against a NULL @gart_addr, and
 * the return statement are not visible in this listing -- confirm against
 * the complete file.
 */
325 static int init_mqd_sdma(struct mqd_manager
*mm
, void **mqd
,
326 struct kfd_mem_obj
**mqd_mem_obj
, uint64_t *gart_addr
,
327 struct queue_properties
*q
)
330 struct v9_sdma_mqd
*m
;
333 retval
= kfd_gtt_sa_allocate(mm
->dev
,
334 sizeof(struct v9_sdma_mqd
),
340 m
= (struct v9_sdma_mqd
*) (*mqd_mem_obj
)->cpu_ptr
;
342 memset(m
, 0, sizeof(struct v9_sdma_mqd
));
346 *gart_addr
= (*mqd_mem_obj
)->gpu_addr
;
348 retval
= mm
->update_mqd(mm
, m
, q
);
353 static void uninit_mqd_sdma(struct mqd_manager
*mm
, void *mqd
,
354 struct kfd_mem_obj
*mqd_mem_obj
)
356 kfd_gtt_sa_free(mm
->dev
, mqd_mem_obj
);
/*
 * load_mqd_sdma - hand an SDMA MQD to the hardware.
 *
 * Forwards to mm->dev->kfd2kgd->hqd_sdma_load() with the device's kgd
 * handle, @mqd and the queue's user-space write pointer (p->write_ptr
 * cast to uint32_t __user *), and returns that call's result. @pipe_id
 * and @queue_id are not among the arguments visible here.
 *
 * NOTE(review): the trailing call arguments are not visible in this
 * listing (presumably @mms) -- confirm against the complete file.
 */
359 static int load_mqd_sdma(struct mqd_manager
*mm
, void *mqd
,
360 uint32_t pipe_id
, uint32_t queue_id
,
361 struct queue_properties
*p
, struct mm_struct
*mms
)
363 return mm
->dev
->kfd2kgd
->hqd_sdma_load(mm
->dev
->kgd
, mqd
,
364 (uint32_t __user
*)p
->write_ptr
,
368 #define SDMA_RLC_DUMMY_DEFAULT 0xf
/*
 * update_mqd_sdma - refresh an SDMA MQD from the queue properties in @q.
 *
 * Fields programmed by the visible code:
 *  - sdmax_rlcx_rb_cntl: ring size as order_base_2(q->queue_size / 4),
 *    q->vmid, read-pointer writeback enabled, writeback timer of 6;
 *  - sdmax_rlcx_rb_base/_hi: q->queue_address >> 8;
 *  - sdmax_rlcx_rb_rptr_addr_lo/hi: q->read_ptr;
 *  - sdmax_rlcx_doorbell_offset: q->doorbell_off shifted into the OFFSET
 *    field;
 *  - sdma_engine_id and sdma_queue_id copied from @q;
 *  - sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT.
 *
 * It then recomputes q->is_active, which requires at least
 * queue_size > 0, queue_address != 0 and queue_percent > 0.
 *
 * NOTE(review): the final term(s) of the is_active expression and the
 * return statement are not visible in this listing.
 */
370 static int update_mqd_sdma(struct mqd_manager
*mm
, void *mqd
,
371 struct queue_properties
*q
)
373 struct v9_sdma_mqd
*m
;
375 m
= get_sdma_mqd(mqd
);
376 m
->sdmax_rlcx_rb_cntl
= order_base_2(q
->queue_size
/ 4)
377 << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT
|
378 q
->vmid
<< SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT
|
379 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT
|
380 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT
;
382 m
->sdmax_rlcx_rb_base
= lower_32_bits(q
->queue_address
>> 8);
383 m
->sdmax_rlcx_rb_base_hi
= upper_32_bits(q
->queue_address
>> 8);
384 m
->sdmax_rlcx_rb_rptr_addr_lo
= lower_32_bits((uint64_t)q
->read_ptr
);
385 m
->sdmax_rlcx_rb_rptr_addr_hi
= upper_32_bits((uint64_t)q
->read_ptr
);
386 m
->sdmax_rlcx_doorbell_offset
=
387 q
->doorbell_off
<< SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT
;
389 m
->sdma_engine_id
= q
->sdma_engine_id
;
390 m
->sdma_queue_id
= q
->sdma_queue_id
;
391 m
->sdmax_rlcx_dummy_reg
= SDMA_RLC_DUMMY_DEFAULT
;
393 q
->is_active
= (q
->queue_size
> 0 &&
394 q
->queue_address
!= 0 &&
395 q
->queue_percent
> 0 &&
/*
 * The preempt type is ignored here because there is only one way
 * to preempt an SDMA queue.
 */
/*
 * destroy_mqd_sdma - preempt/tear down the hardware queue backing an SDMA MQD.
 *
 * Forwards to mm->dev->kfd2kgd->hqd_sdma_destroy() with the device's kgd
 * handle, @mqd and @timeout, and returns that call's result. Neither
 * @type nor @pipe_id is passed on.
 *
 * NOTE(review): the end of the parameter list is not visible in this
 * listing.
 */
405 static int destroy_mqd_sdma(struct mqd_manager
*mm
, void *mqd
,
406 enum kfd_preempt_type type
,
407 unsigned int timeout
, uint32_t pipe_id
,
410 return mm
->dev
->kfd2kgd
->hqd_sdma_destroy(mm
->dev
->kgd
, mqd
, timeout
);
/*
 * is_occupied_sdma - ask the hardware layer whether an SDMA queue is in use.
 *
 * Forwards to mm->dev->kfd2kgd->hqd_sdma_is_occupied() with the device's
 * kgd handle and @mqd, and returns that call's result. @queue_address
 * and @pipe_id are not passed on.
 *
 * NOTE(review): the end of the parameter list is not visible in this
 * listing.
 */
413 static bool is_occupied_sdma(struct mqd_manager
*mm
, void *mqd
,
414 uint64_t queue_address
, uint32_t pipe_id
,
417 return mm
->dev
->kfd2kgd
->hqd_sdma_is_occupied(mm
->dev
->kgd
, mqd
);
420 #if defined(CONFIG_DEBUG_FS)
/*
 * debugfs_show_mqd - hex-dump a compute MQD into a seq_file.
 * @m: destination seq_file
 * @data: start of the MQD; sizeof(struct v9_mqd) bytes are dumped
 *
 * seq_hex_dump() is called with an offset prefix, 32 bytes per row,
 * 4-byte groups and the ASCII column disabled.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
422 static int debugfs_show_mqd(struct seq_file
*m
, void *data
)
424 seq_hex_dump(m
, " ", DUMP_PREFIX_OFFSET
, 32, 4,
425 data
, sizeof(struct v9_mqd
), false);
/*
 * debugfs_show_mqd_sdma - hex-dump an SDMA MQD into a seq_file.
 * @m: destination seq_file
 * @data: start of the MQD; sizeof(struct v9_sdma_mqd) bytes are dumped
 *
 * seq_hex_dump() is called with an offset prefix, 32 bytes per row,
 * 4-byte groups and the ASCII column disabled.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
429 static int debugfs_show_mqd_sdma(struct seq_file
*m
, void *data
)
431 seq_hex_dump(m
, " ", DUMP_PREFIX_OFFSET
, 32, 4,
432 data
, sizeof(struct v9_sdma_mqd
), false);
438 struct mqd_manager
*mqd_manager_init_v9(enum KFD_MQD_TYPE type
,
441 struct mqd_manager
*mqd
;
443 if (WARN_ON(type
>= KFD_MQD_TYPE_MAX
))
446 mqd
= kzalloc(sizeof(*mqd
), GFP_KERNEL
);
453 case KFD_MQD_TYPE_CP
:
454 case KFD_MQD_TYPE_COMPUTE
:
455 mqd
->init_mqd
= init_mqd
;
456 mqd
->uninit_mqd
= uninit_mqd
;
457 mqd
->load_mqd
= load_mqd
;
458 mqd
->update_mqd
= update_mqd
;
459 mqd
->destroy_mqd
= destroy_mqd
;
460 mqd
->is_occupied
= is_occupied
;
461 mqd
->get_wave_state
= get_wave_state
;
462 #if defined(CONFIG_DEBUG_FS)
463 mqd
->debugfs_show_mqd
= debugfs_show_mqd
;
466 case KFD_MQD_TYPE_HIQ
:
467 mqd
->init_mqd
= init_mqd_hiq
;
468 mqd
->uninit_mqd
= uninit_mqd
;
469 mqd
->load_mqd
= load_mqd
;
470 mqd
->update_mqd
= update_mqd_hiq
;
471 mqd
->destroy_mqd
= destroy_mqd
;
472 mqd
->is_occupied
= is_occupied
;
473 #if defined(CONFIG_DEBUG_FS)
474 mqd
->debugfs_show_mqd
= debugfs_show_mqd
;
477 case KFD_MQD_TYPE_SDMA
:
478 mqd
->init_mqd
= init_mqd_sdma
;
479 mqd
->uninit_mqd
= uninit_mqd_sdma
;
480 mqd
->load_mqd
= load_mqd_sdma
;
481 mqd
->update_mqd
= update_mqd_sdma
;
482 mqd
->destroy_mqd
= destroy_mqd_sdma
;
483 mqd
->is_occupied
= is_occupied_sdma
;
484 #if defined(CONFIG_DEBUG_FS)
485 mqd
->debugfs_show_mqd
= debugfs_show_mqd_sdma
;