/*
 * Copyright 2016-2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

#include "kfd_mqd_manager.h"
#include "v9_structs.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "sdma0/sdma0_4_0_sh_mask.h"
#include "amdgpu_amdkfd.h"

static inline struct v9_mqd *get_mqd(void *mqd)
{
	return (struct v9_mqd *)mqd;
}

static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v9_sdma_mqd *)mqd;
}
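
/*
 * Note: the MQD (memory queue descriptor) is the in-memory image of a
 * queue's hardware state; the CP or SDMA engine reads it when the queue is
 * mapped and writes it back on preemption. These helpers simply reinterpret
 * the opaque pointer that the generic MQD-manager code passes around.
 */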

static void update_cu_mask(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q)
{
	struct v9_mqd *m;
	uint32_t se_mask[KFD_MAX_NUM_SE] = {0};

	if (q->cu_mask_count == 0)
		return;

	mqd_symmetrically_map_cu_mask(mm,
		q->cu_mask, q->cu_mask_count, se_mask);
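
	/*
	 * mqd_symmetrically_map_cu_mask() spreads the user's CU mask bits
	 * evenly across the shader engines; se_mask[i] is the resulting
	 * 32-bit CU enable mask for shader engine i.
	 */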
	m = get_mqd(mqd);
	m->compute_static_thread_mgmt_se0 = se_mask[0];
	m->compute_static_thread_mgmt_se1 = se_mask[1];
	m->compute_static_thread_mgmt_se2 = se_mask[2];
	m->compute_static_thread_mgmt_se3 = se_mask[3];
	m->compute_static_thread_mgmt_se4 = se_mask[4];
	m->compute_static_thread_mgmt_se5 = se_mask[5];
	m->compute_static_thread_mgmt_se6 = se_mask[6];
	m->compute_static_thread_mgmt_se7 = se_mask[7];

	pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
		m->compute_static_thread_mgmt_se0,
		m->compute_static_thread_mgmt_se1,
		m->compute_static_thread_mgmt_se2,
		m->compute_static_thread_mgmt_se3,
		m->compute_static_thread_mgmt_se4,
		m->compute_static_thread_mgmt_se5,
		m->compute_static_thread_mgmt_se6,
		m->compute_static_thread_mgmt_se7);
}

static void set_priority(struct v9_mqd *m, struct queue_properties *q)
{
	m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
	m->cp_hqd_queue_priority = q->priority;
}
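
/*
 * pipe_priority_map (defined with the common MQD-manager code) folds the
 * sixteen KFD queue-priority levels into the hardware's coarser set of
 * pipe priorities; CP_HQD_QUEUE_PRIORITY, by contrast, takes the user's
 * value unmodified.
 */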

static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
		struct queue_properties *q)
{
	int retval;
	struct kfd_mem_obj *mqd_mem_obj = NULL;

	/* From v9, for CWSR, the control stack is located on the next page
	 * boundary after the MQD, so we use the GTT allocation function
	 * instead of the sub-allocator.
	 */
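	/*
	 * Resulting layout (sketch; struct v9_mqd fits in one page):
	 *
	 *   gpu_addr -> +--------------------------+  page aligned
	 *               | struct v9_mqd            |
	 *               +--------------------------+  next page boundary
	 *               | CWSR control stack       |
	 *               +--------------------------+
	 */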
	if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
		mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
		if (!mqd_mem_obj)
			return NULL;
		retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
			ALIGN(q->ctl_stack_size, PAGE_SIZE) +
				ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
			&(mqd_mem_obj->gtt_mem),
			&(mqd_mem_obj->gpu_addr),
			(void *)&(mqd_mem_obj->cpu_ptr), true);
	} else {
		retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd),
				&mqd_mem_obj);
	}

	if (retval) {
		kfree(mqd_mem_obj);
		return NULL;
	}

	return mqd_mem_obj;
}

static void init_mqd(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	uint64_t addr;
	struct v9_mqd *m;

	m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
	addr = mqd_mem_obj->gpu_addr;

	memset(m, 0, sizeof(struct v9_mqd));

	m->header = 0xC0310800;
	m->compute_pipelinestat_enable = 1;
	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF;

	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
			0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;

	m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;

	m->cp_mqd_base_addr_lo = lower_32_bits(addr);
	m->cp_mqd_base_addr_hi = upper_32_bits(addr);

	m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
			1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
			10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;

	if (q->format == KFD_QUEUE_FORMAT_AQL) {
		m->cp_hqd_aql_control =
			1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
	}

	if (q->tba_addr) {
		m->compute_pgm_rsrc2 |=
			(1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
	}

	if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) {
		m->cp_hqd_persistent_state |=
			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
		m->cp_hqd_ctx_save_base_addr_lo =
			lower_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_base_addr_hi =
			upper_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
	}

	*mqd = m;
	if (gart_addr)
		*gart_addr = addr;

	mm->update_mqd(mm, m, q);
}

static int load_mqd(struct mqd_manager *mm, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			struct queue_properties *p, struct mm_struct *mms)
{
	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
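	/* 64 bytes is 16 dwords, hence the shift of 4 when converting an
	 * AQL packet index to dword granularity.
	 */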

	return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
					  (uint32_t __user *)p->write_ptr,
					  wptr_shift, 0, mms);
}

static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			struct queue_properties *p, struct mm_struct *mms)
{
	return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id,
					      queue_id, p->doorbell_off);
}

static void update_mqd(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q)
{
	struct v9_mqd *m;

	m = get_mqd(mqd);

	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
	m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);

	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);

	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
	m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);

	m->cp_hqd_pq_doorbell_control =
		q->doorbell_off <<
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
			m->cp_hqd_pq_doorbell_control);

	m->cp_hqd_ib_control =
		3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
		1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT;

	/*
	 * HW does not clamp this field correctly. Maximum EOP queue size
	 * is constrained by per-SE EOP done signal count, which is 8-bit.
	 * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
	 * more than (EOP entry count - 1) so a queue size of 0x800 dwords
	 * is safe, giving a maximum field value of 0xA.
	 */
	m->cp_hqd_eop_control = min(0xA,
		order_base_2(q->eop_ring_buffer_size / 4) - 1);
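	/* Worked out: 0xFF entries * 8 dwords/entry = 0x7F8 dwords, so the
	 * next power of two is 0x800 dwords, and order_base_2(0x800) - 1 = 0xA.
	 */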
	m->cp_hqd_eop_base_addr_lo =
			lower_32_bits(q->eop_ring_buffer_address >> 8);
	m->cp_hqd_eop_base_addr_hi =
			upper_32_bits(q->eop_ring_buffer_address >> 8);

	m->cp_hqd_iq_timer = 0;

	m->cp_hqd_vmid = q->vmid;

	if (q->format == KFD_QUEUE_FORMAT_AQL) {
		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
				1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT |
				1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT;
		m->cp_hqd_pq_doorbell_control |= 1 <<
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
	}

	if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
		m->cp_hqd_ctx_save_control = 0;

	update_cu_mask(mm, mqd, q);
	set_priority(m, q);

	q->is_active = QUEUE_IS_ACTIVE(*q);
}

static int destroy_mqd(struct mqd_manager *mm, void *mqd,
			enum kfd_preempt_type type,
			unsigned int timeout, uint32_t pipe_id,
			uint32_t queue_id)
{
	return mm->dev->kfd2kgd->hqd_destroy
		(mm->dev->kgd, mqd, type, timeout,
		pipe_id, queue_id);
}

static void free_mqd(struct mqd_manager *mm, void *mqd,
			struct kfd_mem_obj *mqd_mem_obj)
{
	struct kfd_dev *kfd = mm->dev;

	/* Mirror the two allocation paths in allocate_mqd(). */
	if (mqd_mem_obj->gtt_mem) {
		amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
		kfree(mqd_mem_obj);
	} else {
		kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
	}
}

static bool is_occupied(struct mqd_manager *mm, void *mqd,
			uint64_t queue_address, uint32_t pipe_id,
			uint32_t queue_id)
{
	return mm->dev->kfd2kgd->hqd_is_occupied(
		mm->dev->kgd, queue_address,
		pipe_id, queue_id);
}

static int get_wave_state(struct mqd_manager *mm, void *mqd,
			  void __user *ctl_stack,
			  u32 *ctl_stack_used_size,
			  u32 *save_area_used_size)
{
	struct v9_mqd *m;

	/* Control stack is located one page after MQD. */
	void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);

	m = get_mqd(mqd);

	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
		m->cp_hqd_cntl_stack_offset;
	*save_area_used_size = m->cp_hqd_wg_state_offset -
		m->cp_hqd_cntl_stack_size;
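	/* The subtraction works because the firmware grows the control stack
	 * downward: cp_hqd_cntl_stack_offset starts at cp_hqd_cntl_stack_size
	 * (see init_mqd()) and decreases as entries are pushed.
	 */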

	if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
		return -EFAULT;

	return 0;
}

static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	struct v9_mqd *m;

	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);

	m = get_mqd(*mqd);

	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
}
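
/*
 * PRIV_STATE and KMD_QUEUE above are what distinguish the HIQ from an
 * ordinary compute queue: the HIQ is a privileged, kernel-managed queue
 * that the driver uses to submit commands to the hardware scheduler.
 */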

static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
		struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
		struct queue_properties *q)
{
	struct v9_sdma_mqd *m;

	m = (struct v9_sdma_mqd *) mqd_mem_obj->cpu_ptr;

	memset(m, 0, sizeof(struct v9_sdma_mqd));

	*mqd = m;
	if (gart_addr)
		*gart_addr = mqd_mem_obj->gpu_addr;

	mm->update_mqd(mm, m, q);
}

static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
		uint32_t pipe_id, uint32_t queue_id,
		struct queue_properties *p, struct mm_struct *mms)
{
	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
					       (uint32_t __user *)p->write_ptr,
					       mms);
}

#define SDMA_RLC_DUMMY_DEFAULT 0xf

static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
		struct queue_properties *q)
{
	struct v9_sdma_mqd *m;

	m = get_sdma_mqd(mqd);
	m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
		<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
		q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
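	/* RB_SIZE is log2 of the ring size in dwords: a 4 KB ring is 1024
	 * dwords, so the field would be order_base_2(1024) = 10.
	 */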

	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_doorbell_offset =
		q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;

	m->sdma_engine_id = q->sdma_engine_id;
	m->sdma_queue_id = q->sdma_queue_id;
	m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;

	q->is_active = QUEUE_IS_ACTIVE(*q);
}

/*
 * The preempt type is ignored here because there is only one way to
 * preempt an SDMA queue.
 */
static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
			enum kfd_preempt_type type,
			unsigned int timeout, uint32_t pipe_id,
			uint32_t queue_id)
{
	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
}

static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
			uint64_t queue_address, uint32_t pipe_id,
			uint32_t queue_id)
{
	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
}

#if defined(CONFIG_DEBUG_FS)

static int debugfs_show_mqd(struct seq_file *m, void *data)
{
	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct v9_mqd), false);
	return 0;
}

static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
{
	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct v9_sdma_mqd), false);
	return 0;
}

#endif

struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev)
{
	struct mqd_manager *mqd;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
	if (!mqd)
		return NULL;

	mqd->dev = dev;

	switch (type) {
	case KFD_MQD_TYPE_CP:
		mqd->allocate_mqd = allocate_mqd;
		mqd->init_mqd = init_mqd;
		mqd->free_mqd = free_mqd;
		mqd->load_mqd = load_mqd;
		mqd->update_mqd = update_mqd;
		mqd->destroy_mqd = destroy_mqd;
		mqd->is_occupied = is_occupied;
		mqd->get_wave_state = get_wave_state;
		mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		break;
	case KFD_MQD_TYPE_HIQ:
		mqd->allocate_mqd = allocate_hiq_mqd;
		mqd->init_mqd = init_mqd_hiq;
		mqd->free_mqd = free_mqd_hiq_sdma;
		mqd->load_mqd = hiq_load_mqd_kiq;
		mqd->update_mqd = update_mqd;
		mqd->destroy_mqd = destroy_mqd;
		mqd->is_occupied = is_occupied;
		mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		break;
	case KFD_MQD_TYPE_DIQ:
		mqd->allocate_mqd = allocate_mqd;
		mqd->init_mqd = init_mqd_hiq;
		mqd->free_mqd = free_mqd;
		mqd->load_mqd = load_mqd;
		mqd->update_mqd = update_mqd;
		mqd->destroy_mqd = destroy_mqd;
		mqd->is_occupied = is_occupied;
		mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		break;
	case KFD_MQD_TYPE_SDMA:
		mqd->allocate_mqd = allocate_sdma_mqd;
		mqd->init_mqd = init_mqd_sdma;
		mqd->free_mqd = free_mqd_hiq_sdma;
		mqd->load_mqd = load_mqd_sdma;
		mqd->update_mqd = update_mqd_sdma;
		mqd->destroy_mqd = destroy_mqd_sdma;
		mqd->is_occupied = is_occupied_sdma;
		mqd->mqd_size = sizeof(struct v9_sdma_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
		break;
	default:
		kfree(mqd);
		return NULL;
	}

	return mqd;
}
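
/*
 * Usage sketch (simplified; the real call sites live in the KFD device
 * queue manager, and the struct queue field names below are illustrative):
 *
 *	struct mqd_manager *mm = mqd_manager_init_v9(KFD_MQD_TYPE_CP, dev);
 *	struct kfd_mem_obj *obj = mm->allocate_mqd(mm->dev, &q->properties);
 *
 *	mm->init_mqd(mm, &q->mqd, obj, &q->gart_mqd_addr, &q->properties);
 *	mm->load_mqd(mm, q->mqd, pipe, queue, &q->properties, current->mm);
 */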