/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
24 #include <linux/printk.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
28 #include "kfd_mqd_manager.h"
29 #include "v10_structs.h"
30 #include "gc/gc_10_1_0_offset.h"
31 #include "gc/gc_10_1_0_sh_mask.h"
32 #include "amdgpu_amdkfd.h"
/*
 * View an opaque MQD buffer as a GFXv10 compute MQD.
 * Pure pointer cast: nothing is copied or validated.
 */
static inline struct v10_compute_mqd *get_mqd(void *mqd)
{
	return (struct v10_compute_mqd *)mqd;
}
/*
 * View an opaque MQD buffer as a GFXv10 SDMA MQD.
 * Pure pointer cast: nothing is copied or validated.
 */
static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v10_sdma_mqd *)mqd;
}
44 static void update_cu_mask(struct mqd_manager
*mm
, void *mqd
,
45 struct queue_properties
*q
)
47 struct v10_compute_mqd
*m
;
48 uint32_t se_mask
[4] = {0}; /* 4 is the max # of SEs */
50 if (q
->cu_mask_count
== 0)
53 mqd_symmetrically_map_cu_mask(mm
,
54 q
->cu_mask
, q
->cu_mask_count
, se_mask
);
57 m
->compute_static_thread_mgmt_se0
= se_mask
[0];
58 m
->compute_static_thread_mgmt_se1
= se_mask
[1];
59 m
->compute_static_thread_mgmt_se2
= se_mask
[2];
60 m
->compute_static_thread_mgmt_se3
= se_mask
[3];
62 pr_debug("update cu mask to %#x %#x %#x %#x\n",
63 m
->compute_static_thread_mgmt_se0
,
64 m
->compute_static_thread_mgmt_se1
,
65 m
->compute_static_thread_mgmt_se2
,
66 m
->compute_static_thread_mgmt_se3
);
69 static void set_priority(struct v10_compute_mqd
*m
, struct queue_properties
*q
)
71 m
->cp_hqd_pipe_priority
= pipe_priority_map
[q
->priority
];
72 m
->cp_hqd_queue_priority
= q
->priority
;
75 static struct kfd_mem_obj
*allocate_mqd(struct kfd_dev
*kfd
,
76 struct queue_properties
*q
)
78 struct kfd_mem_obj
*mqd_mem_obj
;
80 if (kfd_gtt_sa_allocate(kfd
, sizeof(struct v10_compute_mqd
),
87 static void init_mqd(struct mqd_manager
*mm
, void **mqd
,
88 struct kfd_mem_obj
*mqd_mem_obj
, uint64_t *gart_addr
,
89 struct queue_properties
*q
)
92 struct v10_compute_mqd
*m
;
94 m
= (struct v10_compute_mqd
*) mqd_mem_obj
->cpu_ptr
;
95 addr
= mqd_mem_obj
->gpu_addr
;
97 memset(m
, 0, sizeof(struct v10_compute_mqd
));
99 m
->header
= 0xC0310800;
100 m
->compute_pipelinestat_enable
= 1;
101 m
->compute_static_thread_mgmt_se0
= 0xFFFFFFFF;
102 m
->compute_static_thread_mgmt_se1
= 0xFFFFFFFF;
103 m
->compute_static_thread_mgmt_se2
= 0xFFFFFFFF;
104 m
->compute_static_thread_mgmt_se3
= 0xFFFFFFFF;
106 m
->cp_hqd_persistent_state
= CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK
|
107 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT
;
109 m
->cp_mqd_control
= 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT
;
111 m
->cp_mqd_base_addr_lo
= lower_32_bits(addr
);
112 m
->cp_mqd_base_addr_hi
= upper_32_bits(addr
);
114 m
->cp_hqd_quantum
= 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT
|
115 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT
|
116 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT
;
118 if (q
->format
== KFD_QUEUE_FORMAT_AQL
) {
119 m
->cp_hqd_aql_control
=
120 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT
;
123 if (mm
->dev
->cwsr_enabled
) {
124 m
->cp_hqd_persistent_state
|=
125 (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT
);
126 m
->cp_hqd_ctx_save_base_addr_lo
=
127 lower_32_bits(q
->ctx_save_restore_area_address
);
128 m
->cp_hqd_ctx_save_base_addr_hi
=
129 upper_32_bits(q
->ctx_save_restore_area_address
);
130 m
->cp_hqd_ctx_save_size
= q
->ctx_save_restore_area_size
;
131 m
->cp_hqd_cntl_stack_size
= q
->ctl_stack_size
;
132 m
->cp_hqd_cntl_stack_offset
= q
->ctl_stack_size
;
133 m
->cp_hqd_wg_state_offset
= q
->ctl_stack_size
;
139 mm
->update_mqd(mm
, m
, q
);
142 static int load_mqd(struct mqd_manager
*mm
, void *mqd
,
143 uint32_t pipe_id
, uint32_t queue_id
,
144 struct queue_properties
*p
, struct mm_struct
*mms
)
147 /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
148 uint32_t wptr_shift
= (p
->format
== KFD_QUEUE_FORMAT_AQL
? 4 : 0);
150 r
= mm
->dev
->kfd2kgd
->hqd_load(mm
->dev
->kgd
, mqd
, pipe_id
, queue_id
,
151 (uint32_t __user
*)p
->write_ptr
,
156 static int hiq_load_mqd_kiq(struct mqd_manager
*mm
, void *mqd
,
157 uint32_t pipe_id
, uint32_t queue_id
,
158 struct queue_properties
*p
, struct mm_struct
*mms
)
160 return mm
->dev
->kfd2kgd
->hiq_mqd_load(mm
->dev
->kgd
, mqd
, pipe_id
,
161 queue_id
, p
->doorbell_off
);
164 static void update_mqd(struct mqd_manager
*mm
, void *mqd
,
165 struct queue_properties
*q
)
167 struct v10_compute_mqd
*m
;
171 m
->cp_hqd_pq_control
= 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT
;
172 m
->cp_hqd_pq_control
|=
173 ffs(q
->queue_size
/ sizeof(unsigned int)) - 1 - 1;
174 pr_debug("cp_hqd_pq_control 0x%x\n", m
->cp_hqd_pq_control
);
176 m
->cp_hqd_pq_base_lo
= lower_32_bits((uint64_t)q
->queue_address
>> 8);
177 m
->cp_hqd_pq_base_hi
= upper_32_bits((uint64_t)q
->queue_address
>> 8);
179 m
->cp_hqd_pq_rptr_report_addr_lo
= lower_32_bits((uint64_t)q
->read_ptr
);
180 m
->cp_hqd_pq_rptr_report_addr_hi
= upper_32_bits((uint64_t)q
->read_ptr
);
181 m
->cp_hqd_pq_wptr_poll_addr_lo
= lower_32_bits((uint64_t)q
->write_ptr
);
182 m
->cp_hqd_pq_wptr_poll_addr_hi
= upper_32_bits((uint64_t)q
->write_ptr
);
184 m
->cp_hqd_pq_doorbell_control
=
186 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT
;
187 pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
188 m
->cp_hqd_pq_doorbell_control
);
190 m
->cp_hqd_ib_control
= 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT
;
193 * HW does not clamp this field correctly. Maximum EOP queue size
194 * is constrained by per-SE EOP done signal count, which is 8-bit.
195 * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
196 * more than (EOP entry count - 1) so a queue size of 0x800 dwords
197 * is safe, giving a maximum field value of 0xA.
199 m
->cp_hqd_eop_control
= min(0xA,
200 ffs(q
->eop_ring_buffer_size
/ sizeof(unsigned int)) - 1 - 1);
201 m
->cp_hqd_eop_base_addr_lo
=
202 lower_32_bits(q
->eop_ring_buffer_address
>> 8);
203 m
->cp_hqd_eop_base_addr_hi
=
204 upper_32_bits(q
->eop_ring_buffer_address
>> 8);
206 m
->cp_hqd_iq_timer
= 0;
208 m
->cp_hqd_vmid
= q
->vmid
;
210 if (q
->format
== KFD_QUEUE_FORMAT_AQL
) {
211 /* GC 10 removed WPP_CLAMP from PQ Control */
212 m
->cp_hqd_pq_control
|= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK
|
213 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT
|
214 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT
;
215 m
->cp_hqd_pq_doorbell_control
|=
216 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT
;
218 if (mm
->dev
->cwsr_enabled
)
219 m
->cp_hqd_ctx_save_control
= 0;
221 update_cu_mask(mm
, mqd
, q
);
224 q
->is_active
= QUEUE_IS_ACTIVE(*q
);
227 static int destroy_mqd(struct mqd_manager
*mm
, void *mqd
,
228 enum kfd_preempt_type type
,
229 unsigned int timeout
, uint32_t pipe_id
,
232 return mm
->dev
->kfd2kgd
->hqd_destroy
233 (mm
->dev
->kgd
, mqd
, type
, timeout
,
237 static void free_mqd(struct mqd_manager
*mm
, void *mqd
,
238 struct kfd_mem_obj
*mqd_mem_obj
)
240 kfd_gtt_sa_free(mm
->dev
, mqd_mem_obj
);
243 static bool is_occupied(struct mqd_manager
*mm
, void *mqd
,
244 uint64_t queue_address
, uint32_t pipe_id
,
247 return mm
->dev
->kfd2kgd
->hqd_is_occupied(
248 mm
->dev
->kgd
, queue_address
,
252 static int get_wave_state(struct mqd_manager
*mm
, void *mqd
,
253 void __user
*ctl_stack
,
254 u32
*ctl_stack_used_size
,
255 u32
*save_area_used_size
)
257 struct v10_compute_mqd
*m
;
261 /* Control stack is written backwards, while workgroup context data
262 * is written forwards. Both starts from m->cp_hqd_cntl_stack_size.
263 * Current position is at m->cp_hqd_cntl_stack_offset and
264 * m->cp_hqd_wg_state_offset, respectively.
266 *ctl_stack_used_size
= m
->cp_hqd_cntl_stack_size
-
267 m
->cp_hqd_cntl_stack_offset
;
268 *save_area_used_size
= m
->cp_hqd_wg_state_offset
-
269 m
->cp_hqd_cntl_stack_size
;
271 /* Control stack is not copied to user mode for GFXv10 because
272 * it's part of the context save area that is already
273 * accessible to user mode
279 static void init_mqd_hiq(struct mqd_manager
*mm
, void **mqd
,
280 struct kfd_mem_obj
*mqd_mem_obj
, uint64_t *gart_addr
,
281 struct queue_properties
*q
)
283 struct v10_compute_mqd
*m
;
285 init_mqd(mm
, mqd
, mqd_mem_obj
, gart_addr
, q
);
289 m
->cp_hqd_pq_control
|= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT
|
290 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT
;
293 static void init_mqd_sdma(struct mqd_manager
*mm
, void **mqd
,
294 struct kfd_mem_obj
*mqd_mem_obj
, uint64_t *gart_addr
,
295 struct queue_properties
*q
)
297 struct v10_sdma_mqd
*m
;
299 m
= (struct v10_sdma_mqd
*) mqd_mem_obj
->cpu_ptr
;
301 memset(m
, 0, sizeof(struct v10_sdma_mqd
));
305 *gart_addr
= mqd_mem_obj
->gpu_addr
;
307 mm
->update_mqd(mm
, m
, q
);
310 static int load_mqd_sdma(struct mqd_manager
*mm
, void *mqd
,
311 uint32_t pipe_id
, uint32_t queue_id
,
312 struct queue_properties
*p
, struct mm_struct
*mms
)
314 return mm
->dev
->kfd2kgd
->hqd_sdma_load(mm
->dev
->kgd
, mqd
,
315 (uint32_t __user
*)p
->write_ptr
,
319 #define SDMA_RLC_DUMMY_DEFAULT 0xf
321 static void update_mqd_sdma(struct mqd_manager
*mm
, void *mqd
,
322 struct queue_properties
*q
)
324 struct v10_sdma_mqd
*m
;
326 m
= get_sdma_mqd(mqd
);
327 m
->sdmax_rlcx_rb_cntl
= (ffs(q
->queue_size
/ sizeof(unsigned int)) - 1)
328 << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT
|
329 q
->vmid
<< SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT
|
330 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT
|
331 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT
;
333 m
->sdmax_rlcx_rb_base
= lower_32_bits(q
->queue_address
>> 8);
334 m
->sdmax_rlcx_rb_base_hi
= upper_32_bits(q
->queue_address
>> 8);
335 m
->sdmax_rlcx_rb_rptr_addr_lo
= lower_32_bits((uint64_t)q
->read_ptr
);
336 m
->sdmax_rlcx_rb_rptr_addr_hi
= upper_32_bits((uint64_t)q
->read_ptr
);
337 m
->sdmax_rlcx_doorbell_offset
=
338 q
->doorbell_off
<< SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT
;
340 m
->sdma_engine_id
= q
->sdma_engine_id
;
341 m
->sdma_queue_id
= q
->sdma_queue_id
;
342 m
->sdmax_rlcx_dummy_reg
= SDMA_RLC_DUMMY_DEFAULT
;
344 q
->is_active
= QUEUE_IS_ACTIVE(*q
);
348 * * preempt type here is ignored because there is only one way
349 * * to preempt sdma queue
351 static int destroy_mqd_sdma(struct mqd_manager
*mm
, void *mqd
,
352 enum kfd_preempt_type type
,
353 unsigned int timeout
, uint32_t pipe_id
,
356 return mm
->dev
->kfd2kgd
->hqd_sdma_destroy(mm
->dev
->kgd
, mqd
, timeout
);
359 static bool is_occupied_sdma(struct mqd_manager
*mm
, void *mqd
,
360 uint64_t queue_address
, uint32_t pipe_id
,
363 return mm
->dev
->kfd2kgd
->hqd_sdma_is_occupied(mm
->dev
->kgd
, mqd
);
#if defined(CONFIG_DEBUG_FS)

/*
 * debugfs helper: hex-dump a compute MQD (32 bytes per row, 4-byte groups,
 * offset prefix, no ASCII column) into a seq_file. Always returns 0.
 */
static int debugfs_show_mqd(struct seq_file *m, void *data)
{
	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct v10_compute_mqd), false);
	return 0;
}

/*
 * debugfs helper: same dump as debugfs_show_mqd(), sized for an SDMA MQD.
 * Always returns 0.
 */
static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
{
	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct v10_sdma_mqd), false);
	return 0;
}

#endif
384 struct mqd_manager
*mqd_manager_init_v10(enum KFD_MQD_TYPE type
,
387 struct mqd_manager
*mqd
;
389 if (WARN_ON(type
>= KFD_MQD_TYPE_MAX
))
392 mqd
= kzalloc(sizeof(*mqd
), GFP_KERNEL
);
399 case KFD_MQD_TYPE_CP
:
400 pr_debug("%s@%i\n", __func__
, __LINE__
);
401 mqd
->allocate_mqd
= allocate_mqd
;
402 mqd
->init_mqd
= init_mqd
;
403 mqd
->free_mqd
= free_mqd
;
404 mqd
->load_mqd
= load_mqd
;
405 mqd
->update_mqd
= update_mqd
;
406 mqd
->destroy_mqd
= destroy_mqd
;
407 mqd
->is_occupied
= is_occupied
;
408 mqd
->mqd_size
= sizeof(struct v10_compute_mqd
);
409 mqd
->get_wave_state
= get_wave_state
;
410 #if defined(CONFIG_DEBUG_FS)
411 mqd
->debugfs_show_mqd
= debugfs_show_mqd
;
413 pr_debug("%s@%i\n", __func__
, __LINE__
);
415 case KFD_MQD_TYPE_HIQ
:
416 pr_debug("%s@%i\n", __func__
, __LINE__
);
417 mqd
->allocate_mqd
= allocate_hiq_mqd
;
418 mqd
->init_mqd
= init_mqd_hiq
;
419 mqd
->free_mqd
= free_mqd_hiq_sdma
;
420 mqd
->load_mqd
= hiq_load_mqd_kiq
;
421 mqd
->update_mqd
= update_mqd
;
422 mqd
->destroy_mqd
= destroy_mqd
;
423 mqd
->is_occupied
= is_occupied
;
424 mqd
->mqd_size
= sizeof(struct v10_compute_mqd
);
425 #if defined(CONFIG_DEBUG_FS)
426 mqd
->debugfs_show_mqd
= debugfs_show_mqd
;
428 pr_debug("%s@%i\n", __func__
, __LINE__
);
430 case KFD_MQD_TYPE_DIQ
:
431 mqd
->allocate_mqd
= allocate_mqd
;
432 mqd
->init_mqd
= init_mqd_hiq
;
433 mqd
->free_mqd
= free_mqd
;
434 mqd
->load_mqd
= load_mqd
;
435 mqd
->update_mqd
= update_mqd
;
436 mqd
->destroy_mqd
= destroy_mqd
;
437 mqd
->is_occupied
= is_occupied
;
438 mqd
->mqd_size
= sizeof(struct v10_compute_mqd
);
439 #if defined(CONFIG_DEBUG_FS)
440 mqd
->debugfs_show_mqd
= debugfs_show_mqd
;
443 case KFD_MQD_TYPE_SDMA
:
444 pr_debug("%s@%i\n", __func__
, __LINE__
);
445 mqd
->allocate_mqd
= allocate_sdma_mqd
;
446 mqd
->init_mqd
= init_mqd_sdma
;
447 mqd
->free_mqd
= free_mqd_hiq_sdma
;
448 mqd
->load_mqd
= load_mqd_sdma
;
449 mqd
->update_mqd
= update_mqd_sdma
;
450 mqd
->destroy_mqd
= destroy_mqd_sdma
;
451 mqd
->is_occupied
= is_occupied_sdma
;
452 mqd
->mqd_size
= sizeof(struct v10_sdma_mqd
);
453 #if defined(CONFIG_DEBUG_FS)
454 mqd
->debugfs_show_mqd
= debugfs_show_mqd_sdma
;
456 pr_debug("%s@%i\n", __func__
, __LINE__
);