/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
24 #include <linux/printk.h>
25 #include <linux/slab.h>
26 #include <linux/mm_types.h>
29 #include "kfd_mqd_manager.h"
30 #include "vi_structs.h"
31 #include "gca/gfx_8_0_sh_mask.h"
32 #include "gca/gfx_8_0_enum.h"
33 #include "oss/oss_3_0_sh_mask.h"
/* Bit position of the PRIV_STATE field inside the cp_mqd_control word. */
#define CP_MQD_CONTROL__PRIV_STATE__SHIFT 0x8
/* View an opaque MQD pointer as a compute (struct vi_mqd) descriptor. */
static inline struct vi_mqd *get_mqd(void *mqd)
{
	return (struct vi_mqd *)mqd;
}
/* View an opaque MQD pointer as an SDMA (struct vi_sdma_mqd) descriptor. */
static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct vi_sdma_mqd *)mqd;
}
47 static void update_cu_mask(struct mqd_manager
*mm
, void *mqd
,
48 struct queue_properties
*q
)
51 uint32_t se_mask
[4] = {0}; /* 4 is the max # of SEs */
53 if (q
->cu_mask_count
== 0)
56 mqd_symmetrically_map_cu_mask(mm
,
57 q
->cu_mask
, q
->cu_mask_count
, se_mask
);
60 m
->compute_static_thread_mgmt_se0
= se_mask
[0];
61 m
->compute_static_thread_mgmt_se1
= se_mask
[1];
62 m
->compute_static_thread_mgmt_se2
= se_mask
[2];
63 m
->compute_static_thread_mgmt_se3
= se_mask
[3];
65 pr_debug("Update cu mask to %#x %#x %#x %#x\n",
66 m
->compute_static_thread_mgmt_se0
,
67 m
->compute_static_thread_mgmt_se1
,
68 m
->compute_static_thread_mgmt_se2
,
69 m
->compute_static_thread_mgmt_se3
);
72 static void set_priority(struct vi_mqd
*m
, struct queue_properties
*q
)
74 m
->cp_hqd_pipe_priority
= pipe_priority_map
[q
->priority
];
75 m
->cp_hqd_queue_priority
= q
->priority
;
/*
 * Allocate backing memory for a compute MQD by asking kfd_gtt_sa_allocate()
 * for sizeof(struct vi_mqd) bytes on device @kfd.
 *
 * NOTE(review): the rest of this definition (the remaining argument of the
 * call, the failure path and the return statement) is not visible in this
 * view of the file; confirm against the complete source before editing.
 */
78 static struct kfd_mem_obj
*allocate_mqd(struct kfd_dev
*kfd
,
79 struct queue_properties
*q
)
81 struct kfd_mem_obj
*mqd_mem_obj
;
83 if (kfd_gtt_sa_allocate(kfd
, sizeof(struct vi_mqd
),
/*
 * Build the initial image of a compute MQD inside @mqd_mem_obj.
 *
 * The descriptor at mqd_mem_obj->cpu_ptr is zeroed and then given a fixed
 * header, pipeline-stat enable, all-ones static thread management masks,
 * persistent-state and MQD-control words, its own GPU address
 * (mqd_mem_obj->gpu_addr, split into lo/hi), the quantum setup and the
 * EOP read-pointer init bit.  AQL-format queues additionally get
 * cp_hqd_iq_rptr = 1.  tba_addr/tma_addr are stored shifted right by 8 bits
 * and TRAP_PRESENT is set in compute_pgm_rsrc2.  When the device has
 * cwsr_enabled and the queue has a context save/restore area, the save-area
 * address, size and control-stack fields are programmed as well.  The
 * manager's update_mqd callback is invoked last.
 *
 * NOTE(review): several lines of this definition are missing from this view
 * (local declarations, braces, and possibly statements/conditions around the
 * tba/tma stores and the handling of @mqd and @gart_addr).  Do not treat the
 * text below as the complete function.
 */
90 static void init_mqd(struct mqd_manager
*mm
, void **mqd
,
91 struct kfd_mem_obj
*mqd_mem_obj
, uint64_t *gart_addr
,
92 struct queue_properties
*q
)
97 m
= (struct vi_mqd
*) mqd_mem_obj
->cpu_ptr
;
98 addr
= mqd_mem_obj
->gpu_addr
;
/* Start from an all-zero descriptor. */
100 memset(m
, 0, sizeof(struct vi_mqd
));
102 m
->header
= 0xC0310800;
103 m
->compute_pipelinestat_enable
= 1;
104 m
->compute_static_thread_mgmt_se0
= 0xFFFFFFFF;
105 m
->compute_static_thread_mgmt_se1
= 0xFFFFFFFF;
106 m
->compute_static_thread_mgmt_se2
= 0xFFFFFFFF;
107 m
->compute_static_thread_mgmt_se3
= 0xFFFFFFFF;
109 m
->cp_hqd_persistent_state
= CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK
|
110 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT
;
112 m
->cp_mqd_control
= 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT
|
113 MTYPE_UC
<< CP_MQD_CONTROL__MTYPE__SHIFT
;
/* The MQD records its own GPU address. */
115 m
->cp_mqd_base_addr_lo
= lower_32_bits(addr
);
116 m
->cp_mqd_base_addr_hi
= upper_32_bits(addr
);
118 m
->cp_hqd_quantum
= 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT
|
119 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT
|
120 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT
;
123 m
->cp_hqd_eop_rptr
= 1 << CP_HQD_EOP_RPTR__INIT_FETCHER__SHIFT
;
125 if (q
->format
== KFD_QUEUE_FORMAT_AQL
)
126 m
->cp_hqd_iq_rptr
= 1;
129 m
->compute_tba_lo
= lower_32_bits(q
->tba_addr
>> 8);
130 m
->compute_tba_hi
= upper_32_bits(q
->tba_addr
>> 8);
131 m
->compute_tma_lo
= lower_32_bits(q
->tma_addr
>> 8);
132 m
->compute_tma_hi
= upper_32_bits(q
->tma_addr
>> 8);
133 m
->compute_pgm_rsrc2
|=
134 (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT
);
/* Context save/restore fields: only with CWSR enabled and a save area set. */
137 if (mm
->dev
->cwsr_enabled
&& q
->ctx_save_restore_area_address
) {
138 m
->cp_hqd_persistent_state
|=
139 (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT
);
140 m
->cp_hqd_ctx_save_base_addr_lo
=
141 lower_32_bits(q
->ctx_save_restore_area_address
);
142 m
->cp_hqd_ctx_save_base_addr_hi
=
143 upper_32_bits(q
->ctx_save_restore_area_address
);
144 m
->cp_hqd_ctx_save_size
= q
->ctx_save_restore_area_size
;
145 m
->cp_hqd_cntl_stack_size
= q
->ctl_stack_size
;
146 m
->cp_hqd_cntl_stack_offset
= q
->ctl_stack_size
;
147 m
->cp_hqd_wg_state_offset
= q
->ctl_stack_size
;
/* Finish by invoking the manager's update_mqd callback on the new MQD. */
153 mm
->update_mqd(mm
, m
, q
);
156 static int load_mqd(struct mqd_manager
*mm
, void *mqd
,
157 uint32_t pipe_id
, uint32_t queue_id
,
158 struct queue_properties
*p
, struct mm_struct
*mms
)
160 /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
161 uint32_t wptr_shift
= (p
->format
== KFD_QUEUE_FORMAT_AQL
? 4 : 0);
162 uint32_t wptr_mask
= (uint32_t)((p
->queue_size
/ 4) - 1);
164 return mm
->dev
->kfd2kgd
->hqd_load(mm
->dev
->kgd
, mqd
, pipe_id
, queue_id
,
165 (uint32_t __user
*)p
->write_ptr
,
166 wptr_shift
, wptr_mask
, mms
);
/*
 * Common worker that refreshes the queue-dependent fields of a compute MQD.
 *
 * @mtype and @atc_bit are shifted into the PQ, EOP, IB, IQ-timer and (when
 * CWSR is enabled and a save area exists) context-save control words.  The
 * ring size, ring base (queue_address >> 8), read-pointer report address,
 * write-pointer poll address, EOP ring size/base and VMID are taken from @q.
 * AQL-format queues also get NO_UPDATE_RPTR and SLOT_BASED_WPTR = 2 in
 * cp_hqd_pq_control.  update_cu_mask() is called, and q->is_active is
 * recomputed with QUEUE_IS_ACTIVE().
 *
 * NOTE(review): this view is missing lines (local declarations, braces,
 * comment delimiters and at least one expression line).  In particular the
 * cp_hqd_pq_doorbell_control assignment below shows only the
 * DOORBELL_OFFSET shift constant; update_mqd_sdma() shifts q->doorbell_off
 * by the analogous constant, so an operand line is presumably missing here.
 * Confirm against the complete source.
 */
169 static void __update_mqd(struct mqd_manager
*mm
, void *mqd
,
170 struct queue_properties
*q
, unsigned int mtype
,
171 unsigned int atc_bit
)
177 m
->cp_hqd_pq_control
= 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT
|
178 atc_bit
<< CP_HQD_PQ_CONTROL__PQ_ATC__SHIFT
|
179 mtype
<< CP_HQD_PQ_CONTROL__MTYPE__SHIFT
;
/* Low bits hold order_base_2(ring size in dwords) - 1. */
180 m
->cp_hqd_pq_control
|= order_base_2(q
->queue_size
/ 4) - 1;
181 pr_debug("cp_hqd_pq_control 0x%x\n", m
->cp_hqd_pq_control
);
183 m
->cp_hqd_pq_base_lo
= lower_32_bits((uint64_t)q
->queue_address
>> 8);
184 m
->cp_hqd_pq_base_hi
= upper_32_bits((uint64_t)q
->queue_address
>> 8);
186 m
->cp_hqd_pq_rptr_report_addr_lo
= lower_32_bits((uint64_t)q
->read_ptr
);
187 m
->cp_hqd_pq_rptr_report_addr_hi
= upper_32_bits((uint64_t)q
->read_ptr
);
188 m
->cp_hqd_pq_wptr_poll_addr_lo
= lower_32_bits((uint64_t)q
->write_ptr
);
189 m
->cp_hqd_pq_wptr_poll_addr_hi
= upper_32_bits((uint64_t)q
->write_ptr
);
191 m
->cp_hqd_pq_doorbell_control
=
193 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT
;
194 pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
195 m
->cp_hqd_pq_doorbell_control
);
197 m
->cp_hqd_eop_control
= atc_bit
<< CP_HQD_EOP_CONTROL__EOP_ATC__SHIFT
|
198 mtype
<< CP_HQD_EOP_CONTROL__MTYPE__SHIFT
;
200 m
->cp_hqd_ib_control
= atc_bit
<< CP_HQD_IB_CONTROL__IB_ATC__SHIFT
|
201 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT
|
202 mtype
<< CP_HQD_IB_CONTROL__MTYPE__SHIFT
;
205 * HW does not clamp this field correctly. Maximum EOP queue size
206 * is constrained by per-SE EOP done signal count, which is 8-bit.
207 * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
208 * more than (EOP entry count - 1) so a queue size of 0x800 dwords
209 * is safe, giving a maximum field value of 0xA.
211 m
->cp_hqd_eop_control
|= min(0xA,
212 order_base_2(q
->eop_ring_buffer_size
/ 4) - 1);
213 m
->cp_hqd_eop_base_addr_lo
=
214 lower_32_bits(q
->eop_ring_buffer_address
>> 8);
215 m
->cp_hqd_eop_base_addr_hi
=
216 upper_32_bits(q
->eop_ring_buffer_address
>> 8);
218 m
->cp_hqd_iq_timer
= atc_bit
<< CP_HQD_IQ_TIMER__IQ_ATC__SHIFT
|
219 mtype
<< CP_HQD_IQ_TIMER__MTYPE__SHIFT
;
221 m
->cp_hqd_vmid
= q
->vmid
;
223 if (q
->format
== KFD_QUEUE_FORMAT_AQL
) {
224 m
->cp_hqd_pq_control
|= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK
|
225 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT
;
228 if (mm
->dev
->cwsr_enabled
&& q
->ctx_save_restore_area_address
)
229 m
->cp_hqd_ctx_save_control
=
230 atc_bit
<< CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT
|
231 mtype
<< CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT
;
233 update_cu_mask(mm
, mqd
, q
);
/* Record whether the queue currently counts as active. */
236 q
->is_active
= QUEUE_IS_ACTIVE(*q
);
240 static void update_mqd(struct mqd_manager
*mm
, void *mqd
,
241 struct queue_properties
*q
)
243 __update_mqd(mm
, mqd
, q
, MTYPE_CC
, 1);
246 static void update_mqd_tonga(struct mqd_manager
*mm
, void *mqd
,
247 struct queue_properties
*q
)
249 __update_mqd(mm
, mqd
, q
, MTYPE_UC
, 0);
/*
 * Tear down a compute hardware queue by returning the result of the kfd2kgd
 * hqd_destroy() callback, which receives the device handle, the MQD, the
 * preemption @type and the @timeout.
 *
 * NOTE(review): the end of the parameter list and the trailing arguments of
 * the hqd_destroy() call are not visible in this view of the file.
 */
252 static int destroy_mqd(struct mqd_manager
*mm
, void *mqd
,
253 enum kfd_preempt_type type
,
254 unsigned int timeout
, uint32_t pipe_id
,
257 return mm
->dev
->kfd2kgd
->hqd_destroy
258 (mm
->dev
->kgd
, mqd
, type
, timeout
,
262 static void free_mqd(struct mqd_manager
*mm
, void *mqd
,
263 struct kfd_mem_obj
*mqd_mem_obj
)
265 kfd_gtt_sa_free(mm
->dev
, mqd_mem_obj
);
/*
 * Report whether a compute hardware queue slot is occupied by returning the
 * result of the kfd2kgd hqd_is_occupied() callback for @queue_address.
 *
 * NOTE(review): the end of the parameter list and the trailing arguments of
 * the hqd_is_occupied() call are not visible in this view of the file.
 */
268 static bool is_occupied(struct mqd_manager
*mm
, void *mqd
,
269 uint64_t queue_address
, uint32_t pipe_id
,
272 return mm
->dev
->kfd2kgd
->hqd_is_occupied(
273 mm
->dev
->kgd
, queue_address
,
277 static int get_wave_state(struct mqd_manager
*mm
, void *mqd
,
278 void __user
*ctl_stack
,
279 u32
*ctl_stack_used_size
,
280 u32
*save_area_used_size
)
286 *ctl_stack_used_size
= m
->cp_hqd_cntl_stack_size
-
287 m
->cp_hqd_cntl_stack_offset
;
288 *save_area_used_size
= m
->cp_hqd_wg_state_offset
-
289 m
->cp_hqd_cntl_stack_size
;
291 /* Control stack is not copied to user mode for GFXv8 because
292 * it's part of the context save area that is already
293 * accessible to user mode
/*
 * Initialise an MQD for an HIQ-style queue: run the regular init_mqd() and
 * then additionally set the PRIV_STATE and KMD_QUEUE bits in
 * cp_hqd_pq_control.
 *
 * NOTE(review): the declaration and assignment of the local `m` used below
 * are not visible in this view of the file.
 */
299 static void init_mqd_hiq(struct mqd_manager
*mm
, void **mqd
,
300 struct kfd_mem_obj
*mqd_mem_obj
, uint64_t *gart_addr
,
301 struct queue_properties
*q
)
304 init_mqd(mm
, mqd
, mqd_mem_obj
, gart_addr
, q
);
308 m
->cp_hqd_pq_control
|= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT
|
309 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT
;
312 static void update_mqd_hiq(struct mqd_manager
*mm
, void *mqd
,
313 struct queue_properties
*q
)
315 __update_mqd(mm
, mqd
, q
, MTYPE_UC
, 0);
/*
 * Build the initial image of an SDMA MQD inside @mqd_mem_obj: the descriptor
 * at mqd_mem_obj->cpu_ptr is zeroed, the memory object's GPU address is
 * stored through @gart_addr, and the manager's update_mqd callback is
 * invoked on the new descriptor.
 *
 * NOTE(review): lines between the memset and the *gart_addr store are
 * missing from this view (any guard on @gart_addr and any store through
 * @mqd are not visible); confirm against the complete source.
 */
318 static void init_mqd_sdma(struct mqd_manager
*mm
, void **mqd
,
319 struct kfd_mem_obj
*mqd_mem_obj
, uint64_t *gart_addr
,
320 struct queue_properties
*q
)
322 struct vi_sdma_mqd
*m
;
324 m
= (struct vi_sdma_mqd
*) mqd_mem_obj
->cpu_ptr
;
326 memset(m
, 0, sizeof(struct vi_sdma_mqd
));
330 *gart_addr
= mqd_mem_obj
->gpu_addr
;
332 mm
->update_mqd(mm
, m
, q
);
/*
 * Load an SDMA MQD by returning the result of the kfd2kgd hqd_sdma_load()
 * callback, which receives the device handle, the MQD and the queue's
 * user-space write pointer.
 *
 * NOTE(review): the trailing argument(s) of the hqd_sdma_load() call are not
 * visible in this view of the file.
 */
335 static int load_mqd_sdma(struct mqd_manager
*mm
, void *mqd
,
336 uint32_t pipe_id
, uint32_t queue_id
,
337 struct queue_properties
*p
, struct mm_struct
*mms
)
339 return mm
->dev
->kfd2kgd
->hqd_sdma_load(mm
->dev
->kgd
, mqd
,
340 (uint32_t __user
*)p
->write_ptr
,
344 static void update_mqd_sdma(struct mqd_manager
*mm
, void *mqd
,
345 struct queue_properties
*q
)
347 struct vi_sdma_mqd
*m
;
349 m
= get_sdma_mqd(mqd
);
350 m
->sdmax_rlcx_rb_cntl
= order_base_2(q
->queue_size
/ 4)
351 << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT
|
352 q
->vmid
<< SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT
|
353 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT
|
354 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT
;
356 m
->sdmax_rlcx_rb_base
= lower_32_bits(q
->queue_address
>> 8);
357 m
->sdmax_rlcx_rb_base_hi
= upper_32_bits(q
->queue_address
>> 8);
358 m
->sdmax_rlcx_rb_rptr_addr_lo
= lower_32_bits((uint64_t)q
->read_ptr
);
359 m
->sdmax_rlcx_rb_rptr_addr_hi
= upper_32_bits((uint64_t)q
->read_ptr
);
360 m
->sdmax_rlcx_doorbell
=
361 q
->doorbell_off
<< SDMA0_RLC0_DOORBELL__OFFSET__SHIFT
;
363 m
->sdmax_rlcx_virtual_addr
= q
->sdma_vm_addr
;
365 m
->sdma_engine_id
= q
->sdma_engine_id
;
366 m
->sdma_queue_id
= q
->sdma_queue_id
;
368 q
->is_active
= QUEUE_IS_ACTIVE(*q
);
/*
 * The preempt type is ignored here because there is only one way
 * to preempt an SDMA queue.
 */
/*
 * Tear down an SDMA queue by returning the result of the kfd2kgd
 * hqd_sdma_destroy() callback, which receives only the device handle, the
 * MQD and the @timeout; @type and @pipe_id are not passed on.
 *
 * NOTE(review): the end of the parameter list is not visible in this view of
 * the file.
 */
375 static int destroy_mqd_sdma(struct mqd_manager
*mm
, void *mqd
,
376 enum kfd_preempt_type type
,
377 unsigned int timeout
, uint32_t pipe_id
,
380 return mm
->dev
->kfd2kgd
->hqd_sdma_destroy(mm
->dev
->kgd
, mqd
, timeout
);
/*
 * Report whether an SDMA queue is occupied by returning the result of the
 * kfd2kgd hqd_sdma_is_occupied() callback, which receives only the device
 * handle and the MQD; @queue_address and @pipe_id are not passed on.
 *
 * NOTE(review): the end of the parameter list is not visible in this view of
 * the file.
 */
383 static bool is_occupied_sdma(struct mqd_manager
*mm
, void *mqd
,
384 uint64_t queue_address
, uint32_t pipe_id
,
387 return mm
->dev
->kfd2kgd
->hqd_sdma_is_occupied(mm
->dev
->kgd
, mqd
);
390 #if defined(CONFIG_DEBUG_FS)
392 static int debugfs_show_mqd(struct seq_file
*m
, void *data
)
394 seq_hex_dump(m
, " ", DUMP_PREFIX_OFFSET
, 32, 4,
395 data
, sizeof(struct vi_mqd
), false);
399 static int debugfs_show_mqd_sdma(struct seq_file
*m
, void *data
)
401 seq_hex_dump(m
, " ", DUMP_PREFIX_OFFSET
, 32, 4,
402 data
, sizeof(struct vi_sdma_mqd
), false);
/*
 * Allocate (kzalloc, GFP_KERNEL) an mqd_manager and fill in its callback
 * table and mqd_size according to @type.  A @type at or beyond
 * KFD_MQD_TYPE_MAX triggers WARN_ON.  CP, HIQ and DIQ managers use
 * sizeof(struct vi_mqd); the SDMA manager uses sizeof(struct vi_sdma_mqd).
 * With CONFIG_DEBUG_FS each manager also gets a debugfs_show_mqd callback.
 *
 * NOTE(review): the end of the parameter list, the failure paths, the switch
 * framing (switch/break/default), the #endif lines and the return statement
 * are not visible in this view of the file.
 */
408 struct mqd_manager
*mqd_manager_init_vi(enum KFD_MQD_TYPE type
,
411 struct mqd_manager
*mqd
;
413 if (WARN_ON(type
>= KFD_MQD_TYPE_MAX
))
416 mqd
= kzalloc(sizeof(*mqd
), GFP_KERNEL
);
/* Compute queues: the only type that also provides get_wave_state. */
423 case KFD_MQD_TYPE_CP
:
424 mqd
->allocate_mqd
= allocate_mqd
;
425 mqd
->init_mqd
= init_mqd
;
426 mqd
->free_mqd
= free_mqd
;
427 mqd
->load_mqd
= load_mqd
;
428 mqd
->update_mqd
= update_mqd
;
429 mqd
->destroy_mqd
= destroy_mqd
;
430 mqd
->is_occupied
= is_occupied
;
431 mqd
->get_wave_state
= get_wave_state
;
432 mqd
->mqd_size
= sizeof(struct vi_mqd
);
433 #if defined(CONFIG_DEBUG_FS)
434 mqd
->debugfs_show_mqd
= debugfs_show_mqd
;
/* HIQ: dedicated allocate/free helpers, HIQ-specific init and update. */
437 case KFD_MQD_TYPE_HIQ
:
438 mqd
->allocate_mqd
= allocate_hiq_mqd
;
439 mqd
->init_mqd
= init_mqd_hiq
;
440 mqd
->free_mqd
= free_mqd_hiq_sdma
;
441 mqd
->load_mqd
= load_mqd
;
442 mqd
->update_mqd
= update_mqd_hiq
;
443 mqd
->destroy_mqd
= destroy_mqd
;
444 mqd
->is_occupied
= is_occupied
;
445 mqd
->mqd_size
= sizeof(struct vi_mqd
);
446 #if defined(CONFIG_DEBUG_FS)
447 mqd
->debugfs_show_mqd
= debugfs_show_mqd
;
/* DIQ: regular allocate/free, but the HIQ init and update paths. */
450 case KFD_MQD_TYPE_DIQ
:
451 mqd
->allocate_mqd
= allocate_mqd
;
452 mqd
->init_mqd
= init_mqd_hiq
;
453 mqd
->free_mqd
= free_mqd
;
454 mqd
->load_mqd
= load_mqd
;
455 mqd
->update_mqd
= update_mqd_hiq
;
456 mqd
->destroy_mqd
= destroy_mqd
;
457 mqd
->is_occupied
= is_occupied
;
458 mqd
->mqd_size
= sizeof(struct vi_mqd
);
459 #if defined(CONFIG_DEBUG_FS)
460 mqd
->debugfs_show_mqd
= debugfs_show_mqd
;
/* SDMA: every callback is the SDMA-specific variant. */
463 case KFD_MQD_TYPE_SDMA
:
464 mqd
->allocate_mqd
= allocate_sdma_mqd
;
465 mqd
->init_mqd
= init_mqd_sdma
;
466 mqd
->free_mqd
= free_mqd_hiq_sdma
;
467 mqd
->load_mqd
= load_mqd_sdma
;
468 mqd
->update_mqd
= update_mqd_sdma
;
469 mqd
->destroy_mqd
= destroy_mqd_sdma
;
470 mqd
->is_occupied
= is_occupied_sdma
;
471 mqd
->mqd_size
= sizeof(struct vi_sdma_mqd
);
472 #if defined(CONFIG_DEBUG_FS)
473 mqd
->debugfs_show_mqd
= debugfs_show_mqd_sdma
;
/*
 * Tonga variant of the manager constructor: build the manager with
 * mqd_manager_init_vi() and, for KFD_MQD_TYPE_CP only, replace the
 * update_mqd callback with update_mqd_tonga.
 *
 * NOTE(review): the end of the parameter list, any check of the returned
 * pointer and the return statement are not visible in this view of the file.
 */
484 struct mqd_manager
*mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type
,
487 struct mqd_manager
*mqd
;
489 mqd
= mqd_manager_init_vi(type
, dev
);
492 if (type
== KFD_MQD_TYPE_CP
)
493 mqd
->update_mqd
= update_mqd_tonga
;