/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bitops.h>
#include <linux/sched.h>

#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "kfd_kernel_queue.h"
/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
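
/*
 * Note: each pipe gets one 2 KB (1 << 11) EOP buffer.  init_pipelines()
 * below hands the size to the KGD encoded as CIK_HPD_EOP_BYTES_LOG2 - 3,
 * i.e. log2(bytes / 4) - 1 (see the comment at the init_pipeline() call).
 */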

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
static int destroy_queues_cpsch(struct device_queue_manager *dqm,
				bool preempt_static_queues, bool lock);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id);

enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

unsigned int get_first_pipe(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.first_compute_pipe;
}

unsigned int get_pipes_num(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.compute_pipe_count;
}

static inline unsigned int get_pipes_num_cpsch(void)
{
	return PIPE_PER_ME_CP_SCHEDULING;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}
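
/*
 * VMID management for the no-HWS path: dqm->vmid_bitmap holds one bit per
 * VMID available to KFD.  A process gets a VMID when its first queue is
 * created and gives it back when its last queue is destroyed.  The bit
 * index is offset by KFD_VMID_START_OFFSET (8 on Kaveri), since the lower
 * VMIDs are presumably reserved for the graphics side of the driver.
 */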

static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM);
	clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);

	/* Kaveri KFD VMIDs start from VMID 8 */
	allocated_vmid = bit + KFD_VMID_START_OFFSET;
	pr_debug("kfd: vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	return 0;
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - KFD_VMID_START_OFFSET;

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				int *allocated_vmid)
{
	int retval = 0;

	BUG_ON(!dqm || !q || !qpd || !allocated_vmid);

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval != 0) {
			mutex_unlock(&dqm->lock);
			return retval;
		}
	}
	*allocated_vmid = qpd->vmid;
	q->properties.vmid = qpd->vmid;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);

	if (retval != 0) {
		if (list_empty(&qpd->queues_list)) {
			deallocate_vmid(dqm, qpd, q);
			*allocated_vmid = 0;
		}
		mutex_unlock(&dqm->lock);
		return retval;
	}

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);
	return 0;
}
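
/*
 * HQD allocation for the no-HWS path: dqm->allocated_queues[pipe] is a
 * bitmap of free queue slots on that pipe.  Allocation starts at
 * next_pipe_to_allocate and walks the pipes round-robin ("horizontal"
 * allocation), so queues spread across pipes instead of filling one
 * pipe first.
 */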
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_num(dqm);
			pipe = ((pipe + 1) % get_pipes_num(dqm)), ++i) {
		if (dqm->allocated_queues[pipe] != 0) {
			bit = find_first_bit(
				(unsigned long *)&dqm->allocated_queues[pipe],
				QUEUES_PER_PIPE);
			clear_bit(bit,
				(unsigned long *)&dqm->allocated_queues[pipe]);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
				__func__, q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_num(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (mqd == NULL)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval != 0)
		return retval;

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0) {
		deallocate_hqd(dqm, q);
		return retval;
	}

	pr_debug("kfd: loading mqd to hqd on pipe (%d) queue (%d)\n",
			q->pipe, q->queue);

	retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
			q->queue, (uint32_t __user *) q->properties.write_ptr);
	if (retval != 0) {
		deallocate_hqd(dqm, q);
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
		return retval;
	}

	return 0;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !q->mqd || !qpd);

	retval = 0;

	pr_debug("kfd: In Func %s\n", __func__);

	mutex_lock(&dqm->lock);

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type is invalid (%d)\n",
				q->properties.type);
		retval = -EINVAL;
		goto out;
	}

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
				q->pipe, q->queue);

	if (retval != 0)
		goto out;

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
	if (q->properties.is_active)
		dqm->queue_count--;

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool prev_active = false;

	BUG_ON(!dqm || !q || !q->mqd);

	mutex_lock(&dqm->lock);
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	if (q->properties.is_active == true)
		prev_active = true;

	/*
	 * check active state vs. the previous state
	 * and modify counter accordingly
	 */
	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
	if ((q->properties.is_active == true) && (prev_active == false))
		dqm->queue_count++;
	else if ((q->properties.is_active == false) && (prev_active == true))
		dqm->queue_count--;

	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = execute_queues_cpsch(dqm, false);

	mutex_unlock(&dqm->lock);
	return retval;
}
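
/*
 * MQD managers are created lazily, one per MQD type, and cached in
 * dqm->mqds[].  Both scheduling modes install this helper as
 * ops.get_mqd_manager (see device_queue_manager_init()).
 */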
static struct mqd_manager *get_mqd_manager_nocpsch(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd;

	BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX);

	pr_debug("kfd: In func %s mqd type %d\n", __func__, type);

	mqd = dqm->mqds[type];
	if (!mqd) {
		mqd = mqd_manager_init(type, dqm->dev);
		if (mqd == NULL)
			pr_err("kfd: mqd manager is NULL");
		dqm->mqds[type] = mqd;
	}

	return mqd;
}

static int register_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	int retval;

	BUG_ON(!dqm || !qpd);

	pr_debug("kfd: In func %s\n", __func__);

	n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	mutex_lock(&dqm->lock);
	list_add(&n->list, &dqm->queues);

	retval = dqm->ops_asic_specific.register_process(dqm, qpd);

	dqm->processes_count++;

	mutex_unlock(&dqm->lock);

	return retval;
}

static int unregister_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	BUG_ON(!dqm || !qpd);

	pr_debug("In func %s\n", __func__);

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	mutex_lock(&dqm->lock);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 :
			(uint32_t)pasid |
			ATC_VMID_PASID_MAPPING_VALID;

	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid_mapping,
						vmid);
}
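
/*
 * init_pipelines() carves one CIK_HPD_EOP_BYTES chunk per compute pipe out
 * of a single GTT allocation, zeroes it, and registers each chunk with the
 * hardware through the kfd2kgd init_pipeline() hook.  The memory is owned
 * by the hardware afterwards (see the comment inside the function).
 */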
int init_pipelines(struct device_queue_manager *dqm,
			unsigned int pipes_num, unsigned int first_pipe)
{
	void *hpdptr;
	struct mqd_manager *mqd;
	unsigned int i, err, inx;
	uint64_t pipe_hpd_addr;

	BUG_ON(!dqm || !dqm->dev);

	pr_debug("kfd: In func %s\n", __func__);

	/*
	 * Allocate memory for the HPDs. This is hardware-owned per-pipe data.
	 * The driver never accesses this memory after zeroing it.
	 * It doesn't even have to be saved/restored on suspend/resume
	 * because it contains no data when there are no active queues.
	 */

	err = kfd_gtt_sa_allocate(dqm->dev, CIK_HPD_EOP_BYTES * pipes_num,
					&dqm->pipeline_mem);
	if (err) {
		pr_err("kfd: error allocate vidmem num pipes: %d\n",
			pipes_num);
		return -ENOMEM;
	}

	hpdptr = dqm->pipeline_mem->cpu_ptr;
	dqm->pipelines_addr = dqm->pipeline_mem->gpu_addr;

	memset(hpdptr, 0, CIK_HPD_EOP_BYTES * pipes_num);

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (mqd == NULL) {
		kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
		return -ENOMEM;
	}

	for (i = 0; i < pipes_num; i++) {
		inx = i + first_pipe;
		/*
		 * HPD buffer on GTT is allocated by amdkfd, no need to waste
		 * space in GTT for pipelines we don't initialize
		 */
		pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
		pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
		/* = log2(bytes/4)-1 */
		dqm->dev->kfd2kgd->init_pipeline(dqm->dev->kgd, inx,
				CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
	}

	return 0;
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	BUG_ON(dqm == NULL);

	for (i = 0 ; i < get_pipes_num(dqm) ; i++)
		dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd,
				i + get_first_pipe(dqm));
}

static int init_scheduler(struct device_queue_manager *dqm)
{
	int retval;

	BUG_ON(!dqm);

	pr_debug("kfd: In %s\n", __func__);

	retval = init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));

	return retval;
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_num(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;
	dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues) {
		mutex_destroy(&dqm->lock);
		return -ENOMEM;
	}

	for (i = 0; i < get_pipes_num(dqm); i++)
		dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;

	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	init_scheduler(dqm);
	return 0;
}

static void uninitialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	BUG_ON(!dqm);

	BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqds[i]);
	mutex_destroy(&dqm->lock);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return 0;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}
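
/*
 * SDMA queues are tracked in dqm->sdma_bitmap, one bit per queue across
 * both engines.  create_sdma_queue_nocpsch() derives the per-engine queue
 * id and the engine id from the allocated bit (sdma_id), assuming
 * CIK_SDMA_QUEUES = CIK_SDMA_QUEUES_PER_ENGINE * CIK_SDMA_ENGINE_NUM.
 */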
static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap,
				CIK_SDMA_QUEUES);

	clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap);
	*sdma_queue_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= CIK_SDMA_QUEUES)
		return;
	set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
}

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd;
	int retval;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval != 0)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
	q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM;

	pr_debug("kfd: sdma id is: %d\n", q->sdma_id);
	pr_debug("    sdma queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("    sdma engine id: %d\n", q->properties.sdma_engine_id);

	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0) {
		deallocate_sdma_queue(dqm, q->sdma_id);
		return retval;
	}

	retval = mqd->load_mqd(mqd, q->mqd, 0, 0, NULL);
	if (retval != 0) {
		deallocate_sdma_queue(dqm, q->sdma_id);
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
		return retval;
	}

	return 0;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */
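
/*
 * In this mode the CP firmware scheduler (HWS) maps queues onto the
 * hardware.  The driver describes all runnable queues in a runlist
 * (pm_send_runlist()) and preempts them with unmap-queue packets; a fence
 * written by the CP (see destroy_queues_cpsch()) signals that preemption
 * completed.
 */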

static int set_sched_resources(struct device_queue_manager *dqm)
{
	struct scheduling_resources res;
	unsigned int queue_num, queue_mask;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s\n", __func__);

	queue_num = get_pipes_num_cpsch() * QUEUES_PER_PIPE;
	queue_mask = (1 << queue_num) - 1;
	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
	res.vmid_mask <<= KFD_VMID_START_OFFSET;
	res.queue_mask = queue_mask << (get_first_pipe(dqm) * QUEUES_PER_PIPE);
	res.gws_mask = res.oac_mask = res.gds_heap_base =
						res.gds_heap_size = 0;

	pr_debug("kfd: scheduling resources:\n"
			"      vmid mask: 0x%8X\n"
			"      queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}
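
/*
 * Note on the masks above: vmid_mask covers VMID_PER_DEVICE ids starting
 * at KFD_VMID_START_OFFSET, and queue_mask covers QUEUES_PER_PIPE slots on
 * each pipe owned by KFD, shifted past the pipes that belong to the
 * graphics driver (get_first_pipe()).
 */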

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_num_cpsch());

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	retval = dqm->ops_asic_specific.initialize(dqm);
	if (retval != 0)
		goto fail_init_pipelines;

	return 0;

fail_init_pipelines:
	mutex_destroy(&dqm->lock);
	return retval;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	int retval;

	BUG_ON(!dqm);

	retval = pm_init(&dqm->packets, dqm);
	if (retval != 0)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval != 0)
		goto fail_set_sched_resources;

	pr_debug("kfd: allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);
	if (retval != 0)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	list_for_each_entry(node, &dqm->queues, list)
		if (node->qpd->pqm->process && dqm->dev)
			kfd_bind_process_to_device(dqm->dev,
						node->qpd->pqm->process);

	execute_queues_cpsch(dqm, true);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}
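
/*
 * dqm->fence_addr/fence_gpu_addr set up above are reused by
 * destroy_queues_cpsch(): the CPU arms the fence with KFD_FENCE_INIT and
 * the CP overwrites it with KFD_FENCE_COMPLETED once the unmap
 * (preemption) request has been processed.
 */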
static int stop_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	struct kfd_process_device *pdd;

	BUG_ON(!dqm);

	destroy_queues_cpsch(dqm, true, true);

	list_for_each_entry(node, &dqm->queues, list) {
		pdd = qpd_to_pdd(node->qpd);
		pdd->bound = false;
	}
	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq || !qpd);

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, false);
	mutex_unlock(&dqm->lock);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq);

	pr_debug("kfd: In %s\n", __func__);

	mutex_lock(&dqm->lock);
	/* here we actually preempt the DIQ */
	destroy_queues_cpsch(dqm, true, false);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, false);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	mutex_unlock(&dqm->lock);
}

static void select_sdma_engine_id(struct queue *q)
{
	static int sdma_id;

	q->sdma_id = sdma_id;
	sdma_id = (sdma_id + 1) % 2;
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd, int *allocate_vmid)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	retval = 0;

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		select_sdma_engine_id(q);

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0)
		goto out;

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm, false);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned long timeout)
{
	BUG_ON(!fence_addr);
	timeout += jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, timeout)) {
			pr_err("kfd: qcm fence wait loop timeout expired\n");
			return -ETIME;
		}
		schedule();
	}

	return 0;
}
static int destroy_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

static int destroy_queues_cpsch(struct device_queue_manager *dqm,
				bool preempt_static_queues, bool lock)
{
	int retval;
	enum kfd_preempt_type_filter preempt_type;
	struct kfd_process_device *pdd;

	retval = 0;

	if (lock)
		mutex_lock(&dqm->lock);
	if (dqm->active_runlist == false)
		goto out;

	pr_debug("kfd: Before destroying queues, sdma queue count is : %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		destroy_sdma_queues(dqm, 0);
		destroy_sdma_queues(dqm, 1);
	}

	preempt_type = preempt_static_queues ?
			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES :
			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES;

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			preempt_type, 0, false, 0);
	if (retval != 0)
		goto out;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* should be timed out */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval != 0) {
		pdd = kfd_get_process_device_data(dqm->dev,
				kfd_get_process(current));
		pdd->reset_wavefronts = true;
		goto out;
	}
	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}
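
/*
 * execute_queues_cpsch() (re)builds the runlist: it first preempts any
 * active runlist via destroy_queues_cpsch(), then, if there are runnable
 * queues and registered processes, submits a fresh runlist to the CP.
 * Callers invoke it after changes that affect which queues should run.
 */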
static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
	int retval;

	if (lock)
		mutex_lock(&dqm->lock);

	retval = destroy_queues_cpsch(dqm, false, false);
	if (retval != 0) {
		pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption");
		goto out;
	}

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0) {
		retval = 0;
		goto out;
	}

	if (dqm->active_runlist) {
		retval = 0;
		goto out;
	}

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval != 0) {
		pr_err("kfd: failed to execute runlist");
		goto out;
	}
	dqm->active_runlist = true;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool preempt_all_queues;

	BUG_ON(!dqm || !qpd || !q);

	preempt_all_queues = false;

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);

	if (qpd->is_debug) {
		/*
		 * error: we currently do not allow destroying a queue
		 * of a process that is being debugged
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;
	}

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count--;

	list_del(&q->list);
	if (q->properties.is_active)
		dqm->queue_count--;

	execute_queues_cpsch(dqm, false);

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);

	return 0;

failed:
failed_try_destroy_debugged_queue:

	mutex_unlock(&dqm->lock);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF
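
/*
 * Worked example (hypothetical values): for an APE1 aperture at user
 * address 0x1_0000_0000 of size 64 KB, base = 0x100000000 and
 * limit = 0x10000FFFF.  Both satisfy APE1_FIXED_BITS_MASK, and
 * set_cache_memory_policy() stores base >> 16 = 0x10000 and
 * limit >> 16 = 0x10000 in qpd->sh_mem_ape1_base/limit.
 */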

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval;

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if ((base & APE1_FIXED_BITS_MASK) != 0)
			goto out;

		if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT)
			goto out;

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->ops_asic_specific.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

	mutex_unlock(&dqm->lock);
	return retval;

out:
	mutex_unlock(&dqm->lock);
	return false;
}

struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	BUG_ON(!dev);

	pr_debug("kfd: loading device queue manager\n");

	dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL);
	if (!dqm)
		return NULL;

	dqm->dev = dev;
	switch (sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->ops.register_process = register_process_nocpsch;
		dqm->ops.unregister_process = unregister_process_nocpsch;
		dqm->ops.uninitialize = uninitialize_nocpsch;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->ops.register_process = register_process_nocpsch;
		dqm->ops.unregister_process = unregister_process_nocpsch;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize_nocpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	default:
		BUG();
		break;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->ops_asic_specific);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->ops_asic_specific);
		break;
	}

	if (dqm->ops.initialize(dqm) != 0) {
		kfree(dqm);
		return NULL;
	}

	return dqm;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm);

	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}