/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>

#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "kfd_kernel_queue.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
				  unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
			      enum kfd_unmap_queues_filter filter,
			      uint32_t filter_param);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
				     struct queue *q,
				     struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				  unsigned int sdma_queue_id);

static void kfd_process_hw_exception(struct work_struct *work);
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			     dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines;
}

unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines
			* KFD_SDMA_QUEUES_PER_ENGINE;
}
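/* Note: the total SDMA queue count above is simply num_sdma_engines (taken
 * from the per-ASIC device_info table) times KFD_SDMA_QUEUES_PER_ENGINE;
 * nothing about it is negotiated at runtime.
 */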
void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}
static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
		/* On pre-SOC15 chips we need to use the queue ID to
		 * preserve the user mode ABI.
		 */
		q->doorbell_id = q->properties.queue_id;
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		/* For SDMA queues on SOC15, use static doorbell
		 * assignments based on the engine and queue.
		 */
		q->doorbell_id = dev->shared_resources.sdma_doorbell
			[q->properties.sdma_engine_id]
			[q->properties.sdma_queue_id];
	} else {
		/* For CP queues on SOC15 reserve a free doorbell ID */
		unsigned int found;

		found = find_first_zero_bit(qpd->doorbell_bitmap,
					    KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
		if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
			pr_debug("No doorbells available");
			return -EBUSY;
		}
		set_bit(found, qpd->doorbell_bitmap);
		q->doorbell_id = found;
	}

	q->properties.doorbell_off =
		kfd_doorbell_id_to_offset(dev, q->process,
					  q->doorbell_id);

	return 0;
}
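/* Undo allocate_doorbell(): only SOC15 CP queues actually hold a bit in
 * qpd->doorbell_bitmap, so the other cases return early below.
 */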
static void deallocate_doorbell(struct qcm_process_device *qpd,
				struct queue *q)
{
	unsigned int old;
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA)
		return;

	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
	WARN_ON(!old);
}
static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->vmid_bitmap) - 1;
	dqm->vmid_bitmap &= ~(1 << bit);

	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
	pr_debug("vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
			qpd->vmid,
			qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd));

	return 0;
}
static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
				struct qcm_process_device *qpd)
{
	const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf;
	int ret;

	if (!qpd->ib_kaddr)
		return -ENOMEM;

	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
	if (ret)
		return ret;

	return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
				pmf->release_mem_size / sizeof(uint32_t));
}
static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;

	/* On GFX v7, CP doesn't flush TC at dequeue */
	if (q->device->device_info->asic_family == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			pr_err("Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd));

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	dqm->vmid_bitmap |= (1 << bit);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}
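/* Create a user mode queue without the HW scheduler (no cpsch). The first
 * queue of a process also allocates the process VMID; the queue is then
 * handed to the compute or SDMA specific path below.
 */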
static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd)
{
	int retval = 0;

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		return -EPERM;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			return retval;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);

	if (retval) {
		if (list_empty(&qpd->queues_list))
			deallocate_vmid(dqm, qpd, q);
		return retval;
	}

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	return retval;
}
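/* Pick a hardware queue descriptor (HQD) slot for a no-HWS compute queue.
 * Pipes are scanned round-robin starting at next_pipe_to_allocate so that
 * queues spread across the enabled pipes ("horizontal" allocation).
 */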
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set = false;
	int pipe, bit, i;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = ffs(dqm->allocated_queues[pipe]) - 1;
			dqm->allocated_queues[pipe] &= ~(1 << bit);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}
static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (!mqd_mgr)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval)
		return retval;

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_hqd;

	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);

	dqm->dev->kfd2kgd->set_scratch_backing_va(
			dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);

	if (!q->properties.is_active)
		return 0;

	if (WARN(q->process->mm != current->mm,
		 "should only run in user thread"))
		retval = -EFAULT;
	else
		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
					   &q->properties, current->mm);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_hqd:
	deallocate_hqd(dqm, q);

	return retval;
}
/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
		get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd_mgr)
		return -ENOMEM;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	deallocate_doorbell(qpd, q);

	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
					qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	if (q->properties.is_active)
		dqm->queue_count--;

	return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;

	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);

	return retval;
}
static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd)
		return -ENODEV;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd_mgr)
		return -ENOMEM;

	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (pdd->qpd.evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		retval = unmap_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval) {
			pr_err("unmap queue failed\n");
			return retval;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			pr_err("destroy mqd failed\n");
			return retval;
		}
	}

	retval = mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->queue_count to determine whether a new runlist must be
	 * uploaded.
	 */
	if (q->properties.is_active && !prev_active)
		dqm->queue_count++;
	else if (!q->properties.is_active && prev_active)
		dqm->queue_count--;

	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = map_queues_cpsch(dqm);
	else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
		if (WARN(q->process->mm != current->mm,
			 "should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
						   q->pipe, q->queue,
						   &q->properties, current->mm);
	}

	return retval;
}
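/* Return the MQD manager for the given type, creating it on first use and
 * caching it in dqm->mqd_mgrs[].
 */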
static struct mqd_manager *get_mqd_manager(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd_mgr;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	pr_debug("mqd type %d\n", type);

	mqd_mgr = dqm->mqd_mgrs[type];
	if (!mqd_mgr) {
		mqd_mgr = mqd_manager_init(type, dqm->dev);
		if (!mqd_mgr)
			pr_err("mqd manager is NULL");
		dqm->mqd_mgrs[type] = mqd_mgr;
	}

	return mqd_mgr;
}
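/* Eviction and restore: qpd->evicted acts as a refcount. Without HWS each
 * active queue's MQD is destroyed directly on its HQD; the cpsch variants
 * further below instead rebuild the runlist with the appropriate unmap
 * filter.
 */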
static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	int retval = 0;

	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		return 0;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* unactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd_mgr) { /* should not be here */
			pr_err("Cannot evict queue, mqd mgr is NULL\n");
			return -ENOMEM;
		}
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval)
			return retval;
		dqm->queue_count--;
	}

	return retval;
}
static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
				      struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	int retval;

	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		return 0;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* unactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		dqm->queue_count--;
	}
	retval = execute_queues_cpsch(dqm,
				qpd->is_debug ?
				KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);

	return retval;
}
static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	struct mm_struct *mm = NULL;
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	unsigned int pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		return 0;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		return 0;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%08x\n", pd_base);

	if (!list_empty(&qpd->queues_list)) {
		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
				dqm->dev->kgd,
				qpd->vmid,
				qpd->page_table_base);
		kfd_flush_tlb(pdd);
	}

	/* Take a safe reference to the mm_struct, which may otherwise
	 * disappear even while the kfd_process is still referenced.
	 */
	mm = get_task_mm(pdd->process->lead_thread);
	if (!mm)
		return -EFAULT;

	/* activate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd_mgr) { /* should not be here */
			pr_err("Cannot restore queue, mqd mgr is NULL\n");
			retval = -ENOMEM;
			goto out;
		}
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
				       q->queue, &q->properties, mm);
		if (retval)
			goto out;
		dqm->queue_count++;
	}
	qpd->evicted = 0;
out:
	mmput(mm);
	return retval;
}
static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	unsigned int pd_base;
	int retval;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		return 0;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		return 0;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%08x\n", pd_base);

	/* activate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		dqm->queue_count++;
	}
	retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	if (!retval)
		qpd->evicted = 0;

	return retval;
}
static int register_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	struct kfd_process_device *pdd;
	unsigned int pd_base;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	list_add(&n->list, &dqm->queues);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;

	retval = dqm->asic_ops.update_qpd(dqm, qpd);

	if (dqm->processes_count++ == 0)
		dqm->dev->kfd2kgd->set_compute_idle(dqm->dev->kgd, false);

	return retval;
}
static int unregister_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval = 0;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			if (--dqm->processes_count == 0)
				dqm->dev->kfd2kgd->set_compute_idle(
					dqm->dev->kgd, true);
			return retval;
		}
	}
	/* qpd not found in dqm list */
	return 1;
}
static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 :
		(uint32_t)pasid |
		ATC_VMID_PASID_MAPPING_VALID;

	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid_mapping,
						vmid);
}
static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}
static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->shared_resources.queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
	dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;

	return 0;
}
static void uninitialize(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqd_mgrs[i]);
	mutex_destroy(&dqm->lock_hidden);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}
static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return pm_init(&dqm->packets, dqm);
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	pm_uninit(&dqm->packets);
	return 0;
}
static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->sdma_bitmap) - 1;
	dqm->sdma_bitmap &= ~(1 << bit);
	*sdma_queue_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= get_num_sdma_queues(dqm))
		return;
	dqm->sdma_bitmap |= (1 << sdma_queue_id);
}
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd_mgr)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
	q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_sdma_queue;

	pr_debug("SDMA id is:     %d\n", q->sdma_id);
	pr_debug("SDMA queue id:  %d\n", q->properties.sdma_queue_id);
	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 0, 0, &q->properties,
				current->mm);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	deallocate_sdma_queue(dqm, q->sdma_id);

	return retval;
}
/*
 * Device Queue Manager implementation for cp scheduler
 */

static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
			res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
			"vmid mask: 0x%8X\n"
			"queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}
static int initialize_cpsch(struct device_queue_manager *dqm)
{
	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;

	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);

	return 0;
}
static int start_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	retval = pm_init(&dqm->packets, dqm);
	if (retval)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval)
		goto fail_set_sched_resources;

	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);
	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	/* clear hang status when driver tries to start the hw scheduler */
	dqm->is_hws_hang = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}
static int stop_cpsch(struct device_queue_manager *dqm)
{
	unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}
static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
}
static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd)
{
	int retval = 0;
	struct mqd_manager *mqd_mgr;

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		return -EPERM;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		retval = allocate_sdma_queue(dqm, &q->sdma_id);
		if (retval)
			return retval;
		q->properties.sdma_queue_id =
			q->sdma_id / get_num_sdma_engines(dqm);
		q->properties.sdma_engine_id =
			q->sdma_id % get_num_sdma_engines(dqm);
	}

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_sdma_queue;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd_mgr) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;
	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	return retval;

out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q->sdma_id);

	return retval;
}
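/* Poll a fence written by the CP (see unmap_queues_cpsch): wait until
 * *fence_addr reaches fence_value or the timeout expires.
 */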
int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, end_jiffies)) {
			pr_err("qcm fence wait loop timeout expired\n");
			/* In HWS case, this is used to halt the driver thread
			 * in order not to mess up CP states before doing
			 * scandumps for FW debugging.
			 */
			while (halt_if_hws_hang)
				schedule();

			return -ETIME;
		}
		schedule();
	}

	return 0;
}
static int unmap_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

/* dqm->lock mutex has to be locked before calling this function */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
		return 0;

	if (dqm->active_runlist)
		return 0;

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval) {
		pr_err("failed to execute runlist\n");
		return retval;
	}
	dqm->active_runlist = true;

	return retval;
}
/* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval = 0;

	if (dqm->is_hws_hang)
		return -EIO;
	if (!dqm->active_runlist)
		return retval;

	pr_debug("Before destroying queues, sdma queue count is : %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		unmap_sdma_queues(dqm, 0);
		unmap_sdma_queues(dqm, 1);
	}

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			filter, filter_param, false, 0);
	if (retval)
		return retval;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* should be timed out */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval)
		return retval;

	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval;

	if (dqm->is_hws_hang)
		return -EIO;
	retval = unmap_queues_cpsch(dqm, filter, filter_param);
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
		dqm->is_hws_hang = true;
		schedule_work(&dqm->hw_exception_work);
		return retval;
	}

	return map_queues_cpsch(dqm);
}
static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval = 0;
	struct mqd_manager *mqd_mgr;
	bool preempt_all_queues;

	preempt_all_queues = false;

	/* remove queue from list to prevent rescheduling after preemption */
	if (qpd->is_debug) {
		/*
		 * error, currently we do not allow to destroy a queue
		 * of a currently debugged process
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;
	}

	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd_mgr)
		return -ENOMEM;

	deallocate_doorbell(qpd, q);

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	}

	list_del(&q->list);
	if (q->properties.is_active) {
		dqm->queue_count--;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval == -ETIME)
			qpd->reset_wavefronts = true;
	}

	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	return retval;

failed_try_destroy_debugged_queue:

	return retval;
}
/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF
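/* Illustrative example (not from the original source): a 64K-aligned base of
 * 0x10000 with a 64K aperture gives limit = 0x1ffff; base >> 16 = 1 and
 * limit >> 16 = 1 are the values programmed into SH_MEM_APE1_BASE/LIMIT
 * below.
 */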
static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval = true;

	if (!dqm->asic_ops.set_cache_memory_policy)
		return retval;

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */
		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT)
			return false;

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

	return retval;
}
static int set_trap_handler(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				uint64_t tba_addr,
				uint64_t tma_addr)
{
	uint64_t *tma;

	if (dqm->dev->cwsr_enabled) {
		/* Jump from CWSR trap handler to user trap */
		tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
		tma[0] = tba_addr;
		tma[1] = tma_addr;
	} else {
		qpd->tba_addr = tba_addr;
		qpd->tma_addr = tma_addr;
	}

	return 0;
}
static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q, *next;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;

	/* Clear all user mode queues */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		int ret;

		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	return retval;
}
static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval = 0;
	struct queue *q, *next;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd_mgr;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		dqm->queue_count--;
		qpd->is_debug = false;
		dqm->total_queue_count--;
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
			dqm->sdma_queue_count--;
			deallocate_sdma_queue(dqm, q->sdma_id);
		}

		if (q->properties.is_active)
			dqm->queue_count--;

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	retval = execute_queues_cpsch(dqm, filter, 0);
	if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* lastly, free mqd resources */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd_mgr) {
			retval = -ENOMEM;
			goto out;
		}
		list_del(&q->list);
		mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
	}

out:
	return retval;
}
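/* Top-level constructor: pick a scheduling policy for the ASIC, fill in the
 * matching ops table plus the per-ASIC asic_ops callbacks, then run
 * ops.initialize().
 */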
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	switch (dev->device_info->asic_family) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
	 */
	case CHIP_TONGA:
		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
		break;
	default:
		dqm->sched_policy = sched_policy;
		break;
	}

	dqm->dev = dev;
	switch (dqm->sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_cpsch;
		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_nocpsch;
		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
		dqm->ops.restore_process_queues =
			restore_process_queues_nocpsch;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
		goto out_free;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;

	case CHIP_HAWAII:
		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
		break;

	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
		break;

	case CHIP_VEGA10:
	case CHIP_RAVEN:
		device_queue_manager_init_v9(&dqm->asic_ops);
		break;
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dev->device_info->asic_family);
		goto out_free;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}
int kfd_process_vm_fault(struct device_queue_manager *dqm,
			 unsigned int pasid)
{
	struct kfd_process_device *pdd;
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
	int ret = 0;

	if (!p)
		return -EINVAL;
	pdd = kfd_get_process_device_data(dqm->dev, p);
	if (pdd)
		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
	kfd_unref_process(p);

	return ret;
}

static void kfd_process_hw_exception(struct work_struct *work)
{
	struct device_queue_manager *dqm = container_of(work,
			struct device_queue_manager, hw_exception_work);

	dqm->dev->kfd2kgd->gpu_recover(dqm->dev->kgd);
}
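/* debugfs support: dump the HIQ, every enabled CP HQD and every SDMA RLC
 * queue, and allow re-submitting the runlist from user space.
 */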
#if defined(CONFIG_DEBUG_FS)

static void seq_reg_dump(struct seq_file *m,
			 uint32_t (*dump)[2], uint32_t n_regs)
{
	uint32_t i, count;

	for (i = 0, count = 0; i < n_regs; i++) {
		if (count == 0 ||
		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
			seq_printf(m, "%s    %08x: %08x",
				   i ? "\n" : "",
				   dump[i][0], dump[i][1]);
			count = 7;
		} else {
			seq_printf(m, " %08x", dump[i][1]);
			count--;
		}
	}

	seq_puts(m, "\n");
}
int dqm_debugfs_hqds(struct seq_file *m, void *data)
{
	struct device_queue_manager *dqm = data;
	uint32_t (*dump)[2], n_regs;
	int pipe, queue;
	int r = 0;

	r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
		KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs);
	if (!r) {
		seq_printf(m, "  HIQ on MEC %d Pipe %d Queue %d\n",
				KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
				KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
				KFD_CIK_HIQ_QUEUE);
		seq_reg_dump(m, dump, n_regs);

		kfree(dump);
	}

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
			if (!test_bit(pipe_offset + queue,
				      dqm->dev->shared_resources.queue_bitmap))
				continue;

			r = dqm->dev->kfd2kgd->hqd_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, "  CP Pipe %d, Queue %d\n",
				   pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) {
		for (queue = 0; queue < KFD_SDMA_QUEUES_PER_ENGINE; queue++) {
			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, "  SDMA Engine %d, RLC %d\n",
				   pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	return r;
}
int dqm_debugfs_execute_queues(struct device_queue_manager *dqm)
{
	int r = 0;

	dqm->active_runlist = true;
	r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);

	return r;
}

#endif