/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>
#include <linux/mman.h>
#include <linux/file.h>
#include "amdgpu_amdkfd.h"

struct mm_struct;

#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"

/*
 * List of struct kfd_process (field kfd_process).
 * Unique/indexed by mm_struct*
 */
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_SRCU(kfd_processes_srcu);

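/*
 * Lookups walk kfd_processes_table under srcu_read_lock(&kfd_processes_srcu);
 * removal takes kfd_processes_mutex and then calls synchronize_srcu() (see
 * kfd_process_notifier_release below), so readers never observe a freed
 * kfd_process.
 */
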
/* For process termination handling */
static struct workqueue_struct *kfd_process_wq;

/* Ordered, single-threaded workqueue for restoring evicted
 * processes. Restoring multiple processes concurrently under memory
 * pressure can lead to processes blocking each other from validating
 * their BOs and result in a live-lock situation where processes
 * remain evicted indefinitely.
 */
static struct workqueue_struct *kfd_restore_wq;

static struct kfd_process *find_process(const struct task_struct *thread);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread);
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);

static void evict_process_worker(struct work_struct *work);
static void restore_process_worker(struct work_struct *work);

struct kfd_procfs_tree {
	struct kobject *kobj;
};

static struct kfd_procfs_tree procfs;

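/*
 * Note: despite the name, this "procfs" tree lives in sysfs under the KFD
 * device kobject. kfd_procfs_init() creates the "proc" folder and
 * kfd_create_process() adds one <pid> directory per process with a "pasid"
 * attribute, served by kfd_procfs_show() below.
 */
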
static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
			       char *buffer)
{
	int val = 0;

	if (strcmp(attr->name, "pasid") == 0) {
		struct kfd_process *p = container_of(attr, struct kfd_process,
						     attr_pasid);
		val = p->pasid;
	} else {
		pr_err("Invalid attribute");
		return -EINVAL;
	}

	return snprintf(buffer, PAGE_SIZE, "%d\n", val);
}

static void kfd_procfs_kobj_release(struct kobject *kobj)
{
	kfree(kobj);
}

static const struct sysfs_ops kfd_procfs_ops = {
	.show = kfd_procfs_show,
};

static struct kobj_type procfs_type = {
	.release = kfd_procfs_kobj_release,
	.sysfs_ops = &kfd_procfs_ops,
};

void kfd_procfs_init(void)
{
	int ret = 0;

	procfs.kobj = kfd_alloc_struct(procfs.kobj);
	if (!procfs.kobj)
		return;

	ret = kobject_init_and_add(procfs.kobj, &procfs_type,
				   &kfd_device->kobj, "proc");
	if (ret) {
		pr_warn("Could not create procfs proc folder");
		/* If we fail to create the procfs, clean up */
		kfd_procfs_shutdown();
	}
}

void kfd_procfs_shutdown(void)
{
	if (procfs.kobj) {
		kobject_del(procfs.kobj);
		kobject_put(procfs.kobj);
		procfs.kobj = NULL;
	}
}

int kfd_process_create_wq(void)
{
	if (!kfd_process_wq)
		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
	if (!kfd_restore_wq)
		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);

	if (!kfd_process_wq || !kfd_restore_wq) {
		kfd_process_destroy_wq();
		return -ENOMEM;
	}

	return 0;
}

void kfd_process_destroy_wq(void)
{
	if (kfd_process_wq) {
		destroy_workqueue(kfd_process_wq);
		kfd_process_wq = NULL;
	}
	if (kfd_restore_wq) {
		destroy_workqueue(kfd_restore_wq);
		kfd_restore_wq = NULL;
	}
}

static void kfd_process_free_gpuvm(struct kgd_mem *mem,
		struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;

	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem);
}

/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
 * This function should be only called right after the process
 * is created and when kfd_processes_mutex is still being held
 * to avoid concurrency. Because of that exclusiveness, we do
 * not need to take p->mutex.
 */
static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
		uint64_t gpu_va, uint32_t size,
		uint32_t flags, void **kptr)
{
	struct kfd_dev *kdev = pdd->dev;
	struct kgd_mem *mem = NULL;
	int handle;
	int err;

	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
						      pdd->vm, &mem, NULL, flags);
	if (err)
		goto err_alloc_mem;

	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
	if (err)
		goto err_map_mem;

	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Create an obj handle so kfd_process_device_remove_obj_handle
	 * will take care of the bo removal when the process finishes.
	 * We do not need to take p->mutex, because the process is just
	 * created and the ioctls have not had the chance to run.
	 */
	handle = kfd_process_device_create_obj_handle(pdd, mem);
	if (handle < 0) {
		err = handle;
		goto free_gpuvm;
	}

	if (kptr) {
		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
				(struct kgd_mem *)mem, kptr, NULL);
		if (err) {
			pr_debug("Map GTT BO to kernel failed\n");
			goto free_obj_handle;
		}
	}

	return err;

free_obj_handle:
	kfd_process_device_remove_obj_handle(pdd, handle);
free_gpuvm:
	kfd_process_free_gpuvm(mem, pdd);
	return err;

sync_memory_failed:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem);
	return err;

err_map_mem:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem);
err_alloc_mem:
	*kptr = NULL;
	return err;
}

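/*
 * Both kfd_process_device_reserve_ib_mem() and
 * kfd_process_device_init_cwsr_dgpu() below use this helper to back a fixed
 * per-process GPU VA (ib_base/cwsr_base) with a GTT BO, mapped into the
 * kernel as well whenever a kptr is requested.
 */
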
/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
 * process for IB usage. The memory reserved is for KFD to submit
 * IB to AMDGPU from kernel. If the memory is reserved
 * successfully, ib_kaddr will have the CPU/kernel
 * address. Check ib_kaddr before accessing the memory.
 */
static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
{
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
			 ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
			 ALLOC_MEM_FLAGS_WRITABLE |
			 ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (qpd->ib_kaddr || !qpd->ib_base)
		return 0;

	/* ib_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
				      &kaddr);
	if (ret)
		return ret;

	qpd->ib_kaddr = kaddr;

	return 0;
}

struct kfd_process *kfd_create_process(struct file *filep)
{
	struct kfd_process *process;
	struct task_struct *thread = current;
	int ret;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	/*
	 * Take the kfd_processes_mutex before starting process creation so
	 * that two threads of the same process can't race to create two
	 * kfd_process structures.
	 */
	mutex_lock(&kfd_processes_mutex);

	/* A prior open of /dev/kfd could have already created the process. */
	process = find_process(thread);
	if (process) {
		pr_debug("Process already found\n");
	} else {
		process = create_process(thread);
		if (IS_ERR(process))
			goto out;

		ret = kfd_process_init_cwsr_apu(process, filep);
		if (ret) {
			process = ERR_PTR(ret);
			goto out;
		}

		if (!procfs.kobj)
			goto out;

		process->kobj = kfd_alloc_struct(process->kobj);
		if (!process->kobj) {
			pr_warn("Creating procfs kobject failed");
			goto out;
		}
		ret = kobject_init_and_add(process->kobj, &procfs_type,
					   procfs.kobj, "%d",
					   (int)process->lead_thread->pid);
		if (ret) {
			pr_warn("Creating procfs pid directory failed");
			goto out;
		}

		process->attr_pasid.name = "pasid";
		process->attr_pasid.mode = KFD_SYSFS_FILE_MODE;
		sysfs_attr_init(&process->attr_pasid);
		ret = sysfs_create_file(process->kobj, &process->attr_pasid);
		if (ret)
			pr_warn("Creating pasid for pid %d failed",
				(int)process->lead_thread->pid);
	}
out:
	if (!IS_ERR(process))
		kref_get(&process->ref);
	mutex_unlock(&kfd_processes_mutex);

	return process;
}

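/*
 * kfd_create_process() returns the process with an extra reference taken
 * under kfd_processes_mutex (the kref_get in the "out" path above); callers
 * are expected to drop it with kfd_unref_process() when done.
 */
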
struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	process = find_process(thread);
	if (!process)
		return ERR_PTR(-EINVAL);

	return process;
}

static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *process;

	hash_for_each_possible_rcu(kfd_processes_table, process,
				   kfd_processes, (uintptr_t)mm)
		if (process->mm == mm)
			return process;

	return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
	struct kfd_process *p;
	int idx;

	idx = srcu_read_lock(&kfd_processes_srcu);
	p = find_process_by_mm(thread->mm);
	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

void kfd_unref_process(struct kfd_process *p)
{
	kref_put(&p->ref, kfd_process_ref_release);
}

static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
{
	struct kfd_process *p = pdd->process;
	void *mem;
	int id;

	/*
	 * Remove all handles from idr and release appropriate
	 * local memory object
	 */
	idr_for_each_entry(&pdd->alloc_idr, mem, id) {
		struct kfd_process_device *peer_pdd;

		list_for_each_entry(peer_pdd, &p->per_device_data,
				    per_device_list) {
			if (!peer_pdd->vm)
				continue;
			amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
				peer_pdd->dev->kgd, mem, peer_pdd->vm);
		}

		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem);
		kfd_process_device_remove_obj_handle(pdd, id);
	}
}

static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
{
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		kfd_process_device_free_bos(pdd);
}

static void kfd_process_destroy_pdds(struct kfd_process *p)
{
	struct kfd_process_device *pdd, *temp;

	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
				 per_device_list) {
		pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
			 pdd->dev->id, p->pasid);

		if (pdd->drm_file) {
			amdgpu_amdkfd_gpuvm_release_process_vm(
					pdd->dev->kgd, pdd->vm);
			fput(pdd->drm_file);
		} else if (pdd->vm)
			amdgpu_amdkfd_gpuvm_destroy_process_vm(
				pdd->dev->kgd, pdd->vm);

		list_del(&pdd->per_device_list);

		if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
				   get_order(KFD_CWSR_TBA_TMA_SIZE));

		kfree(pdd->qpd.doorbell_bitmap);
		idr_destroy(&pdd->alloc_idr);

		kfree(pdd);
	}
}

/* No process locking is needed in this function, because the process
 * is not findable any more. We must assume that no other thread is
 * using it any more, otherwise we couldn't safely free the process
 * structure in the end.
 */
static void kfd_process_wq_release(struct work_struct *work)
{
	struct kfd_process *p = container_of(work, struct kfd_process,
					     release_work);

	/* Remove the procfs files */
	if (p->kobj) {
		sysfs_remove_file(p->kobj, &p->attr_pasid);
		kobject_del(p->kobj);
		kobject_put(p->kobj);
		p->kobj = NULL;
	}

	kfd_iommu_unbind_process(p);

	kfd_process_free_outstanding_kfd_bos(p);

	kfd_process_destroy_pdds(p);
	dma_fence_put(p->ef);

	kfd_event_free_process(p);

	kfd_pasid_free(p->pasid);
	kfd_free_process_doorbells(p);

	mutex_destroy(&p->mutex);

	put_task_struct(p->lead_thread);

	kfree(p);
}

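/*
 * The final kref put can come from contexts where the teardown above (which
 * takes mutexes and frees BOs) cannot run directly, so kfd_process_ref_release
 * defers the work to kfd_process_wq instead of releasing in place.
 */
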
static void kfd_process_ref_release(struct kref *ref)
{
	struct kfd_process *p = container_of(ref, struct kfd_process, ref);

	INIT_WORK(&p->release_work, kfd_process_wq_release);
	queue_work(kfd_process_wq, &p->release_work);
}

static void kfd_process_free_notifier(struct mmu_notifier *mn)
{
	kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
}

static void kfd_process_notifier_release(struct mmu_notifier *mn,
					struct mm_struct *mm)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd = NULL;

	/*
	 * The kfd_process structure cannot be freed because the
	 * mmu_notifier srcu is read-locked
	 */
	p = container_of(mn, struct kfd_process, mmu_notifier);
	if (WARN_ON(p->mm != mm))
		return;

	mutex_lock(&kfd_processes_mutex);
	hash_del_rcu(&p->kfd_processes);
	mutex_unlock(&kfd_processes_mutex);
	synchronize_srcu(&kfd_processes_srcu);

	cancel_delayed_work_sync(&p->eviction_work);
	cancel_delayed_work_sync(&p->restore_work);

	mutex_lock(&p->mutex);

	/* Iterate over all process device data structures and if the
	 * pdd is in debug mode, we should first force unregistration,
	 * then we will be able to destroy the queues
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;

		mutex_lock(kfd_get_dbgmgr_mutex());
		if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
			if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
				kfd_dbgmgr_destroy(dev->dbgmgr);
				dev->dbgmgr = NULL;
			}
		}
		mutex_unlock(kfd_get_dbgmgr_mutex());
	}

	kfd_process_dequeue_from_all_devices(p);
	pqm_uninit(&p->pqm);

	/* Indicate to other users that MM is no longer valid */
	p->mm = NULL;

	mutex_unlock(&p->mutex);

	mmu_notifier_put(&p->mmu_notifier);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
	.release = kfd_process_notifier_release,
	.free_notifier = kfd_process_free_notifier,
};

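/*
 * Lifetime: .release tears the process down when its mm goes away, then
 * mmu_notifier_put() hands the final notifier reference to .free_notifier,
 * which drops the kfd_process reference (kfd_process_free_notifier above).
 */
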
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
{
	unsigned long offset;
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;
		struct qcm_process_device *qpd = &pdd->qpd;

		if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
			continue;

		offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
			MAP_SHARED, offset);

		if (IS_ERR_VALUE(qpd->tba_addr)) {
			int err = qpd->tba_addr;

			pr_err("Failure to set tba address. error %d.\n", err);
			qpd->tba_addr = 0;
			qpd->cwsr_kaddr = NULL;
			return err;
		}

		memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

		qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
		pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
			 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
	}

	return 0;
}

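/*
 * CWSR setup comes in two flavors: on APUs the trap handler pages are
 * allocated by kfd_reserved_mem_mmap() and mapped into the process through
 * the vm_mmap() above, while on dGPUs (cwsr_base set) the handler lives in
 * a GTT BO placed at a fixed VA by the function below.
 */
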
static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
		ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
		return 0;

	/* cwsr_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
				      KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
	if (ret)
		return ret;

	qpd->cwsr_kaddr = kaddr;
	qpd->tba_addr = qpd->cwsr_base;

	memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

	qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
	pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
		 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);

	return 0;
}

/*
 * On return the kfd_process is fully operational and will be freed when the
 * mm is released
 */
static struct kfd_process *create_process(const struct task_struct *thread)
{
	struct kfd_process *process;
	int err = -ENOMEM;

	process = kzalloc(sizeof(*process), GFP_KERNEL);
	if (!process)
		goto err_alloc_process;

	kref_init(&process->ref);
	mutex_init(&process->mutex);
	process->mm = thread->mm;
	process->lead_thread = thread->group_leader;
	INIT_LIST_HEAD(&process->per_device_data);
	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
	process->last_restore_timestamp = get_jiffies_64();
	kfd_event_init_process(process);
	process->is_32bit_user_mode = in_compat_syscall();

	process->pasid = kfd_pasid_alloc();
	if (process->pasid == 0)
		goto err_alloc_pasid;

	if (kfd_alloc_process_doorbells(process) < 0)
		goto err_alloc_doorbells;

	err = pqm_init(&process->pqm, process);
	if (err != 0)
		goto err_process_pqm_init;

	/* init process apertures */
	err = kfd_init_apertures(process);
	if (err != 0)
		goto err_init_apertures;

	/* Must be last, have to use release destruction after this */
	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
	err = mmu_notifier_register(&process->mmu_notifier, process->mm);
	if (err)
		goto err_register_notifier;

	get_task_struct(process->lead_thread);
	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
		     (uintptr_t)process->mm);

	return process;

err_register_notifier:
	kfd_process_free_outstanding_kfd_bos(process);
	kfd_process_destroy_pdds(process);
err_init_apertures:
	pqm_uninit(&process->pqm);
err_process_pqm_init:
	kfd_free_process_doorbells(process);
err_alloc_doorbells:
	kfd_pasid_free(process->pasid);
err_alloc_pasid:
	mutex_destroy(&process->mutex);
	kfree(process);
err_alloc_process:
	return ERR_PTR(err);
}

static int init_doorbell_bitmap(struct qcm_process_device *qpd,
				struct kfd_dev *dev)
{
	unsigned int i;
	int range_start = dev->shared_resources.non_cp_doorbells_start;
	int range_end = dev->shared_resources.non_cp_doorbells_end;

	if (!KFD_IS_SOC15(dev->device_info->asic_family))
		return 0;

	qpd->doorbell_bitmap =
		kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
				     BITS_PER_BYTE), GFP_KERNEL);
	if (!qpd->doorbell_bitmap)
		return -ENOMEM;

	/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
	pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
	pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
		 range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
		 range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);

	for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
		if (i >= range_start && i <= range_end) {
			set_bit(i, qpd->doorbell_bitmap);
			set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
				qpd->doorbell_bitmap);
		}
	}

	return 0;
}

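/*
 * Bits set in qpd->doorbell_bitmap mark doorbell slots the process may not
 * use for its own queues; on SOC15 that is the non-CP range (SDMA, IH, VCN)
 * plus its mirrored copy at KFD_QUEUE_DOORBELL_MIRROR_OFFSET.
 */
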
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		if (pdd->dev == dev)
			return pdd;

	return NULL;
}

struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
	if (!pdd)
		return NULL;

	if (init_doorbell_bitmap(&pdd->qpd, dev)) {
		pr_err("Failed to init doorbell for process\n");
		kfree(pdd);
		return NULL;
	}

	pdd->dev = dev;
	INIT_LIST_HEAD(&pdd->qpd.queues_list);
	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
	pdd->qpd.dqm = dev->dqm;
	pdd->qpd.pqm = &p->pqm;
	pdd->qpd.evicted = 0;
	pdd->process = p;
	pdd->bound = PDD_UNBOUND;
	pdd->already_dequeued = false;
	list_add(&pdd->per_device_list, &p->per_device_data);

	/* Init idr used for memory handle translation */
	idr_init(&pdd->alloc_idr);

	return pdd;
}

/**
 * kfd_process_device_init_vm - Initialize a VM for a process-device
 *
 * @pdd: The process-device
 * @drm_file: Optional pointer to a DRM file descriptor
 *
 * If @drm_file is specified, it will be used to acquire the VM from
 * that file descriptor. If successful, the @pdd takes ownership of
 * the file descriptor.
 *
 * If @drm_file is NULL, a new VM is created.
 *
 * Returns 0 on success, -errno on failure.
 */
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
			       struct file *drm_file)
{
	struct kfd_process *p;
	struct kfd_dev *dev;
	int ret;

	if (pdd->vm)
		return drm_file ? -EBUSY : 0;

	p = pdd->process;
	dev = pdd->dev;

	if (drm_file)
		ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
			dev->kgd, drm_file, p->pasid,
			&pdd->vm, &p->kgd_process_info, &p->ef);
	else
		ret = amdgpu_amdkfd_gpuvm_create_process_vm(dev->kgd, p->pasid,
			&pdd->vm, &p->kgd_process_info, &p->ef);
	if (ret) {
		pr_err("Failed to create process VM object\n");
		return ret;
	}

	amdgpu_vm_set_task_info(pdd->vm);

	ret = kfd_process_device_reserve_ib_mem(pdd);
	if (ret)
		goto err_reserve_ib_mem;
	ret = kfd_process_device_init_cwsr_dgpu(pdd);
	if (ret)
		goto err_init_cwsr;

	pdd->drm_file = drm_file;

	return 0;

err_init_cwsr:
err_reserve_ib_mem:
	kfd_process_device_free_bos(pdd);
	if (!drm_file)
		amdgpu_amdkfd_gpuvm_destroy_process_vm(dev->kgd, pdd->vm);
	pdd->vm = NULL;

	return ret;
}

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int err;

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return ERR_PTR(-ENOMEM);
	}

	err = kfd_iommu_bind_process_to_device(pdd);
	if (err)
		return ERR_PTR(err);

	err = kfd_process_device_init_vm(pdd, NULL);
	if (err)
		return ERR_PTR(err);

	return pdd;
}

struct kfd_process_device *kfd_get_first_process_device_data(
						struct kfd_process *p)
{
	return list_first_entry(&p->per_device_data,
				struct kfd_process_device,
				per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(
						struct kfd_process *p,
						struct kfd_process_device *pdd)
{
	if (list_is_last(&pdd->per_device_list, &p->per_device_data))
		return NULL;
	return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
	return !(list_empty(&p->per_device_data));
}

/* Create specific handle mapped to mem from process local memory idr
 * Assumes that the process lock is held.
 */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
					void *mem)
{
	return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
}

/* Translate specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
					int handle)
{
	if (handle < 0)
		return NULL;

	return idr_find(&pdd->alloc_idr, handle);
}

/* Remove specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
					int handle)
{
	if (handle >= 0)
		idr_remove(&pdd->alloc_idr, handle);
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
	struct kfd_process *p, *ret_p = NULL;
	unsigned int temp;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (p->pasid == pasid) {
			kref_get(&p->ref);
			ret_p = p;
			break;
		}
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return ret_p;
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *p;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	p = find_process_by_mm(mm);
	if (p)
		kref_get(&p->ref);

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

/* kfd_process_evict_queues - Evict all user queues of a process
 *
 * Eviction is reference-counted per process-device. This means multiple
 * evictions from different sources can be nested safely.
 */
int kfd_process_evict_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r = 0;
	unsigned int n_evicted = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
							    &pdd->qpd);
		if (r) {
			pr_err("Failed to evict process queues\n");
			goto fail;
		}
		n_evicted++;
	}

	return r;

fail:
	/* To keep state consistent, roll back partial eviction by
	 * restoring queues
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		if (n_evicted == 0)
			break;
		if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd))
			pr_err("Failed to restore queues\n");

		n_evicted--;
	}

	return r;
}

/* kfd_process_restore_queues - Restore all user queues of a process */
int kfd_process_restore_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r, ret = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd);
		if (r) {
			pr_err("Failed to restore process queues\n");
			if (!ret)
				ret = r;
		}
	}

	return ret;
}

static void evict_process_worker(struct work_struct *work)
{
	int ret;
	struct kfd_process *p;
	struct delayed_work *dwork;

	dwork = to_delayed_work(work);

	/* Process termination destroys this worker thread. So during the
	 * lifetime of this thread, kfd_process p will be valid
	 */
	p = container_of(dwork, struct kfd_process, eviction_work);
	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
		  "Eviction fence mismatch\n");

	/* A narrow window of overlap between restore and evict work items
	 * is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
	 * unreserves KFD BOs, the process can be evicted again, but restore
	 * still has a few more steps to finish. So wait for any previous
	 * restore work to complete.
	 */
	flush_delayed_work(&p->restore_work);

	pr_debug("Started evicting pasid 0x%x\n", p->pasid);
	ret = kfd_process_evict_queues(p);
	if (!ret) {
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
		queue_delayed_work(kfd_restore_wq, &p->restore_work,
				   msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));

		pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
	} else
		pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
}

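/*
 * Signalling and dropping p->ef above releases the eviction fence; the
 * restore worker below receives a fresh fence back from
 * amdgpu_amdkfd_gpuvm_restore_process_bos() through its &p->ef argument.
 */
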
static void restore_process_worker(struct work_struct *work)
{
	struct delayed_work *dwork;
	struct kfd_process *p;
	int ret = 0;

	dwork = to_delayed_work(work);

	/* Process termination destroys this worker thread. So during the
	 * lifetime of this thread, kfd_process p will be valid
	 */
	p = container_of(dwork, struct kfd_process, restore_work);
	pr_debug("Started restoring pasid 0x%x\n", p->pasid);

	/* Setting last_restore_timestamp before successful restoration.
	 * Otherwise this would have to be set by KGD (restore_process_bos)
	 * before KFD BOs are unreserved. If not, the process can be evicted
	 * again before the timestamp is set.
	 * If restore fails, the timestamp will be set again in the next
	 * attempt. This would mean that the minimum GPU quanta would be
	 * PROCESS_ACTIVE_TIME_MS - (time to execute the following two
	 * functions)
	 */

	p->last_restore_timestamp = get_jiffies_64();
	ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
						      &p->ef);
	if (ret) {
		pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
		WARN(!ret, "reschedule restore work failed\n");
		return;
	}

	ret = kfd_process_restore_queues(p);
	if (!ret)
		pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
	else
		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
}

void kfd_suspend_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		cancel_delayed_work_sync(&p->eviction_work);
		cancel_delayed_work_sync(&p->restore_work);

		if (kfd_process_evict_queues(p))
			pr_err("Failed to suspend process 0x%x\n", p->pasid);
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
}

int kfd_resume_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
			pr_err("Restore process %d failed during resume\n",
			       p->pasid);
			ret = -EFAULT;
		}
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
	return ret;
}

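/*
 * Suspend/resume mirror the evict/restore workers: suspend synchronously
 * evicts every process and signals its eviction fence, while resume simply
 * queues each process's restore work and reports -EFAULT if queuing fails.
 */
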
int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
			  struct vm_area_struct *vma)
{
	struct kfd_process_device *pdd;
	struct qcm_process_device *qpd;

	if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
		pr_err("Incorrect CWSR mapping size.\n");
		return -EINVAL;
	}

	pdd = kfd_get_process_device_data(dev, process);
	if (!pdd)
		return -EINVAL;
	qpd = &pdd->qpd;

	qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					get_order(KFD_CWSR_TBA_TMA_SIZE));
	if (!qpd->cwsr_kaddr) {
		pr_err("Error allocating per process CWSR buffer.\n");
		return -ENOMEM;
	}

	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
		| VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
	/* Mapping pages to user process */
	return remap_pfn_range(vma, vma->vm_start,
			       PFN_DOWN(__pa(qpd->cwsr_kaddr)),
			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}

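/*
 * The CWSR pages mapped above are plain kernel pages (not a GTT BO), which
 * is why kfd_process_destroy_pdds() frees cwsr_kaddr with free_pages() only
 * in the !cwsr_base (APU) case.
 */
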
void kfd_flush_tlb(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
		/* Nothing to flush until a VMID is assigned, which
		 * only happens when the first queue is created.
		 */
		if (pdd->qpd.vmid)
			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
							 pdd->qpd.vmid);
	} else {
		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
						  pdd->process->pasid);
	}
}

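/*
 * With KFD_SCHED_POLICY_NO_HWS the driver knows which VMID the process owns
 * (once a queue exists), so it flushes by VMID; otherwise VMID assignment is
 * managed by the scheduler and the flush is keyed by PASID instead.
 */
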
#if defined(CONFIG_DEBUG_FS)

int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
{
	struct kfd_process *p;
	unsigned int temp;
	int r = 0;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		seq_printf(m, "Process %d PASID 0x%x:\n",
			   p->lead_thread->tgid, p->pasid);

		mutex_lock(&p->mutex);
		r = pqm_debugfs_mqds(m, &p->pqm);
		mutex_unlock(&p->mutex);

		if (r)
			break;
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return r;
}

#endif