/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>
#include <linux/mman.h>
#include <linux/file.h>
#include "amdgpu_amdkfd.h"

struct mm_struct;

#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"

/*
 * List of struct kfd_process (field kfd_process).
 * Unique/indexed by mm_struct*
 */
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_SRCU(kfd_processes_srcu);

/* For process termination handling */
static struct workqueue_struct *kfd_process_wq;

/* Ordered, single-threaded workqueue for restoring evicted
 * processes. Restoring multiple processes concurrently under memory
 * pressure can lead to processes blocking each other from validating
 * their BOs and result in a live-lock situation where processes
 * remain evicted indefinitely.
 */
static struct workqueue_struct *kfd_restore_wq;

static struct kfd_process *find_process(const struct task_struct *thread);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread,
					struct file *filep);

static void evict_process_worker(struct work_struct *work);
static void restore_process_worker(struct work_struct *work);

int kfd_process_create_wq(void)
{
	if (!kfd_process_wq)
		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
	if (!kfd_restore_wq)
		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);

	if (!kfd_process_wq || !kfd_restore_wq) {
		kfd_process_destroy_wq();
		return -ENOMEM;
	}

	return 0;
}
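
/* Note: kfd_process_create_wq() and kfd_process_destroy_wq() are expected to
 * be called once from the driver's module init/exit path (kfd_module.c in
 * this driver), so both workqueues live for the lifetime of the module.
 */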

void kfd_process_destroy_wq(void)
{
	if (kfd_process_wq) {
		destroy_workqueue(kfd_process_wq);
		kfd_process_wq = NULL;
	}
	if (kfd_restore_wq) {
		destroy_workqueue(kfd_restore_wq);
		kfd_restore_wq = NULL;
	}
}

static void kfd_process_free_gpuvm(struct kgd_mem *mem,
		struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;

	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem);
}

/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
 * This function should be only called right after the process
 * is created and when kfd_processes_mutex is still being held
 * to avoid concurrency. Because of that exclusiveness, we do
 * not need to take p->mutex.
 */
static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
				   uint64_t gpu_va, uint32_t size,
				   uint32_t flags, void **kptr)
{
	struct kfd_dev *kdev = pdd->dev;
	struct kgd_mem *mem = NULL;
	int handle;
	int err;

	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
						      pdd->vm, &mem, NULL, flags);
	if (err)
		goto err_alloc_mem;

	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
	if (err)
		goto err_map_mem;

	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Create an obj handle so kfd_process_device_remove_obj_handle
	 * will take care of the bo removal when the process finishes.
	 * We do not need to take p->mutex, because the process is just
	 * created and the ioctls have not had the chance to run.
	 */
	handle = kfd_process_device_create_obj_handle(pdd, mem);

	if (handle < 0) {
		err = handle;
		goto free_gpuvm;
	}

	if (kptr) {
		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
				(struct kgd_mem *)mem, kptr, NULL);
		if (err) {
			pr_debug("Map GTT BO to kernel failed\n");
			goto free_obj_handle;
		}
	}

	return err;

free_obj_handle:
	kfd_process_device_remove_obj_handle(pdd, handle);
free_gpuvm:
sync_memory_failed:
	kfd_process_free_gpuvm(mem, pdd);
	return err;

err_map_mem:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem);
err_alloc_mem:
	*kptr = NULL;
	return err;
}

/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
 * process for IB usage. The memory reserved is for KFD to submit
 * IB to AMDGPU from kernel. If the memory is reserved
 * successfully, ib_kaddr will have the CPU/kernel
 * address. Check ib_kaddr before accessing the memory.
 */
static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
{
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
			 ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
			 ALLOC_MEM_FLAGS_WRITABLE |
			 ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (qpd->ib_kaddr || !qpd->ib_base)
		return 0;

	/* ib_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
				      &kaddr);
	if (ret)
		return ret;

	qpd->ib_kaddr = kaddr;

	return 0;
}

struct kfd_process *kfd_create_process(struct file *filep)
{
	struct kfd_process *process;
	struct task_struct *thread = current;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	/*
	 * take kfd processes mutex before starting of process creation
	 * so there won't be a case where two threads of the same process
	 * create two kfd_process structures
	 */
	mutex_lock(&kfd_processes_mutex);

	/* A prior open of /dev/kfd could have already created the process. */
	process = find_process(thread);
	if (process)
		pr_debug("Process already found\n");
	else
		process = create_process(thread, filep);

	mutex_unlock(&kfd_processes_mutex);

	return process;
}

struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	process = find_process(thread);
	if (!process)
		return ERR_PTR(-EINVAL);

	return process;
}

static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *process;

	hash_for_each_possible_rcu(kfd_processes_table, process,
					kfd_processes, (uintptr_t)mm)
		if (process->mm == mm)
			return process;

	return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
	struct kfd_process *p;
	int idx;

	idx = srcu_read_lock(&kfd_processes_srcu);
	p = find_process_by_mm(thread->mm);
	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

void kfd_unref_process(struct kfd_process *p)
{
	kref_put(&p->ref, kfd_process_ref_release);
}

static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
{
	struct kfd_process *p = pdd->process;
	void *mem;
	int id;

	/*
	 * Remove all handles from idr and release appropriate
	 * local memory object
	 */
	idr_for_each_entry(&pdd->alloc_idr, mem, id) {
		struct kfd_process_device *peer_pdd;

		list_for_each_entry(peer_pdd, &p->per_device_data,
				    per_device_list) {
			if (!peer_pdd->vm)
				continue;
			amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
				peer_pdd->dev->kgd, mem, peer_pdd->vm);
		}

		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem);
		kfd_process_device_remove_obj_handle(pdd, id);
	}
}

static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
{
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		kfd_process_device_free_bos(pdd);
}

static void kfd_process_destroy_pdds(struct kfd_process *p)
{
	struct kfd_process_device *pdd, *temp;

	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
				 per_device_list) {
		pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
				pdd->dev->id, p->pasid);

		if (pdd->drm_file) {
			amdgpu_amdkfd_gpuvm_release_process_vm(
					pdd->dev->kgd, pdd->vm);
			fput(pdd->drm_file);
		} else if (pdd->vm)
			amdgpu_amdkfd_gpuvm_destroy_process_vm(
				pdd->dev->kgd, pdd->vm);

		list_del(&pdd->per_device_list);

		if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
				get_order(KFD_CWSR_TBA_TMA_SIZE));

		kfree(pdd->qpd.doorbell_bitmap);
		idr_destroy(&pdd->alloc_idr);

		kfree(pdd);
	}
}

/* No process locking is needed in this function, because the process
 * is not findable any more. We must assume that no other thread is
 * using it any more, otherwise we couldn't safely free the process
 * structure in the end.
 */
static void kfd_process_wq_release(struct work_struct *work)
{
	struct kfd_process *p = container_of(work, struct kfd_process,
					     release_work);

	kfd_iommu_unbind_process(p);

	kfd_process_free_outstanding_kfd_bos(p);

	kfd_process_destroy_pdds(p);
	dma_fence_put(p->ef);

	kfd_event_free_process(p);

	kfd_pasid_free(p->pasid);
	kfd_free_process_doorbells(p);

	mutex_destroy(&p->mutex);

	put_task_struct(p->lead_thread);

	kfree(p);
}

static void kfd_process_ref_release(struct kref *ref)
{
	struct kfd_process *p = container_of(ref, struct kfd_process, ref);

	INIT_WORK(&p->release_work, kfd_process_wq_release);
	queue_work(kfd_process_wq, &p->release_work);
}

static void kfd_process_destroy_delayed(struct rcu_head *rcu)
{
	struct kfd_process *p = container_of(rcu, struct kfd_process, rcu);

	kfd_unref_process(p);
}

static void kfd_process_notifier_release(struct mmu_notifier *mn,
					struct mm_struct *mm)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd = NULL;

	/*
	 * The kfd_process structure cannot be freed because the
	 * mmu_notifier srcu is read locked
	 */
	p = container_of(mn, struct kfd_process, mmu_notifier);
	if (WARN_ON(p->mm != mm))
		return;

	mutex_lock(&kfd_processes_mutex);
	hash_del_rcu(&p->kfd_processes);
	mutex_unlock(&kfd_processes_mutex);
	synchronize_srcu(&kfd_processes_srcu);

	cancel_delayed_work_sync(&p->eviction_work);
	cancel_delayed_work_sync(&p->restore_work);

	mutex_lock(&p->mutex);

	/* Iterate over all process device data structures and if the
	 * pdd is in debug mode, we should first force unregistration,
	 * then we will be able to destroy the queues
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;

		mutex_lock(kfd_get_dbgmgr_mutex());
		if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
			if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
				kfd_dbgmgr_destroy(dev->dbgmgr);
				dev->dbgmgr = NULL;
			}
		}
		mutex_unlock(kfd_get_dbgmgr_mutex());
	}

	kfd_process_dequeue_from_all_devices(p);
	pqm_uninit(&p->pqm);

	/* Indicate to other users that MM is no longer valid */
	p->mm = NULL;

	mutex_unlock(&p->mutex);

	mmu_notifier_unregister_no_release(&p->mmu_notifier, mm);
	mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
	.release = kfd_process_notifier_release,
};

static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
{
	unsigned long offset;
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;
		struct qcm_process_device *qpd = &pdd->qpd;

		if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
			continue;

		offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
			<< PAGE_SHIFT;
		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
			MAP_SHARED, offset);

		if (IS_ERR_VALUE(qpd->tba_addr)) {
			int err = qpd->tba_addr;

			pr_err("Failure to set tba address. error %d.\n", err);
			qpd->tba_addr = 0;
			qpd->cwsr_kaddr = NULL;
			return err;
		}

		memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

		qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
		pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
			qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
	}

	return 0;
}

static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
		ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
		return 0;

	/* cwsr_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
				      KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
	if (ret)
		return ret;

	qpd->cwsr_kaddr = kaddr;
	qpd->tba_addr = qpd->cwsr_base;

	memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

	qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
	pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
		 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);

	return 0;
}

static struct kfd_process *create_process(const struct task_struct *thread,
					struct file *filep)
{
	struct kfd_process *process;
	int err = -ENOMEM;

	process = kzalloc(sizeof(*process), GFP_KERNEL);

	if (!process)
		goto err_alloc_process;

	process->pasid = kfd_pasid_alloc();
	if (process->pasid == 0)
		goto err_alloc_pasid;

	if (kfd_alloc_process_doorbells(process) < 0)
		goto err_alloc_doorbells;

	kref_init(&process->ref);

	mutex_init(&process->mutex);

	process->mm = thread->mm;

	/* register notifier */
	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
	err = mmu_notifier_register(&process->mmu_notifier, process->mm);
	if (err)
		goto err_mmu_notifier;

	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
			(uintptr_t)process->mm);

	process->lead_thread = thread->group_leader;
	get_task_struct(process->lead_thread);

	INIT_LIST_HEAD(&process->per_device_data);

	kfd_event_init_process(process);

	err = pqm_init(&process->pqm, process);
	if (err != 0)
		goto err_process_pqm_init;

	/* init process apertures */
	process->is_32bit_user_mode = in_compat_syscall();
	err = kfd_init_apertures(process);
	if (err != 0)
		goto err_init_apertures;

	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
	process->last_restore_timestamp = get_jiffies_64();

	err = kfd_process_init_cwsr_apu(process, filep);
	if (err)
		goto err_init_cwsr;

	return process;

err_init_cwsr:
	kfd_process_free_outstanding_kfd_bos(process);
	kfd_process_destroy_pdds(process);
err_init_apertures:
	pqm_uninit(&process->pqm);
err_process_pqm_init:
	hash_del_rcu(&process->kfd_processes);
	synchronize_rcu();
	mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
err_mmu_notifier:
	mutex_destroy(&process->mutex);
	kfd_free_process_doorbells(process);
err_alloc_doorbells:
	kfd_pasid_free(process->pasid);
err_alloc_pasid:
	kfree(process);
err_alloc_process:
	return ERR_PTR(err);
}

static int init_doorbell_bitmap(struct qcm_process_device *qpd,
			struct kfd_dev *dev)
{
	unsigned int i;

	if (!KFD_IS_SOC15(dev->device_info->asic_family))
		return 0;

	qpd->doorbell_bitmap =
		kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
				     BITS_PER_BYTE), GFP_KERNEL);
	if (!qpd->doorbell_bitmap)
		return -ENOMEM;

	/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
	for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
		if (i >= dev->shared_resources.non_cp_doorbells_start
			&& i <= dev->shared_resources.non_cp_doorbells_end) {
			set_bit(i, qpd->doorbell_bitmap);
			set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
				qpd->doorbell_bitmap);
			pr_debug("reserved doorbell 0x%03x and 0x%03x\n", i,
				i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
		}
	}

	return 0;
}

struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		if (pdd->dev == dev)
			return pdd;

	return NULL;
}

struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
	if (!pdd)
		return NULL;

	if (init_doorbell_bitmap(&pdd->qpd, dev)) {
		pr_err("Failed to init doorbell for process\n");
		kfree(pdd);
		return NULL;
	}

	pdd->dev = dev;
	INIT_LIST_HEAD(&pdd->qpd.queues_list);
	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
	pdd->qpd.dqm = dev->dqm;
	pdd->qpd.pqm = &p->pqm;
	pdd->qpd.evicted = 0;
	pdd->process = p;
	pdd->bound = PDD_UNBOUND;
	pdd->already_dequeued = false;
	list_add(&pdd->per_device_list, &p->per_device_data);

	/* Init idr used for memory handle translation */
	idr_init(&pdd->alloc_idr);

	return pdd;
}

/**
 * kfd_process_device_init_vm - Initialize a VM for a process-device
 *
 * @pdd:    The process-device
 * @drm_file: Optional pointer to a DRM file descriptor
 *
 * If @drm_file is specified, it will be used to acquire the VM from
 * that file descriptor. If successful, the @pdd takes ownership of
 * the file descriptor.
 *
 * If @drm_file is NULL, a new VM is created.
 *
 * Returns 0 on success, -errno on failure.
 */
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
			       struct file *drm_file)
{
	struct kfd_process *p;
	struct kfd_dev *dev;
	int ret;

	if (pdd->vm)
		return drm_file ? -EBUSY : 0;

	p = pdd->process;
	dev = pdd->dev;

	if (drm_file)
		ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
			dev->kgd, drm_file, p->pasid,
			&pdd->vm, &p->kgd_process_info, &p->ef);
	else
		ret = amdgpu_amdkfd_gpuvm_create_process_vm(dev->kgd, p->pasid,
			&pdd->vm, &p->kgd_process_info, &p->ef);
	if (ret) {
		pr_err("Failed to create process VM object\n");
		return ret;
	}

	ret = kfd_process_device_reserve_ib_mem(pdd);
	if (ret)
		goto err_reserve_ib_mem;
	ret = kfd_process_device_init_cwsr_dgpu(pdd);
	if (ret)
		goto err_init_cwsr;

	pdd->drm_file = drm_file;

	return 0;

err_init_cwsr:
err_reserve_ib_mem:
	kfd_process_device_free_bos(pdd);
	if (!drm_file)
		amdgpu_amdkfd_gpuvm_destroy_process_vm(dev->kgd, pdd->vm);
	pdd->vm = NULL;

	return ret;
}
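
/* Illustrative sketch, not code from this file: kfd_bind_process_to_device()
 * below passes a NULL drm_file so a new VM is created, while the
 * AMDKFD_IOC_ACQUIRE_VM ioctl path is expected to pass the process's DRM
 * render-node file, roughly:
 *
 *	drm_file = fget(args->drm_fd);
 *	...
 *	pdd = kfd_get_process_device_data(dev, p);
 *	ret = kfd_process_device_init_vm(pdd, drm_file);
 *	if (ret)
 *		fput(drm_file);
 *
 * On success the pdd keeps the file reference and drops it with fput() in
 * kfd_process_destroy_pdds().
 */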

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int err;

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return ERR_PTR(-ENOMEM);
	}

	err = kfd_iommu_bind_process_to_device(pdd);
	if (err)
		return ERR_PTR(err);

	err = kfd_process_device_init_vm(pdd, NULL);
	if (err)
		return ERR_PTR(err);

	return pdd;
}

struct kfd_process_device *kfd_get_first_process_device_data(
						struct kfd_process *p)
{
	return list_first_entry(&p->per_device_data,
				struct kfd_process_device,
				per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(
						struct kfd_process *p,
						struct kfd_process_device *pdd)
{
	if (list_is_last(&pdd->per_device_list, &p->per_device_data))
		return NULL;
	return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
	return !(list_empty(&p->per_device_data));
}

/* Create specific handle mapped to mem from process local memory idr
 * Assumes that the process lock is held.
 */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
					void *mem)
{
	return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
}

/* Translate specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
					int handle)
{
	if (handle < 0)
		return NULL;

	return idr_find(&pdd->alloc_idr, handle);
}

/* Remove specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
					int handle)
{
	if (handle >= 0)
		idr_remove(&pdd->alloc_idr, handle);
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
	struct kfd_process *p, *ret_p = NULL;
	unsigned int temp;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (p->pasid == pasid) {
			kref_get(&p->ref);
			ret_p = p;
			break;
		}
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return ret_p;
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *p;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	p = find_process_by_mm(mm);
	if (p)
		kref_get(&p->ref);

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

/* kfd_process_evict_queues - Evict all user queues of a process
 *
 * Eviction is reference-counted per process-device. This means multiple
 * evictions from different sources can be nested safely.
 */
int kfd_process_evict_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r = 0;
	unsigned int n_evicted = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
							    &pdd->qpd);
		if (r) {
			pr_err("Failed to evict process queues\n");
			goto fail;
		}
		n_evicted++;
	}

	return r;

fail:
	/* To keep state consistent, roll back partial eviction by
	 * restoring queues
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		if (n_evicted == 0)
			break;
		if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd))
			pr_err("Failed to restore queues\n");

		n_evicted--;
	}

	return r;
}
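
/* Informational note: the per-device reference counting mentioned above is
 * expected to live in the device queue manager's evict_process_queues()/
 * restore_process_queues() implementations, tracked via qpd->evicted
 * (initialized in kfd_create_process_device_data()), so only the first evict
 * and the matching last restore actually touch the hardware queues.
 */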

/* kfd_process_restore_queues - Restore all user queues of a process */
int kfd_process_restore_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r, ret = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd);
		if (r) {
			pr_err("Failed to restore process queues\n");
			if (!ret)
				ret = r;
		}
	}

	return ret;
}

static void evict_process_worker(struct work_struct *work)
{
	int ret;
	struct kfd_process *p;
	struct delayed_work *dwork;

	dwork = to_delayed_work(work);

	/* Process termination destroys this worker thread. So during the
	 * lifetime of this thread, kfd_process p will be valid
	 */
	p = container_of(dwork, struct kfd_process, eviction_work);
	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
		  "Eviction fence mismatch\n");

	/* A narrow window of overlap between restore and evict work items
	 * is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
	 * unreserves KFD BOs, the process can be evicted again, but the
	 * restore still has a few more steps to finish. So wait for any
	 * previous restore work to complete.
	 */
	flush_delayed_work(&p->restore_work);

	pr_debug("Started evicting pasid %d\n", p->pasid);
	ret = kfd_process_evict_queues(p);
	if (!ret) {
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
		queue_delayed_work(kfd_restore_wq, &p->restore_work,
				msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));

		pr_debug("Finished evicting pasid %d\n", p->pasid);
	} else
		pr_err("Failed to evict queues of pasid %d\n", p->pasid);
}

static void restore_process_worker(struct work_struct *work)
{
	struct delayed_work *dwork;
	struct kfd_process *p;
	struct kfd_process_device *pdd;
	int ret = 0;

	dwork = to_delayed_work(work);

	/* Process termination destroys this worker thread. So during the
	 * lifetime of this thread, kfd_process p will be valid
	 */
	p = container_of(dwork, struct kfd_process, restore_work);

	/* Call restore_process_bos on the first KGD device. This function
	 * takes care of restoring the whole process including other devices.
	 * Restore can fail if enough memory is not available. If so,
	 * schedule another restore attempt later.
	 */
	pdd = list_first_entry(&p->per_device_data,
			       struct kfd_process_device,
			       per_device_list);

	pr_debug("Started restoring pasid %d\n", p->pasid);

	/* Setting last_restore_timestamp before successful restoration.
	 * Otherwise this would have to be set by KGD (restore_process_bos)
	 * before KFD BOs are unreserved. If not, the process can be evicted
	 * again before the timestamp is set.
	 * If restore fails, the timestamp will be set again in the next
	 * attempt. This would mean that the minimum GPU quanta would be
	 * PROCESS_ACTIVE_TIME_MS - (time to execute the following two
	 * functions)
	 */

	p->last_restore_timestamp = get_jiffies_64();
	ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
						      &p->ef);
	if (ret) {
		pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n",
			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
		WARN(!ret, "reschedule restore work failed\n");
		return;
	}

	ret = kfd_process_restore_queues(p);
	if (!ret)
		pr_debug("Finished restoring pasid %d\n", p->pasid);
	else
		pr_err("Failed to restore queues of pasid %d\n", p->pasid);
}

void kfd_suspend_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		cancel_delayed_work_sync(&p->eviction_work);
		cancel_delayed_work_sync(&p->restore_work);

		if (kfd_process_evict_queues(p))
			pr_err("Failed to suspend process %d\n", p->pasid);
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
}

int kfd_resume_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
			pr_err("Restore process %d failed during resume\n",
			       p->pasid);
			ret = -EFAULT;
		}
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
	return ret;
}

int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
			  struct vm_area_struct *vma)
{
	struct kfd_process_device *pdd;
	struct qcm_process_device *qpd;

	if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
		pr_err("Incorrect CWSR mapping size.\n");
		return -EINVAL;
	}

	pdd = kfd_get_process_device_data(dev, process);
	if (!pdd)
		return -EINVAL;
	qpd = &pdd->qpd;

	qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					get_order(KFD_CWSR_TBA_TMA_SIZE));
	if (!qpd->cwsr_kaddr) {
		pr_err("Error allocating per process CWSR buffer.\n");
		return -ENOMEM;
	}

	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
		| VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
	/* Mapping pages to user process */
	return remap_pfn_range(vma, vma->vm_start,
			       PFN_DOWN(__pa(qpd->cwsr_kaddr)),
			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}
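
/* Informational note: this mmap path backs the APU CWSR setup above.
 * kfd_process_init_cwsr_apu() calls vm_mmap() on /dev/kfd with a
 * KFD_MMAP_TYPE_RESERVED_MEM offset, which is expected to be dispatched by
 * the chardev mmap handler (kfd_mmap() in kfd_chardev.c) to this function,
 * allocating qpd->cwsr_kaddr and mapping it into the user process.
 */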

void kfd_flush_tlb(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;
	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
		/* Nothing to flush until a VMID is assigned, which
		 * only happens when the first queue is created.
		 */
		if (pdd->qpd.vmid)
			f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
	} else {
		f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
	}
}
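
/* Informational note: with the hardware scheduler (HWS) enabled, VMIDs are
 * assigned dynamically by the scheduler firmware, so the flush above is
 * requested per PASID and the KGD/amdgpu side is expected to resolve which
 * VMID(s) currently belong to that PASID.
 */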

#if defined(CONFIG_DEBUG_FS)

int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
{
	struct kfd_process *p;
	unsigned int temp;
	int r = 0;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		seq_printf(m, "Process %d PASID %d:\n",
			   p->lead_thread->tgid, p->pasid);

		mutex_lock(&p->mutex);
		r = pqm_debugfs_mqds(m, &p->pqm);
		mutex_unlock(&p->mutex);

		if (r)
			break;
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return r;
}

#endif