/*
 * Copyright 2008 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <linux/pagemap.h>
#include <linux/sync_file.h>

#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>

#include "amdgpu.h"
#include "amdgpu_trace.h"
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
				      struct drm_amdgpu_cs_chunk_fence *data,
				      uint32_t *offset)
{
	struct drm_gem_object *gobj;
	unsigned long size;

	gobj = drm_gem_object_lookup(p->filp, data->handle);
	if (gobj == NULL)
		return -EINVAL;

	p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
	p->uf_entry.priority = 0;
	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
	p->uf_entry.tv.shared = true;
	p->uf_entry.user_pages = NULL;

	size = amdgpu_bo_size(p->uf_entry.robj);
	if (size != PAGE_SIZE || (data->offset + 8) > size)
		return -EINVAL;

	*offset = data->offset;

	drm_gem_object_put_unlocked(gobj);

	if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
		amdgpu_bo_unref(&p->uf_entry.robj);
		return -EINVAL;
	}

	return 0;
}
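/*
 * Note on the checks above: the user fence BO must be exactly PAGE_SIZE and
 * data->offset has to leave room for an 8-byte write, i.e. a 64-bit fence
 * value. The offset returned here ends up in job->uf_addr, and
 * amdgpu_cs_submit() below stores the submission sequence number in
 * job->uf_sequence for the same fence.
 */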
static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	union drm_amdgpu_cs *cs = data;
	uint64_t *chunk_array_user;
	uint64_t *chunk_array;
	unsigned size, num_ibs = 0;
	uint32_t uf_offset = 0;
	int i;
	int ret;

	if (cs->in.num_chunks == 0)
		return 0;

	chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (!chunk_array)
		return -ENOMEM;

	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
	if (!p->ctx) {
		ret = -EINVAL;
		goto free_chunk;
	}

	/* skip guilty context job */
	if (atomic_read(&p->ctx->guilty) == 1) {
		ret = -ECANCELED;
		goto free_chunk;
	}

	mutex_lock(&p->ctx->lock);

	/* get chunks */
	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
	if (copy_from_user(chunk_array, chunk_array_user,
			   sizeof(uint64_t)*cs->in.num_chunks)) {
		ret = -EFAULT;
		goto free_chunk;
	}

	p->nchunks = cs->in.num_chunks;
	p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
				  GFP_KERNEL);
	if (!p->chunks) {
		ret = -ENOMEM;
		goto free_chunk;
	}

	for (i = 0; i < p->nchunks; i++) {
		struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
		struct drm_amdgpu_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = u64_to_user_ptr(chunk_array[i]);
		if (copy_from_user(&user_chunk, chunk_ptr,
				   sizeof(struct drm_amdgpu_cs_chunk))) {
			ret = -EFAULT;
			i--;
			goto free_partial_kdata;
		}
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		p->chunks[i].length_dw = user_chunk.length_dw;

		size = p->chunks[i].length_dw;
		cdata = u64_to_user_ptr(user_chunk.chunk_data);

		p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
		if (p->chunks[i].kdata == NULL) {
			ret = -ENOMEM;
			i--;
			goto free_partial_kdata;
		}
		size *= sizeof(uint32_t);
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			ret = -EFAULT;
			goto free_partial_kdata;
		}

		switch (p->chunks[i].chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			++num_ibs;
			break;

		case AMDGPU_CHUNK_ID_FENCE:
			size = sizeof(struct drm_amdgpu_cs_chunk_fence);
			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
				ret = -EINVAL;
				goto free_partial_kdata;
			}

			ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
							 &uf_offset);
			if (ret)
				goto free_partial_kdata;

			break;

		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
			break;

		default:
			ret = -EINVAL;
			goto free_partial_kdata;
		}
	}

	ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
	if (ret)
		goto free_all_kdata;

	if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
		ret = -ECANCELED;
		goto free_all_kdata;
	}

	if (p->uf_entry.robj)
		p->job->uf_addr = uf_offset;
	kfree(chunk_array);
	return 0;

free_all_kdata:
	i = p->nchunks - 1;
free_partial_kdata:
	for (; i >= 0; i--)
		kvfree(p->chunks[i].kdata);
	kfree(p->chunks);
	p->chunks = NULL;
	p->nchunks = 0;
free_chunk:
	kfree(chunk_array);

	return ret;
}
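/*
 * Worked example for the chunk copy above: length_dw counts 32-bit dwords,
 * so a chunk with length_dw == 256 results in a kvmalloc_array(256,
 * sizeof(uint32_t), GFP_KERNEL) allocation and a 256 * 4 = 1024 byte
 * copy_from_user(). A fence chunk is only accepted when
 * length_dw * 4 >= sizeof(struct drm_amdgpu_cs_chunk_fence).
 */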
/* Convert microseconds to bytes. */
static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
{
	if (us <= 0 || !adev->mm_stats.log2_max_MBps)
		return 0;

	/* Since accum_us is incremented by a million per second, just
	 * multiply it by the number of MB/s to get the number of bytes.
	 */
	return us << adev->mm_stats.log2_max_MBps;
}

static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
{
	if (!adev->mm_stats.log2_max_MBps)
		return 0;

	return bytes >> adev->mm_stats.log2_max_MBps;
}
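/*
 * Worked example: with log2_max_MBps == 6 the device is rated at
 * 2^6 = 64 MB/s. us_to_bytes() then maps 200000 us (the 200 ms cap used
 * below) to 200000 << 6 = 12800000 bytes, i.e. 0.2 s * 64 MB/s, and
 * bytes_to_us() is the inverse shift. The conversion treats 1 MB as 10^6
 * bytes, which is accurate enough for throttling.
 */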
/* Returns how many bytes TTM can move right now. If no bytes can be moved,
 * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
 * which means it can go over the threshold once. If that happens, the driver
 * will be in debt and no other buffer migrations can be done until that debt
 * is repaid.
 *
 * This approach allows moving a buffer of any size (it's important to allow
 * that).
 *
 * The currency is simply time in microseconds and it increases as the clock
 * ticks. The accumulated microseconds (us) are converted to bytes and
 * returned.
 */
static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
					      u64 *max_bytes,
					      u64 *max_vis_bytes)
{
	s64 time_us, increment_us;
	u64 free_vram, total_vram, used_vram;

	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
	 * throttling.
	 *
	 * It means that in order to get full max MBps, at least 5 IBs per
	 * second must be submitted and not more than 200ms apart from each
	 * other.
	 */
	const s64 us_upper_bound = 200000;

	if (!adev->mm_stats.log2_max_MBps) {
		*max_bytes = 0;
		*max_vis_bytes = 0;
		return;
	}

	total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
	used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;

	spin_lock(&adev->mm_stats.lock);

	/* Increase the amount of accumulated us. */
	time_us = ktime_to_us(ktime_get());
	increment_us = time_us - adev->mm_stats.last_update_us;
	adev->mm_stats.last_update_us = time_us;
	adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
				      us_upper_bound);

	/* This prevents the short period of low performance when the VRAM
	 * usage is low and the driver is in debt or doesn't have enough
	 * accumulated us to fill VRAM quickly.
	 *
	 * The situation can occur in these cases:
	 * - a lot of VRAM is freed by userspace
	 * - the presence of a big buffer causes a lot of evictions
	 *   (solution: split buffers into smaller ones)
	 *
	 * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
	 * accum_us to a positive number.
	 */
	if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
		s64 min_us;

		/* Be more aggressive on dGPUs. Try to fill a portion of free
		 * VRAM now.
		 */
		if (!(adev->flags & AMD_IS_APU))
			min_us = bytes_to_us(adev, free_vram / 4);
		else
			min_us = 0; /* Reset accum_us on APUs. */

		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
	}

	/* This is set to 0 if the driver is in debt to disallow (optional)
	 * buffer moves.
	 */
	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);

	/* Do the same for visible VRAM if half of it is free */
	if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
		u64 total_vis_vram = adev->mc.visible_vram_size;
		u64 used_vis_vram =
			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);

		if (used_vis_vram < total_vis_vram) {
			u64 free_vis_vram = total_vis_vram - used_vis_vram;

			adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
							  increment_us, us_upper_bound);

			if (free_vis_vram >= total_vis_vram / 2)
				adev->mm_stats.accum_us_vis =
					max(bytes_to_us(adev, free_vis_vram / 2),
					    adev->mm_stats.accum_us_vis);
		}

		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
	} else {
		*max_vis_bytes = 0;
	}

	spin_unlock(&adev->mm_stats.lock);
}
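/*
 * Example of the boost above on a dGPU: with 1 GiB of free VRAM the
 * accumulated time is raised to at least bytes_to_us(adev, 256 MiB), so the
 * next submissions may move roughly a quarter of the free VRAM right away
 * instead of waiting for the 200 ms budget to accumulate.
 */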
/* Report how many bytes have really been moved for the last command
 * submission. This can result in a debt that can stop buffer migrations
 * temporarily.
 */
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
				  u64 num_vis_bytes)
{
	spin_lock(&adev->mm_stats.lock);
	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
	spin_unlock(&adev->mm_stats.lock);
}
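/*
 * This is where the "debt" mentioned above is taken on: if a submission
 * moved more bytes than the threshold allowed, accum_us goes negative and
 * us_to_bytes() keeps returning 0 (it treats us <= 0 as "nothing allowed"),
 * so further moves are blocked until the clock in
 * amdgpu_cs_get_threshold_for_moves() has repaid the difference.
 */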
static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
				 struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
		.allow_reserved_eviction = false,
		.resv = bo->tbo.resv
	};
	uint32_t domain;
	int r;

	if (bo->pin_count)
		return 0;

	/* Don't move this buffer if we have depleted our allowance
	 * to move it. Don't move anything if the threshold is zero.
	 */
	if (p->bytes_moved < p->bytes_moved_threshold) {
		if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
			 * visible VRAM if we've depleted our allowance to do
			 * that.
			 */
			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
				domain = bo->preferred_domains;
			else
				domain = bo->allowed_domains;
		} else {
			domain = bo->preferred_domains;
		}
	} else {
		domain = bo->allowed_domains;
	}

retry:
	amdgpu_ttm_placement_from_domain(bo, domain);
	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

	p->bytes_moved += ctx.bytes_moved;
	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
	    bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
		p->bytes_moved_vis += ctx.bytes_moved;

	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
		domain = bo->allowed_domains;
		goto retry;
	}

	return r;
}
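/*
 * Summary of the fallback chain above: a BO is validated against its
 * preferred domains while the move budget lasts, CPU_ACCESS_REQUIRED BOs
 * additionally respect the visible-VRAM budget, and on -ENOMEM the
 * validation is retried once with the wider allowed_domains set.
 */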
/* Last resort, try to evict something from the current working set */
static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
				struct amdgpu_bo *validated)
{
	uint32_t domain = validated->allowed_domains;
	struct ttm_operation_ctx ctx = { true, false };
	int r;

	if (!p->evictable)
		return false;

	for (;&p->evictable->tv.head != &p->validated;
	     p->evictable = list_prev_entry(p->evictable, tv.head)) {

		struct amdgpu_bo_list_entry *candidate = p->evictable;
		struct amdgpu_bo *bo = candidate->robj;
		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
		u64 initial_bytes_moved, bytes_moved;
		bool update_bytes_moved_vis;
		uint32_t other;

		/* If we reached our current BO we can forget it */
		if (candidate->robj == validated)
			break;

		/* We can't move pinned BOs here */
		if (bo->pin_count)
			continue;

		other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);

		/* Check if this BO is in one of the domains we need space for */
		if (!(other & domain))
			continue;

		/* Check if we can move this BO somewhere else */
		other = bo->allowed_domains & ~domain;
		if (!other)
			continue;

		/* Good we can try to move this BO somewhere else */
		amdgpu_ttm_placement_from_domain(bo, other);
		update_bytes_moved_vis =
			adev->mc.visible_vram_size < adev->mc.real_vram_size &&
			bo->tbo.mem.mem_type == TTM_PL_VRAM &&
			bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT;
		initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
		bytes_moved = atomic64_read(&adev->num_bytes_moved) -
			initial_bytes_moved;
		p->bytes_moved += bytes_moved;
		if (update_bytes_moved_vis)
			p->bytes_moved_vis += bytes_moved;

		if (unlikely(r))
			break;

		p->evictable = list_prev_entry(p->evictable, tv.head);
		list_move(&candidate->tv.head, &p->validated);

		return true;
	}

	return false;
}
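/*
 * Note: the eviction walk above starts at p->evictable and moves backwards
 * through p->validated until it reaches the BO currently being validated;
 * each candidate is pushed out of the contended domain using
 * bo->allowed_domains & ~domain to free space for exactly one retry.
 */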
static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
{
	struct amdgpu_cs_parser *p = param;
	int r;

	do {
		r = amdgpu_cs_bo_validate(p, bo);
	} while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
	if (r)
		return r;

	if (bo->shadow)
		r = amdgpu_cs_bo_validate(p, bo->shadow);

	return r;
}
static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
				   struct list_head *validated)
{
	struct ttm_operation_ctx ctx = { true, false };
	struct amdgpu_bo_list_entry *lobj;
	int r;

	list_for_each_entry(lobj, validated, tv.head) {
		struct amdgpu_bo *bo = lobj->robj;
		bool binding_userptr = false;
		struct mm_struct *usermm;

		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
		if (usermm && usermm != current->mm)
			return -EPERM;

		/* Check if we have user pages and nobody bound the BO already */
		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
		    lobj->user_pages) {
			amdgpu_ttm_placement_from_domain(bo,
							 AMDGPU_GEM_DOMAIN_CPU);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
			if (r)
				return r;
			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
						     lobj->user_pages);
			binding_userptr = true;
		}

		if (p->evictable == lobj)
			p->evictable = NULL;

		r = amdgpu_cs_validate(p, bo);
		if (r)
			return r;

		if (binding_userptr) {
			kvfree(lobj->user_pages);
			lobj->user_pages = NULL;
		}
	}
	return 0;
}
static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
				union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_bo_list_entry *e;
	struct list_head duplicates;
	unsigned i, tries = 10;
	int r;

	INIT_LIST_HEAD(&p->validated);

	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
	if (p->bo_list) {
		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
		if (p->bo_list->first_userptr != p->bo_list->num_entries)
			p->mn = amdgpu_mn_get(p->adev);
	}

	INIT_LIST_HEAD(&duplicates);
	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);

	if (p->uf_entry.robj)
		list_add(&p->uf_entry.tv.head, &p->validated);

	while (1) {
		struct list_head need_pages;

		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
					   &duplicates);
		if (unlikely(r != 0)) {
			if (r != -ERESTARTSYS)
				DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
			goto error_free_pages;
		}

		/* Without a BO list we don't have userptr BOs */
		if (!p->bo_list)
			break;

		INIT_LIST_HEAD(&need_pages);
		for (i = p->bo_list->first_userptr;
		     i < p->bo_list->num_entries; ++i) {
			struct amdgpu_bo *bo;

			e = &p->bo_list->array[i];
			bo = e->robj;

			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
				 &e->user_invalidated) && e->user_pages) {

				/* We acquired a page array, but somebody
				 * invalidated it. Free it and try again
				 */
				release_pages(e->user_pages,
					      bo->tbo.ttm->num_pages);
				kvfree(e->user_pages);
				e->user_pages = NULL;
			}

			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
			    !e->user_pages) {
				list_del(&e->tv.head);
				list_add(&e->tv.head, &need_pages);

				amdgpu_bo_unreserve(e->robj);
			}
		}

		if (list_empty(&need_pages))
			break;

		/* Unreserve everything again. */
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);

		/* We tried too many times, just abort */
		if (!--tries) {
			r = -EDEADLK;
			DRM_ERROR("deadlock in %s\n", __func__);
			goto error_free_pages;
		}

		/* Fill the page arrays for all userptrs. */
		list_for_each_entry(e, &need_pages, tv.head) {
			struct ttm_tt *ttm = e->robj->tbo.ttm;

			e->user_pages = kvmalloc_array(ttm->num_pages,
						       sizeof(struct page *),
						       GFP_KERNEL | __GFP_ZERO);
			if (!e->user_pages) {
				r = -ENOMEM;
				DRM_ERROR("calloc failure in %s\n", __func__);
				goto error_free_pages;
			}

			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
			if (r) {
				DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
				kvfree(e->user_pages);
				e->user_pages = NULL;
				goto error_free_pages;
			}
		}

		/* And try again. */
		list_splice(&need_pages, &p->validated);
	}

	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
					  &p->bytes_moved_vis_threshold);
	p->bytes_moved = 0;
	p->bytes_moved_vis = 0;
	p->evictable = list_last_entry(&p->validated,
				       struct amdgpu_bo_list_entry,
				       tv.head);

	r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
				      amdgpu_cs_validate, p);
	if (r) {
		DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
		goto error_validate;
	}

	r = amdgpu_cs_list_validate(p, &duplicates);
	if (r) {
		DRM_ERROR("amdgpu_cs_list_validate(duplicates) failed.\n");
		goto error_validate;
	}

	r = amdgpu_cs_list_validate(p, &p->validated);
	if (r) {
		DRM_ERROR("amdgpu_cs_list_validate(validated) failed.\n");
		goto error_validate;
	}

	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
				     p->bytes_moved_vis);

	if (p->bo_list) {
		struct amdgpu_bo *gds = p->bo_list->gds_obj;
		struct amdgpu_bo *gws = p->bo_list->gws_obj;
		struct amdgpu_bo *oa = p->bo_list->oa_obj;
		struct amdgpu_vm *vm = &fpriv->vm;

		for (i = 0; i < p->bo_list->num_entries; i++) {
			struct amdgpu_bo *bo = p->bo_list->array[i].robj;

			p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
		}

		if (gds) {
			p->job->gds_base = amdgpu_bo_gpu_offset(gds);
			p->job->gds_size = amdgpu_bo_size(gds);
		}
		if (gws) {
			p->job->gws_base = amdgpu_bo_gpu_offset(gws);
			p->job->gws_size = amdgpu_bo_size(gws);
		}
		if (oa) {
			p->job->oa_base = amdgpu_bo_gpu_offset(oa);
			p->job->oa_size = amdgpu_bo_size(oa);
		}
	}

	if (!r && p->uf_entry.robj) {
		struct amdgpu_bo *uf = p->uf_entry.robj;

		r = amdgpu_ttm_alloc_gart(&uf->tbo);
		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
	}

error_validate:
	if (r)
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);

error_free_pages:

	if (p->bo_list) {
		for (i = p->bo_list->first_userptr;
		     i < p->bo_list->num_entries; ++i) {
			e = &p->bo_list->array[i];

			if (!e->user_pages)
				continue;

			release_pages(e->user_pages,
				      e->robj->tbo.ttm->num_pages);
			kvfree(e->user_pages);
		}
	}

	return r;
}
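/*
 * Userptr handling above in short: reservation is retried up to ten times
 * (tries == 10); each round drops page arrays that were invalidated since
 * the last attempt, unreserves the affected BOs, fills fresh page arrays
 * with amdgpu_ttm_tt_get_user_pages() and splices them back into
 * p->validated. Running out of retries is reported as -EDEADLK.
 */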
static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
	struct amdgpu_bo_list_entry *e;
	int r;

	list_for_each_entry(e, &p->validated, tv.head) {
		struct reservation_object *resv = e->robj->tbo.resv;

		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
				     amdgpu_bo_explicit_sync(e->robj));
		if (r)
			return r;
	}
	return 0;
}
/**
 * cs_parser_fini() - clean parser states
 * @parser:	parser structure holding parsing context.
 * @error:	error number
 *
 * If error is set, then unvalidate the buffers; otherwise just free the
 * memory used by the parsing context.
 **/
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
				  bool backoff)
{
	unsigned i;

	if (error && backoff)
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);

	for (i = 0; i < parser->num_post_dep_syncobjs; i++)
		drm_syncobj_put(parser->post_dep_syncobjs[i]);
	kfree(parser->post_dep_syncobjs);

	dma_fence_put(parser->fence);

	if (parser->ctx) {
		mutex_unlock(&parser->ctx->lock);
		amdgpu_ctx_put(parser->ctx);
	}
	if (parser->bo_list)
		amdgpu_bo_list_put(parser->bo_list);

	for (i = 0; i < parser->nchunks; i++)
		kvfree(parser->chunks[i].kdata);
	kfree(parser->chunks);
	if (parser->job)
		amdgpu_job_free(parser->job);
	amdgpu_bo_unref(&parser->uf_entry.robj);
}
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
{
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_va *bo_va;
	struct amdgpu_bo *bo;
	int i, r;

	r = amdgpu_vm_clear_freed(adev, vm, NULL);
	if (r)
		return r;

	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
	if (r)
		return r;

	r = amdgpu_sync_fence(adev, &p->job->sync,
			      fpriv->prt_va->last_pt_update, false);
	if (r)
		return r;

	if (amdgpu_sriov_vf(adev)) {
		struct dma_fence *f;

		bo_va = fpriv->csa_va;
		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		f = bo_va->last_pt_update;
		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
		if (r)
			return r;
	}

	if (p->bo_list) {
		for (i = 0; i < p->bo_list->num_entries; i++) {
			struct dma_fence *f;

			/* ignore duplicates */
			bo = p->bo_list->array[i].robj;
			if (!bo)
				continue;

			bo_va = p->bo_list->array[i].bo_va;
			if (bo_va == NULL)
				continue;

			r = amdgpu_vm_bo_update(adev, bo_va, false);
			if (r)
				return r;

			f = bo_va->last_pt_update;
			r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
			if (r)
				return r;
		}
	}

	r = amdgpu_vm_handle_moved(adev, vm);
	if (r)
		return r;

	r = amdgpu_vm_update_directories(adev, vm);
	if (r)
		return r;

	r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
	if (r)
		return r;

	if (amdgpu_vm_debug && p->bo_list) {
		/* Invalidate all BOs to test for userspace bugs */
		for (i = 0; i < p->bo_list->num_entries; i++) {
			/* ignore duplicates */
			bo = p->bo_list->array[i].robj;
			if (!bo)
				continue;

			amdgpu_vm_bo_invalidate(adev, bo, false);
		}
	}

	return r;
}
static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
				 struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_ring *ring = p->job->ring;
	int r;

	/* Only for UVD/VCE VM emulation */
	if (p->job->ring->funcs->parse_cs) {
		unsigned i, j;

		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
			struct amdgpu_bo_va_mapping *m;
			struct amdgpu_bo *aobj = NULL;
			struct amdgpu_cs_chunk *chunk;
			uint64_t offset, va_start;
			struct amdgpu_ib *ib;
			uint8_t *kptr;

			chunk = &p->chunks[i];
			ib = &p->job->ibs[j];
			chunk_ib = chunk->kdata;

			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
				continue;

			va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK;
			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
			if (r) {
				DRM_ERROR("IB va_start is invalid\n");
				return r;
			}

			if ((va_start + chunk_ib->ib_bytes) >
			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
				return -EINVAL;
			}

			/* the IB should be reserved at this point */
			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
			if (r)
				return r;

			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
			kptr += va_start - offset;

			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
			amdgpu_bo_kunmap(aobj);

			r = amdgpu_ring_parse_cs(ring, p, j);
			if (r)
				return r;

			j++;
		}
	}

	if (p->job->vm) {
		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);

		r = amdgpu_bo_vm_update_pte(p);
		if (r)
			return r;
	}

	return amdgpu_cs_sync_rings(p);
}
static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
			     struct amdgpu_cs_parser *parser)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	int i, j;
	int r, ce_preempt = 0, de_preempt = 0;

	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
		struct amdgpu_cs_chunk *chunk;
		struct amdgpu_ib *ib;
		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
		struct amdgpu_ring *ring;

		chunk = &parser->chunks[i];
		ib = &parser->job->ibs[j];
		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
			continue;

		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && amdgpu_sriov_vf(adev)) {
			if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
				if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
					ce_preempt++;
				else
					de_preempt++;
			}

			/* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
			if (ce_preempt > 1 || de_preempt > 1)
				return -EINVAL;
		}

		r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
					 chunk_ib->ip_instance, chunk_ib->ring, &ring);
		if (r)
			return r;

		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
			parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
			if (!parser->ctx->preamble_presented) {
				parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
				parser->ctx->preamble_presented = true;
			}
		}

		if (parser->job->ring && parser->job->ring != ring)
			return -EINVAL;

		parser->job->ring = ring;

		r = amdgpu_ib_get(adev, vm,
				  ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
				  ib);
		if (r) {
			DRM_ERROR("Failed to get ib !\n");
			return r;
		}

		ib->gpu_addr = chunk_ib->va_start;
		ib->length_dw = chunk_ib->ib_bytes / 4;
		ib->flags = chunk_ib->flags;

		j++;
	}

	/* UVD & VCE fw doesn't support user fences */
	if (parser->job->uf_addr && (
	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
		return -EINVAL;

	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
}
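/*
 * Size bookkeeping above: chunk_ib->ib_bytes is in bytes while
 * ib->length_dw is in dwords, so an IB of 4096 bytes becomes
 * length_dw == 1024. A kernel copy of the IB is only allocated
 * (amdgpu_ib_get(..., ib_bytes, ...)) for rings that implement parse_cs,
 * i.e. the UVD/VCE VM emulation path handled in amdgpu_cs_ib_vm_chunk()
 * above.
 */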
static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
				       struct amdgpu_cs_chunk *chunk)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	unsigned num_deps;
	int i, r;
	struct drm_amdgpu_cs_chunk_dep *deps;

	deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_dep);

	for (i = 0; i < num_deps; ++i) {
		struct amdgpu_ring *ring;
		struct amdgpu_ctx *ctx;
		struct dma_fence *fence;

		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
		if (ctx == NULL)
			return -EINVAL;

		r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
					 deps[i].ip_type,
					 deps[i].ip_instance,
					 deps[i].ring, &ring);
		if (r) {
			amdgpu_ctx_put(ctx);
			return r;
		}

		fence = amdgpu_ctx_get_fence(ctx, ring,
					     deps[i].handle);
		if (IS_ERR(fence)) {
			r = PTR_ERR(fence);
			amdgpu_ctx_put(ctx);
			return r;
		} else if (fence) {
			r = amdgpu_sync_fence(p->adev, &p->job->sync, fence,
					      true);
			dma_fence_put(fence);
			amdgpu_ctx_put(ctx);
			if (r)
				return r;
		}
	}
	return 0;
}
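/*
 * The dependency count above follows the chunk layout: length_dw is in
 * dwords, so the number of drm_amdgpu_cs_chunk_dep entries is
 * length_dw * 4 / sizeof(struct drm_amdgpu_cs_chunk_dep). Each entry names
 * a context, a ring and a sequence number whose fence is merged into
 * p->job->sync before the job is pushed to the scheduler.
 */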
static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
						 uint32_t handle)
{
	int r;
	struct dma_fence *fence;

	r = drm_syncobj_find_fence(p->filp, handle, &fence);
	if (r)
		return r;

	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
	dma_fence_put(fence);

	return r;
}
static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
					    struct amdgpu_cs_chunk *chunk)
{
	unsigned num_deps;
	int i, r;
	struct drm_amdgpu_cs_chunk_sem *deps;

	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);

	for (i = 0; i < num_deps; ++i) {
		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
		if (r)
			return r;
	}
	return 0;
}
static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
					     struct amdgpu_cs_chunk *chunk)
{
	unsigned num_deps;
	int i;
	struct drm_amdgpu_cs_chunk_sem *deps;

	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);

	p->post_dep_syncobjs = kmalloc_array(num_deps,
					     sizeof(struct drm_syncobj *),
					     GFP_KERNEL);
	p->num_post_dep_syncobjs = 0;

	if (!p->post_dep_syncobjs)
		return -ENOMEM;

	for (i = 0; i < num_deps; ++i) {
		p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
		if (!p->post_dep_syncobjs[i])
			return -EINVAL;
		p->num_post_dep_syncobjs++;
	}
	return 0;
}
static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
				  struct amdgpu_cs_parser *p)
{
	int i, r;

	for (i = 0; i < p->nchunks; ++i) {
		struct amdgpu_cs_chunk *chunk;

		chunk = &p->chunks[i];

		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
			r = amdgpu_cs_process_fence_dep(p, chunk);
			if (r)
				return r;
		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
			r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
			if (r)
				return r;
		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
			r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
			if (r)
				return r;
		}
	}

	return 0;
}
static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
{
	int i;

	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
}
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
			    union drm_amdgpu_cs *cs)
{
	struct amdgpu_ring *ring = p->job->ring;
	struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
	struct amdgpu_job *job;
	unsigned i;
	uint64_t seq;
	int r;

	amdgpu_mn_lock(p->mn);
	if (p->bo_list) {
		for (i = p->bo_list->first_userptr;
		     i < p->bo_list->num_entries; ++i) {
			struct amdgpu_bo *bo = p->bo_list->array[i].robj;

			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
				amdgpu_mn_unlock(p->mn);
				return -ERESTARTSYS;
			}
		}
	}

	job = p->job;
	p->job = NULL;

	r = drm_sched_job_init(&job->base, &ring->sched, entity, p->filp);
	if (r) {
		amdgpu_job_free(job);
		amdgpu_mn_unlock(p->mn);
		return r;
	}

	job->owner = p->filp;
	job->fence_ctx = entity->fence_context;
	p->fence = dma_fence_get(&job->base.s_fence->finished);

	r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
	if (r) {
		dma_fence_put(p->fence);
		dma_fence_put(&job->base.s_fence->finished);
		amdgpu_job_free(job);
		amdgpu_mn_unlock(p->mn);
		return r;
	}

	amdgpu_cs_post_dependencies(p);

	cs->out.handle = seq;
	job->uf_sequence = seq;

	amdgpu_job_free_resources(job);
	amdgpu_ring_priority_get(job->ring, job->base.s_priority);

	trace_amdgpu_cs_ioctl(job);
	drm_sched_entity_push_job(&job->base, entity);

	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
	amdgpu_mn_unlock(p->mn);

	return 0;
}
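/*
 * Ordering note for the function above: amdgpu_mn_lock() is held across the
 * final userptr check and drm_sched_entity_push_job(), so a userptr BO whose
 * pages were invalidated after validation is caught here and the ioctl is
 * restarted with -ERESTARTSYS rather than submitting stale pages.
 */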
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdgpu_device *adev = dev->dev_private;
	union drm_amdgpu_cs *cs = data;
	struct amdgpu_cs_parser parser = {};
	bool reserved_buffers = false;
	int i, r;

	if (!adev->accel_working)
		return -EBUSY;

	parser.adev = adev;
	parser.filp = filp;

	r = amdgpu_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		goto out;
	}

	r = amdgpu_cs_ib_fill(adev, &parser);
	if (r)
		goto out;

	r = amdgpu_cs_parser_bos(&parser, data);
	if (r) {
		if (r == -ENOMEM)
			DRM_ERROR("Not enough memory for command submission!\n");
		else if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to process the buffer list %d!\n", r);
		goto out;
	}

	reserved_buffers = true;

	r = amdgpu_cs_dependencies(adev, &parser);
	if (r) {
		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
		goto out;
	}

	for (i = 0; i < parser.job->num_ibs; i++)
		trace_amdgpu_cs(&parser, i);

	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
	if (r)
		goto out;

	r = amdgpu_cs_submit(&parser, cs);

out:
	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
	return r;
}
/**
 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 *
 * Wait for the command submission identified by handle to finish.
 */
int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *filp)
{
	union drm_amdgpu_wait_cs *wait = data;
	struct amdgpu_device *adev = dev->dev_private;
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
	struct amdgpu_ring *ring = NULL;
	struct amdgpu_ctx *ctx;
	struct dma_fence *fence;
	long r;

	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
	if (ctx == NULL)
		return -EINVAL;

	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
				 wait->in.ip_type, wait->in.ip_instance,
				 wait->in.ring, &ring);
	if (r) {
		amdgpu_ctx_put(ctx);
		return r;
	}

	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
	if (IS_ERR(fence))
		r = PTR_ERR(fence);
	else if (fence) {
		r = dma_fence_wait_timeout(fence, true, timeout);
		if (r > 0 && fence->error)
			r = fence->error;
		dma_fence_put(fence);
	} else
		r = 1;

	amdgpu_ctx_put(ctx);
	if (r < 0)
		return r;

	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r == 0);

	return 0;
}
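/*
 * Return-value mapping above: dma_fence_wait_timeout() returns 0 on timeout
 * and a positive value when the fence signals, so wait->out.status == 1
 * tells userspace that the submission has not finished within the requested
 * timeout.
 */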
/**
 * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
 *
 * @adev: amdgpu device
 * @filp: file private
 * @user: drm_amdgpu_fence copied from user space
 */
static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
					     struct drm_file *filp,
					     struct drm_amdgpu_fence *user)
{
	struct amdgpu_ring *ring;
	struct amdgpu_ctx *ctx;
	struct dma_fence *fence;
	int r;

	ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
	if (ctx == NULL)
		return ERR_PTR(-EINVAL);

	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
				 user->ip_instance, user->ring, &ring);
	if (r) {
		amdgpu_ctx_put(ctx);
		return ERR_PTR(r);
	}

	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
	amdgpu_ctx_put(ctx);

	return fence;
}
int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *filp)
{
	struct amdgpu_device *adev = dev->dev_private;
	union drm_amdgpu_fence_to_handle *info = data;
	struct dma_fence *fence;
	struct drm_syncobj *syncobj;
	struct sync_file *sync_file;
	int fd, r;

	fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	switch (info->in.what) {
	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
		r = drm_syncobj_create(&syncobj, 0, fence);
		dma_fence_put(fence);
		if (r)
			return r;
		r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
		drm_syncobj_put(syncobj);
		return r;

	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
		r = drm_syncobj_create(&syncobj, 0, fence);
		dma_fence_put(fence);
		if (r)
			return r;
		r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle);
		drm_syncobj_put(syncobj);
		return r;

	case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
		fd = get_unused_fd_flags(O_CLOEXEC);
		if (fd < 0) {
			dma_fence_put(fence);
			return fd;
		}

		sync_file = sync_file_create(fence);
		dma_fence_put(fence);
		if (!sync_file) {
			put_unused_fd(fd);
			return -ENOMEM;
		}

		fd_install(fd, sync_file->file);
		info->out.handle = fd;
		return 0;

	default:
		return -EINVAL;
	}
}
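/*
 * The three cases above export the same fence in different forms: as a
 * syncobj handle, as a syncobj file descriptor, or wrapped in a sync_file
 * fd; in every branch the local fence reference is dropped once the new
 * handle holds its own reference.
 */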
/**
 * amdgpu_cs_wait_all_fences - wait on all fences to signal
 *
 * @adev: amdgpu device
 * @filp: file private
 * @wait: wait parameters
 * @fences: array of drm_amdgpu_fence
 */
static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
				     struct drm_file *filp,
				     union drm_amdgpu_wait_fences *wait,
				     struct drm_amdgpu_fence *fences)
{
	uint32_t fence_count = wait->in.fence_count;
	unsigned int i;
	long r = 1;

	for (i = 0; i < fence_count; i++) {
		struct dma_fence *fence;
		unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);

		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
		if (IS_ERR(fence))
			return PTR_ERR(fence);
		else if (!fence)
			continue;

		r = dma_fence_wait_timeout(fence, true, timeout);
		dma_fence_put(fence);
		if (r < 0)
			return r;

		if (r == 0)
			break;

		if (fence->error)
			return fence->error;
	}

	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r > 0);

	return 0;
}
/**
 * amdgpu_cs_wait_any_fence - wait on any fence to signal
 *
 * @adev: amdgpu device
 * @filp: file private
 * @wait: wait parameters
 * @fences: array of drm_amdgpu_fence
 */
static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
				    struct drm_file *filp,
				    union drm_amdgpu_wait_fences *wait,
				    struct drm_amdgpu_fence *fences)
{
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
	uint32_t fence_count = wait->in.fence_count;
	uint32_t first = ~0;
	struct dma_fence **array;
	unsigned int i;
	long r;

	/* Prepare the fence array */
	array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);
	if (array == NULL)
		return -ENOMEM;

	for (i = 0; i < fence_count; i++) {
		struct dma_fence *fence;

		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
		if (IS_ERR(fence)) {
			r = PTR_ERR(fence);
			goto err_free_fence_array;
		} else if (fence) {
			array[i] = fence;
		} else { /* NULL, the fence has been already signaled */
			r = 1;
			first = i;
			goto out;
		}
	}

	r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
				       &first);
	if (r < 0)
		goto err_free_fence_array;

out:
	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r > 0);
	wait->out.first_signaled = first;

	if (first < fence_count && array[first])
		r = array[first]->error;
	else
		r = 0;

err_free_fence_array:
	for (i = 0; i < fence_count; i++)
		dma_fence_put(array[i]);
	kfree(array);

	return r;
}
/**
 * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 */
int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
				struct drm_file *filp)
{
	struct amdgpu_device *adev = dev->dev_private;
	union drm_amdgpu_wait_fences *wait = data;
	uint32_t fence_count = wait->in.fence_count;
	struct drm_amdgpu_fence *fences_user;
	struct drm_amdgpu_fence *fences;
	int r;

	/* Get the fences from userspace */
	fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
			       GFP_KERNEL);
	if (fences == NULL)
		return -ENOMEM;

	fences_user = u64_to_user_ptr(wait->in.fences);
	if (copy_from_user(fences, fences_user,
			   sizeof(struct drm_amdgpu_fence) * fence_count)) {
		r = -EFAULT;
		goto err_free_fences;
	}

	if (wait->in.wait_all)
		r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
	else
		r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);

err_free_fences:
	kfree(fences);

	return r;
}
/**
 * amdgpu_cs_find_bo_va - find bo_va for VM address
 *
 * @parser: command submission parser context
 * @addr: VM address
 * @bo: resulting BO of the mapping found
 *
 * Search the buffer objects in the command submission context for a certain
 * virtual memory address. Returns allocation structure when found, NULL
 * otherwise.
 */
int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
			   uint64_t addr, struct amdgpu_bo **bo,
			   struct amdgpu_bo_va_mapping **map)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_va_mapping *mapping;
	int r;

	addr /= AMDGPU_GPU_PAGE_SIZE;

	mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
	if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
		return -EINVAL;

	*bo = mapping->bo_va->base.bo;
	*map = mapping;

	/* Double check that the BO is reserved by this CS */
	if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket)
		return -EINVAL;

	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
		(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
		amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
		if (r)
			return r;
	}

	return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
}
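/*
 * Note on the validation above: amdgpu_cs_ib_vm_chunk() kmaps the BO
 * returned here and memcpy()s the IB out of it, which is presumably why the
 * mapping is re-validated as VRAM_CONTIGUOUS and given a GART address via
 * amdgpu_ttm_alloc_gart() before being handed back to the caller.
 */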