/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: monk liu <monk.liu@amd.com>
 */
#include <drm/drm_auth.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
#include "amdgpu_ras.h"

#define to_amdgpu_ctx_entity(e)	\
	container_of((e), struct amdgpu_ctx_entity, entity)

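/*
 * Number of scheduler entities each context exposes per hardware IP type:
 * COMPUTE and DMA fan out over several rings, every other IP gets a single
 * entity per context.
 */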
const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
	[AMDGPU_HW_IP_GFX]	= 1,
	[AMDGPU_HW_IP_COMPUTE]	= 4,
	[AMDGPU_HW_IP_DMA]	= 2,
	[AMDGPU_HW_IP_UVD]	= 1,
	[AMDGPU_HW_IP_VCE]	= 1,
	[AMDGPU_HW_IP_UVD_ENC]	= 1,
	[AMDGPU_HW_IP_VCN_DEC]	= 1,
	[AMDGPU_HW_IP_VCN_ENC]	= 1,
	[AMDGPU_HW_IP_VCN_JPEG]	= 1,
};

static int amdgpu_ctx_total_num_entities(void)
{
	unsigned i, num_entities = 0;

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
		num_entities += amdgpu_ctx_num_entities[i];

	return num_entities;
}

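/*
 * Contexts above NORMAL priority are restricted: the caller must either hold
 * CAP_SYS_NICE or be the current DRM master, otherwise the request is denied.
 */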
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
				      enum drm_sched_priority priority)
{
	/* NORMAL and below are accessible by everyone */
	if (priority <= DRM_SCHED_PRIORITY_NORMAL)
		return 0;

	if (capable(CAP_SYS_NICE))
		return 0;

	if (drm_is_current_master(filp))
		return 0;

	return -EACCES;
}

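/*
 * Initialize a context: allocate the per-entity fence ring buffers, pick the
 * scheduler list for every hardware IP and create one scheduler entity per
 * exposed ring.
 */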
static int amdgpu_ctx_init(struct amdgpu_device *adev,
			   enum drm_sched_priority priority,
			   struct drm_file *filp,
			   struct amdgpu_ctx *ctx)
{
	unsigned num_entities = amdgpu_ctx_total_num_entities();
	unsigned i, j;
	int r;

	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
		return -EINVAL;

	r = amdgpu_ctx_priority_permit(filp, priority);
	if (r)
		return r;

	memset(ctx, 0, sizeof(*ctx));

	ctx->adev = adev;
	ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
			      sizeof(struct dma_fence *), GFP_KERNEL);
	if (!ctx->fences)
		return -ENOMEM;

	ctx->entities[0] = kcalloc(num_entities,
				   sizeof(struct amdgpu_ctx_entity),
				   GFP_KERNEL);
	if (!ctx->entities[0]) {
		r = -ENOMEM;
		goto error_free_fences;
	}
	for (i = 0; i < num_entities; ++i) {
		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];

		entity->sequence = 1;
		entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
	}
	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
		ctx->entities[i] = ctx->entities[i - 1] +
			amdgpu_ctx_num_entities[i - 1];
	kref_init(&ctx->refcount);
	spin_lock_init(&ctx->ring_lock);
	mutex_init(&ctx->lock);

	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
	ctx->reset_counter_query = ctx->reset_counter;
	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
	ctx->init_priority = priority;
	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		struct drm_gpu_scheduler **scheds;
		struct drm_gpu_scheduler *sched;
		unsigned num_scheds = 0;

		switch (i) {
		case AMDGPU_HW_IP_GFX:
			sched = &adev->gfx.gfx_ring[0].sched;
			scheds = &sched;
			num_scheds = 1;
			break;
		case AMDGPU_HW_IP_COMPUTE:
			scheds = adev->gfx.compute_sched;
			num_scheds = adev->gfx.num_compute_sched;
			break;
		case AMDGPU_HW_IP_DMA:
			scheds = adev->sdma.sdma_sched;
			num_scheds = adev->sdma.num_sdma_sched;
			break;
		case AMDGPU_HW_IP_UVD:
			sched = &adev->uvd.inst[0].ring.sched;
			scheds = &sched;
			num_scheds = 1;
			break;
		case AMDGPU_HW_IP_VCE:
			sched = &adev->vce.ring[0].sched;
			scheds = &sched;
			num_scheds = 1;
			break;
		case AMDGPU_HW_IP_UVD_ENC:
			sched = &adev->uvd.inst[0].ring_enc[0].sched;
			scheds = &sched;
			num_scheds = 1;
			break;
		case AMDGPU_HW_IP_VCN_DEC:
			scheds = adev->vcn.vcn_dec_sched;
			num_scheds = adev->vcn.num_vcn_dec_sched;
			break;
		case AMDGPU_HW_IP_VCN_ENC:
			scheds = adev->vcn.vcn_enc_sched;
			num_scheds = adev->vcn.num_vcn_enc_sched;
			break;
		case AMDGPU_HW_IP_VCN_JPEG:
			scheds = adev->jpeg.jpeg_sched;
			num_scheds = adev->jpeg.num_jpeg_sched;
			break;
		}

		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
						  priority, scheds,
						  num_scheds, &ctx->guilty);
		if (r)
			goto error_cleanup_entities;
	}

	return 0;
error_cleanup_entities:
	for (i = 0; i < num_entities; ++i)
		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
	kfree(ctx->entities[0]);

error_free_fences:
	kfree(ctx->fences);
	ctx->fences = NULL;
	return r;
}

static void amdgpu_ctx_fini(struct kref *ref)
{
	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
	unsigned num_entities = amdgpu_ctx_total_num_entities();
	struct amdgpu_device *adev = ctx->adev;
	unsigned i, j;

	if (!adev)
		return;

	for (i = 0; i < num_entities; ++i)
		for (j = 0; j < amdgpu_sched_jobs; ++j)
			dma_fence_put(ctx->entities[0][i].fences[j]);
	kfree(ctx->fences);
	kfree(ctx->entities[0]);

	mutex_destroy(&ctx->lock);

	kfree(ctx);
}

int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
			  u32 ring, struct drm_sched_entity **entity)
{
	if (hw_ip >= AMDGPU_HW_IP_NUM) {
		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
		return -EINVAL;
	}

	/* Right now all IPs have only one instance - multiple rings. */
	if (instance != 0) {
		DRM_DEBUG("invalid ip instance: %d\n", instance);
		return -EINVAL;
	}

	if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
		DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
		return -EINVAL;
	}

	*entity = &ctx->entities[hw_ip][ring].entity;
	return 0;
}

static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv,
			    struct drm_file *filp,
			    enum drm_sched_priority priority,
			    uint32_t *id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;
	int r;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	mutex_lock(&mgr->lock);
	r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
	if (r < 0) {
		mutex_unlock(&mgr->lock);
		kfree(ctx);
		return r;
	}

	*id = (uint32_t)r;
	r = amdgpu_ctx_init(adev, priority, filp, ctx);
	if (r) {
		idr_remove(&mgr->ctx_handles, *id);
		*id = 0;
		kfree(ctx);
	}
	mutex_unlock(&mgr->lock);
	return r;
}

static void amdgpu_ctx_do_release(struct kref *ref)
{
	struct amdgpu_ctx *ctx;
	unsigned num_entities;
	u32 i;

	ctx = container_of(ref, struct amdgpu_ctx, refcount);

	num_entities = amdgpu_ctx_total_num_entities();
	for (i = 0; i < num_entities; i++)
		drm_sched_entity_destroy(&ctx->entities[0][i].entity);

	amdgpu_ctx_fini(ref);
}

static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;

	mutex_lock(&mgr->lock);
	ctx = idr_remove(&mgr->ctx_handles, id);
	if (ctx)
		kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	mutex_unlock(&mgr->lock);
	return ctx ? 0 : -EINVAL;
}

static int amdgpu_ctx_query(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv, uint32_t id,
			    union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	unsigned reset_counter;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	/* TODO: these two are always zero */
	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	/* determine if a GPU reset has occurred since the last call */
	reset_counter = atomic_read(&adev->gpu_reset_counter);
	/* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
	if (ctx->reset_counter_query == reset_counter)
		out->state.reset_status = AMDGPU_CTX_NO_RESET;
	else
		out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
	ctx->reset_counter_query = reset_counter;

	mutex_unlock(&mgr->lock);
	return 0;
}

static int amdgpu_ctx_query2(struct amdgpu_device *adev,
			     struct amdgpu_fpriv *fpriv, uint32_t id,
			     union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	unsigned long ras_counter;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;

	if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;

	if (atomic_read(&ctx->guilty))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

	/* query the uncorrectable error count */
	ras_counter = amdgpu_ras_query_error_count(adev, false);
	/* the RAS counter is monotonically increasing */
	if (ras_counter != ctx->ras_counter_ue) {
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
		ctx->ras_counter_ue = ras_counter;
	}

	/* query the correctable error count */
	ras_counter = amdgpu_ras_query_error_count(adev, true);
	if (ras_counter != ctx->ras_counter_ce) {
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
		ctx->ras_counter_ce = ras_counter;
	}

	mutex_unlock(&mgr->lock);
	return 0;
}

int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *filp)
{
	int r;
	uint32_t id;
	enum drm_sched_priority priority;

	union drm_amdgpu_ctx *args = data;
	struct amdgpu_device *adev = dev->dev_private;
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	id = args->in.ctx_id;
	priority = amdgpu_to_sched_priority(args->in.priority);

	/* For backwards compatibility reasons, we need to accept
	 * ioctls with garbage in the priority field */
	if (priority == DRM_SCHED_PRIORITY_INVALID)
		priority = DRM_SCHED_PRIORITY_NORMAL;

	switch (args->in.op) {
	case AMDGPU_CTX_OP_ALLOC_CTX:
		r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
		args->out.alloc.ctx_id = id;
		break;
	case AMDGPU_CTX_OP_FREE_CTX:
		r = amdgpu_ctx_free(fpriv, id);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE:
		r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE2:
		r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
		break;
	default:
		return -EINVAL;
	}

	return r;
}

struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return NULL;

	mgr = &fpriv->ctx_mgr;

	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (ctx)
		kref_get(&ctx->refcount);
	mutex_unlock(&mgr->lock);
	return ctx;
}

int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
	if (ctx == NULL)
		return -EINVAL;

	kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	return 0;
}

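/*
 * Store the fence of a just-submitted job in the entity's ring buffer of
 * amdgpu_sched_jobs slots and hand the sequence number back to the caller so
 * user space can look the fence up later.
 */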
void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
			  struct drm_sched_entity *entity,
			  struct dma_fence *fence, uint64_t *handle)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	uint64_t seq = centity->sequence;
	struct dma_fence *other = NULL;
	unsigned idx = 0;

	idx = seq & (amdgpu_sched_jobs - 1);
	other = centity->fences[idx];
	if (other)
		BUG_ON(!dma_fence_is_signaled(other));

	dma_fence_get(fence);

	spin_lock(&ctx->ring_lock);
	centity->fences[idx] = fence;
	centity->sequence++;
	spin_unlock(&ctx->ring_lock);

	dma_fence_put(other);

	*handle = seq;
}

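/*
 * Look up a previously added fence by sequence number. A seq of ~0 means the
 * most recent submission; sequence numbers that have not been issued yet
 * return ERR_PTR(-EINVAL), and fences older than the ring buffer depth
 * return NULL.
 */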
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				       struct drm_sched_entity *entity,
				       uint64_t seq)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *fence;

	spin_lock(&ctx->ring_lock);

	if (seq == ~0ull)
		seq = centity->sequence - 1;

	if (seq >= centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return ERR_PTR(-EINVAL);
	}

	if (seq + amdgpu_sched_jobs < centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return NULL;
	}

	fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
	spin_unlock(&ctx->ring_lock);

	return fence;
}

void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
				  enum drm_sched_priority priority)
{
	unsigned num_entities = amdgpu_ctx_total_num_entities();
	enum drm_sched_priority ctx_prio;
	unsigned i;

	ctx->override_priority = priority;

	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;

	for (i = 0; i < num_entities; i++) {
		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;

		drm_sched_entity_set_priority(entity, ctx_prio);
	}
}

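/*
 * Throttle command submission by waiting for the fence that sits
 * amdgpu_sched_jobs submissions back on this entity, if there is one.
 */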
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
			       struct drm_sched_entity *entity)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *other;
	unsigned idx;
	long r;

	spin_lock(&ctx->ring_lock);
	idx = centity->sequence & (amdgpu_sched_jobs - 1);
	other = dma_fence_get(centity->fences[idx]);
	spin_unlock(&ctx->ring_lock);

	if (!other)
		return 0;

	r = dma_fence_wait(other, true);
	if (r < 0 && r != -ERESTARTSYS)
		DRM_ERROR("Error (%ld) waiting for fence!\n", r);

	dma_fence_put(other);
	return r;
}

void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
{
	mutex_init(&mgr->lock);
	idr_init(&mgr->ctx_handles);
}

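/*
 * Flush every entity of every context owned by this manager so that no new
 * jobs can be pushed; returns the remaining timeout.
 */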
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
	unsigned num_entities = amdgpu_ctx_total_num_entities();
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i;

	idp = &mgr->ctx_handles;

	mutex_lock(&mgr->lock);
	idr_for_each_entry(idp, ctx, id) {
		for (i = 0; i < num_entities; i++) {
			struct drm_sched_entity *entity;

			entity = &ctx->entities[0][i].entity;
			timeout = drm_sched_entity_flush(entity, timeout);
		}
	}
	mutex_unlock(&mgr->lock);
	return timeout;
}

void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
	unsigned num_entities = amdgpu_ctx_total_num_entities();
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i;

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_read(&ctx->refcount) != 1) {
			DRM_ERROR("ctx %p is still alive\n", ctx);
			continue;
		}

		for (i = 0; i < num_entities; i++)
			drm_sched_entity_fini(&ctx->entities[0][i].entity);
	}
}

void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id;

	amdgpu_ctx_mgr_entity_fini(mgr);

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
			DRM_ERROR("ctx %p is still alive\n", ctx);
	}

	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->lock);
}

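/*
 * Build the per-IP scheduler arrays (GFX, compute, SDMA, VCN, JPEG) that
 * amdgpu_ctx_init() uses to pick schedulers, skipping harvested instances.
 */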
void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		adev->gfx.gfx_sched[i] = &adev->gfx.gfx_ring[i].sched;
		adev->gfx.num_gfx_sched++;
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		adev->gfx.compute_sched[i] = &adev->gfx.compute_ring[i].sched;
		adev->gfx.num_compute_sched++;
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		adev->sdma.sdma_sched[i] = &adev->sdma.instance[i].ring.sched;
		adev->sdma.num_sdma_sched++;
	}

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		adev->vcn.vcn_dec_sched[adev->vcn.num_vcn_dec_sched++] =
			&adev->vcn.inst[i].ring_dec.sched;
	}

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		for (j = 0; j < adev->vcn.num_enc_rings; ++j)
			adev->vcn.vcn_enc_sched[adev->vcn.num_vcn_enc_sched++] =
				&adev->vcn.inst[i].ring_enc[j].sched;
	}

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		if (adev->jpeg.harvest_config & (1 << i))
			continue;
		adev->jpeg.jpeg_sched[adev->jpeg.num_jpeg_sched++] =
			&adev->jpeg.inst[i].ring_dec.sched;
	}
}