/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: monk liu <monk.liu@amd.com>
 */
#include <drm/drmP.h>
#include <drm/drm_auth.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
#define to_amdgpu_ctx_entity(e)	\
	container_of((e), struct amdgpu_ctx_entity, entity)
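/* Number of scheduler entities a context exposes for each HW IP type. */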
const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
	[AMDGPU_HW_IP_GFX]	=	1,
	[AMDGPU_HW_IP_COMPUTE]	=	4,
	[AMDGPU_HW_IP_DMA]	=	2,
	[AMDGPU_HW_IP_UVD]	=	1,
	[AMDGPU_HW_IP_VCE]	=	1,
	[AMDGPU_HW_IP_UVD_ENC]	=	1,
	[AMDGPU_HW_IP_VCN_DEC]	=	1,
	[AMDGPU_HW_IP_VCN_ENC]	=	1,
	[AMDGPU_HW_IP_VCN_JPEG]	=	1,
};
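/* Sum of amdgpu_ctx_num_entities[] over all HW IP types. */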
static int amdgput_ctx_total_num_entities(void)
{
	unsigned i, num_entities = 0;

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
		num_entities += amdgpu_ctx_num_entities[i];

	return num_entities;
}
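/*
 * Only privileged clients (CAP_SYS_NICE or the current DRM master) may
 * request a context priority above NORMAL.
 */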
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
				      enum drm_sched_priority priority)
{
	/* NORMAL and below are accessible by everyone */
	if (priority <= DRM_SCHED_PRIORITY_NORMAL)
		return 0;

	if (capable(CAP_SYS_NICE))
		return 0;

	if (drm_is_current_master(filp))
		return 0;

	return -EACCES;
}
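/*
 * Allocate the per-context fence ring buffers and set up the scheduler
 * entities for each HW IP type, each entity backed by the run queues of
 * that IP's rings at the requested priority.
 */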
static int amdgpu_ctx_init(struct amdgpu_device *adev,
			   enum drm_sched_priority priority,
			   struct drm_file *filp,
			   struct amdgpu_ctx *ctx)
{
	unsigned num_entities = amdgput_ctx_total_num_entities();
	unsigned i, j;
	int r;

	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
		return -EINVAL;

	r = amdgpu_ctx_priority_permit(filp, priority);
	if (r)
		return r;

	memset(ctx, 0, sizeof(*ctx));

	ctx->adev = adev;

	ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
			      sizeof(struct dma_fence*), GFP_KERNEL);
	if (!ctx->fences)
		return -ENOMEM;

	ctx->entities[0] = kcalloc(num_entities,
				   sizeof(struct amdgpu_ctx_entity),
				   GFP_KERNEL);
	if (!ctx->entities[0]) {
		r = -ENOMEM;
		goto error_free_fences;
	}

	for (i = 0; i < num_entities; ++i) {
		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];

		entity->sequence = 1;
		entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
	}
	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
		ctx->entities[i] = ctx->entities[i - 1] +
			amdgpu_ctx_num_entities[i - 1];

	kref_init(&ctx->refcount);
	spin_lock_init(&ctx->ring_lock);
	mutex_init(&ctx->lock);

	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
	ctx->reset_counter_query = ctx->reset_counter;
	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
	ctx->init_priority = priority;
	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
		struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
		unsigned num_rings;
		unsigned num_rqs = 0;

		switch (i) {
		case AMDGPU_HW_IP_GFX:
			rings[0] = &adev->gfx.gfx_ring[0];
			num_rings = 1;
			break;
		case AMDGPU_HW_IP_COMPUTE:
			for (j = 0; j < adev->gfx.num_compute_rings; ++j)
				rings[j] = &adev->gfx.compute_ring[j];
			num_rings = adev->gfx.num_compute_rings;
			break;
		case AMDGPU_HW_IP_DMA:
			for (j = 0; j < adev->sdma.num_instances; ++j)
				rings[j] = &adev->sdma.instance[j].ring;
			num_rings = adev->sdma.num_instances;
			break;
		case AMDGPU_HW_IP_UVD:
			rings[0] = &adev->uvd.inst[0].ring;
			num_rings = 1;
			break;
		case AMDGPU_HW_IP_VCE:
			rings[0] = &adev->vce.ring[0];
			num_rings = 1;
			break;
		case AMDGPU_HW_IP_UVD_ENC:
			rings[0] = &adev->uvd.inst[0].ring_enc[0];
			num_rings = 1;
			break;
		case AMDGPU_HW_IP_VCN_DEC:
			rings[0] = &adev->vcn.ring_dec;
			num_rings = 1;
			break;
		case AMDGPU_HW_IP_VCN_ENC:
			rings[0] = &adev->vcn.ring_enc[0];
			num_rings = 1;
			break;
		case AMDGPU_HW_IP_VCN_JPEG:
			rings[0] = &adev->vcn.ring_jpeg;
			num_rings = 1;
			break;
		}

		for (j = 0; j < num_rings; ++j) {
			if (!rings[j]->adev)
				continue;

			rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
		}

		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
						  rqs, num_rqs, &ctx->guilty);
		if (r)
			goto error_cleanup_entities;
	}

	return 0;

error_cleanup_entities:
	for (i = 0; i < num_entities; ++i)
		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
	kfree(ctx->entities[0]);

error_free_fences:
	kfree(ctx->fences);
	ctx->fences = NULL;
	return r;
}
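/* Final kref release: drop all cached fences and free the context. */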
static void amdgpu_ctx_fini(struct kref *ref)
{
	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
	unsigned num_entities = amdgput_ctx_total_num_entities();
	struct amdgpu_device *adev = ctx->adev;
	unsigned i, j;

	if (!adev)
		return;

	for (i = 0; i < num_entities; ++i)
		for (j = 0; j < amdgpu_sched_jobs; ++j)
			dma_fence_put(ctx->entities[0][i].fences[j]);
	kfree(ctx->fences);
	kfree(ctx->entities[0]);

	mutex_destroy(&ctx->lock);

	kfree(ctx);
}
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
			  u32 ring, struct drm_sched_entity **entity)
{
	if (hw_ip >= AMDGPU_HW_IP_NUM) {
		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
		return -EINVAL;
	}

	/* Right now all IPs have only one instance - multiple rings. */
	if (instance != 0) {
		DRM_DEBUG("invalid ip instance: %d\n", instance);
		return -EINVAL;
	}

	if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
		DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
		return -EINVAL;
	}

	*entity = &ctx->entities[hw_ip][ring].entity;
	return 0;
}
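/* Create a new context, insert it into the file-private IDR and return its
 * handle through @id. */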
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv,
			    struct drm_file *filp,
			    enum drm_sched_priority priority,
			    uint32_t *id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;
	int r;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	mutex_lock(&mgr->lock);
	r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
	if (r < 0) {
		mutex_unlock(&mgr->lock);
		kfree(ctx);
		return r;
	}

	*id = (uint32_t)r;
	r = amdgpu_ctx_init(adev, priority, filp, ctx);
	if (r) {
		idr_remove(&mgr->ctx_handles, *id);
		*id = 0;
		kfree(ctx);
	}
	mutex_unlock(&mgr->lock);
	return r;
}
static void amdgpu_ctx_do_release(struct kref *ref)
{
	struct amdgpu_ctx *ctx;
	unsigned num_entities;
	u32 i;

	ctx = container_of(ref, struct amdgpu_ctx, refcount);

	num_entities = 0;
	for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
		num_entities += amdgpu_ctx_num_entities[i];

	for (i = 0; i < num_entities; i++)
		drm_sched_entity_destroy(&ctx->entities[0][i].entity);

	amdgpu_ctx_fini(ref);
}
static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;

	mutex_lock(&mgr->lock);
	ctx = idr_remove(&mgr->ctx_handles, id);
	if (ctx)
		kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	mutex_unlock(&mgr->lock);
	return ctx ? 0 : -EINVAL;
}
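/* AMDGPU_CTX_OP_QUERY_STATE: report whether a GPU reset has happened since
 * the last query of this context. */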
static int amdgpu_ctx_query(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv, uint32_t id,
			    union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	unsigned reset_counter;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	/* TODO: these two are always zero */
	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	/* determine if a GPU reset has occurred since the last call */
	reset_counter = atomic_read(&adev->gpu_reset_counter);
	/* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
	if (ctx->reset_counter_query == reset_counter)
		out->state.reset_status = AMDGPU_CTX_NO_RESET;
	else
		out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
	ctx->reset_counter_query = reset_counter;

	mutex_unlock(&mgr->lock);
	return 0;
}
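/* AMDGPU_CTX_OP_QUERY_STATE2: report reset, VRAM-lost and guilty status as
 * flag bits. */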
static int amdgpu_ctx_query2(struct amdgpu_device *adev,
			     struct amdgpu_fpriv *fpriv, uint32_t id,
			     union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;

	if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;

	if (atomic_read(&ctx->guilty))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

	mutex_unlock(&mgr->lock);
	return 0;
}
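/* Dispatcher for the DRM_AMDGPU_CTX ioctl. */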
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *filp)
{
	int r;
	uint32_t id;
	enum drm_sched_priority priority;

	union drm_amdgpu_ctx *args = data;
	struct amdgpu_device *adev = dev->dev_private;
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	r = 0;
	id = args->in.ctx_id;
	priority = amdgpu_to_sched_priority(args->in.priority);

	/* For backwards compatibility reasons, we need to accept
	 * ioctls with garbage in the priority field */
	if (priority == DRM_SCHED_PRIORITY_INVALID)
		priority = DRM_SCHED_PRIORITY_NORMAL;

	switch (args->in.op) {
	case AMDGPU_CTX_OP_ALLOC_CTX:
		r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
		args->out.alloc.ctx_id = id;
		break;
	case AMDGPU_CTX_OP_FREE_CTX:
		r = amdgpu_ctx_free(fpriv, id);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE:
		r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE2:
		r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
		break;
	default:
		return -EINVAL;
	}

	return r;
}
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return NULL;

	mgr = &fpriv->ctx_mgr;

	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (ctx)
		kref_get(&ctx->refcount);
	mutex_unlock(&mgr->lock);
	return ctx;
}
int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
	if (ctx == NULL)
		return -EINVAL;

	kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	return 0;
}
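/* Store @fence in the entity's fence ring and return its sequence number
 * through @handle; the slot being overwritten must already be signaled. */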
void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
			  struct drm_sched_entity *entity,
			  struct dma_fence *fence, uint64_t* handle)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	uint64_t seq = centity->sequence;
	struct dma_fence *other = NULL;
	unsigned idx = 0;

	idx = seq & (amdgpu_sched_jobs - 1);
	other = centity->fences[idx];
	if (other)
		BUG_ON(!dma_fence_is_signaled(other));

	dma_fence_get(fence);

	spin_lock(&ctx->ring_lock);
	centity->fences[idx] = fence;
	centity->sequence++;
	spin_unlock(&ctx->ring_lock);

	dma_fence_put(other);
	if (handle)
		*handle = seq;
}
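/* Look up the fence with sequence number @seq, or the most recent one if
 * @seq is ~0ull; returns NULL if the fence has already left the ring. */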
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				       struct drm_sched_entity *entity,
				       uint64_t seq)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *fence;

	spin_lock(&ctx->ring_lock);

	if (seq == ~0ull)
		seq = centity->sequence - 1;

	if (seq >= centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return ERR_PTR(-EINVAL);
	}

	if (seq + amdgpu_sched_jobs < centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return NULL;
	}

	fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
	spin_unlock(&ctx->ring_lock);

	return fence;
}
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
				  enum drm_sched_priority priority)
{
	unsigned num_entities = amdgput_ctx_total_num_entities();
	enum drm_sched_priority ctx_prio;
	unsigned i;

	ctx->override_priority = priority;

	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;

	for (i = 0; i < num_entities; i++) {
		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;

		drm_sched_entity_set_priority(entity, ctx_prio);
	}
}
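/* Wait for the fence amdgpu_sched_jobs submissions back, throttling a
 * context that runs too far ahead of the hardware. */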
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
			       struct drm_sched_entity *entity)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1);
	struct dma_fence *other = centity->fences[idx];

	if (other) {
		signed long r;

		r = dma_fence_wait(other, true);
		if (r < 0) {
			if (r != -ERESTARTSYS)
				DRM_ERROR("Error (%ld) waiting for fence!\n", r);

			return r;
		}
	}

	return 0;
}
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
{
	mutex_init(&mgr->lock);
	idr_init(&mgr->ctx_handles);
}
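/* Flush all entities of all contexts owned by this file, giving queued jobs
 * up to MAX_WAIT_SCHED_ENTITY_Q_EMPTY in total to drain. */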
void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
{
	unsigned num_entities = amdgput_ctx_total_num_entities();
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i;
	long max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY;

	idp = &mgr->ctx_handles;

	mutex_lock(&mgr->lock);
	idr_for_each_entry(idp, ctx, id) {

		if (!ctx->adev) {
			mutex_unlock(&mgr->lock);
			return;
		}

		for (i = 0; i < num_entities; i++) {
			struct drm_sched_entity *entity;

			entity = &ctx->entities[0][i].entity;
			max_wait = drm_sched_entity_flush(entity, max_wait);
		}
	}
	mutex_unlock(&mgr->lock);
}
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
	unsigned num_entities = amdgput_ctx_total_num_entities();
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i;

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {

		if (!ctx->adev)
			return;

		if (kref_read(&ctx->refcount) != 1) {
			DRM_ERROR("ctx %p is still alive\n", ctx);
			continue;
		}

		for (i = 0; i < num_entities; i++)
			drm_sched_entity_fini(&ctx->entities[0][i].entity);
	}
}
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id;

	amdgpu_ctx_mgr_entity_fini(mgr);

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
			DRM_ERROR("ctx %p is still alive\n", ctx);
	}

	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->lock);
}