// SPDX-License-Identifier: GPL-2.0 OR MIT
/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/xarray.h>

#include "lima_drv.h"
#include "lima_sched.h"
#include "lima_vm.h"
#include "lima_mmu.h"
#include "lima_l2_cache.h"
#include "lima_object.h"
struct lima_fence {
        struct dma_fence base;
        struct lima_sched_pipe *pipe;
};
static struct kmem_cache *lima_fence_slab;
static int lima_fence_slab_refcnt;
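
/*
 * All pipes allocate their fences from this one slab, so its creation is
 * reference counted: the first lima_sched_slab_init() call creates it and
 * the matching last lima_sched_slab_fini() call destroys it.
 */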
int lima_sched_slab_init(void)
{
        if (!lima_fence_slab) {
                lima_fence_slab = kmem_cache_create(
                        "lima_fence", sizeof(struct lima_fence), 0,
                        SLAB_HWCACHE_ALIGN, NULL);
                if (!lima_fence_slab)
                        return -ENOMEM;
        }

        lima_fence_slab_refcnt++;
        return 0;
}
void lima_sched_slab_fini(void)
{
        if (!--lima_fence_slab_refcnt) {
                kmem_cache_destroy(lima_fence_slab);
                lima_fence_slab = NULL;
        }
}
static inline struct lima_fence *to_lima_fence(struct dma_fence *fence)
{
        return container_of(fence, struct lima_fence, base);
}
static const char *lima_fence_get_driver_name(struct dma_fence *fence)
{
        return "lima";
}
static const char *lima_fence_get_timeline_name(struct dma_fence *fence)
{
        struct lima_fence *f = to_lima_fence(fence);

        return f->pipe->base.name;
}
static void lima_fence_release_rcu(struct rcu_head *rcu)
{
        struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
        struct lima_fence *fence = to_lima_fence(f);

        kmem_cache_free(lima_fence_slab, fence);
}
static void lima_fence_release(struct dma_fence *fence)
{
        struct lima_fence *f = to_lima_fence(fence);

        call_rcu(&f->base.rcu, lima_fence_release_rcu);
}
static const struct dma_fence_ops lima_fence_ops = {
        .get_driver_name = lima_fence_get_driver_name,
        .get_timeline_name = lima_fence_get_timeline_name,
        .release = lima_fence_release,
};
static struct lima_fence *lima_fence_create(struct lima_sched_pipe *pipe)
{
        struct lima_fence *fence;

        fence = kmem_cache_zalloc(lima_fence_slab, GFP_KERNEL);
        if (!fence)
                return NULL;

        fence->pipe = pipe;
        dma_fence_init(&fence->base, &lima_fence_ops, &pipe->fence_lock,
                       pipe->fence_context, ++pipe->fence_seqno);

        return fence;
}
static inline struct lima_sched_task *to_lima_task(struct drm_sched_job *job)
{
        return container_of(job, struct lima_sched_task, base);
}
static inline struct lima_sched_pipe *to_lima_pipe(struct drm_gpu_scheduler *sched)
{
        return container_of(sched, struct lima_sched_pipe, base);
}
int lima_sched_task_init(struct lima_sched_task *task,
                         struct lima_sched_context *context,
                         struct lima_bo **bos, int num_bos,
                         struct lima_vm *vm)
{
        int err, i;

        task->bos = kmemdup(bos, sizeof(*bos) * num_bos, GFP_KERNEL);
        if (!task->bos)
                return -ENOMEM;

        for (i = 0; i < num_bos; i++)
                drm_gem_object_get(&bos[i]->base.base);

        err = drm_sched_job_init(&task->base, &context->base, vm);
        if (err) {
                kfree(task->bos);
                return err;
        }

        task->num_bos = num_bos;
        task->vm = lima_vm_get(vm);

        xa_init_flags(&task->deps, XA_FLAGS_ALLOC);

        return 0;
}
void lima_sched_task_fini(struct lima_sched_task *task)
{
        struct dma_fence *fence;
        unsigned long index;
        int i;

        drm_sched_job_cleanup(&task->base);

        xa_for_each(&task->deps, index, fence) {
                dma_fence_put(fence);
        }
        xa_destroy(&task->deps);

        if (task->bos) {
                for (i = 0; i < task->num_bos; i++)
                        drm_gem_object_put_unlocked(&task->bos[i]->base.base);
                kfree(task->bos);
        }

        lima_vm_put(task->vm);
}
int lima_sched_context_init(struct lima_sched_pipe *pipe,
                            struct lima_sched_context *context,
                            atomic_t *guilty)
{
        struct drm_gpu_scheduler *sched = &pipe->base;

        return drm_sched_entity_init(&context->base, DRM_SCHED_PRIORITY_NORMAL,
                                     &sched, 1, guilty);
}
void lima_sched_context_fini(struct lima_sched_pipe *pipe,
                             struct lima_sched_context *context)
{
        drm_sched_entity_fini(&context->base);
}
struct dma_fence *lima_sched_context_queue_task(struct lima_sched_context *context,
                                                struct lima_sched_task *task)
{
        struct dma_fence *fence = dma_fence_get(&task->base.s_fence->finished);

        drm_sched_entity_push_job(&task->base, &context->base);
        return fence;
}
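
/*
 * Fences collected at submit time live in the task's deps xarray; the
 * scheduler keeps calling this hook and waits on each returned fence
 * before it lets run_job() execute the task.
 */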
static struct dma_fence *lima_sched_dependency(struct drm_sched_job *job,
                                               struct drm_sched_entity *entity)
{
        struct lima_sched_task *task = to_lima_task(job);

        if (!xa_empty(&task->deps))
                return xa_erase(&task->deps, task->last_dep++);

        return NULL;
}
static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
{
        struct lima_sched_task *task = to_lima_task(job);
        struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
        struct lima_fence *fence;
        struct dma_fence *ret;
        struct lima_vm *vm = NULL, *last_vm = NULL;
        int i;

        /* after GPU reset */
        if (job->s_fence->finished.error < 0)
                return NULL;

        fence = lima_fence_create(pipe);
        if (!fence)
                return NULL;
        task->fence = &fence->base;

        /* for caller usage of the fence, otherwise irq handler
         * may consume the fence before caller use it
         */
        ret = dma_fence_get(task->fence);

        pipe->current_task = task;

        /* this is needed for MMU to work correctly, otherwise GP/PP
         * will hang or page fault for unknown reason after running for
         * a while.
         *
         * Need to investigate:
         * 1. is it related to TLB
         * 2. how much performance will be affected by L2 cache flush
         * 3. can we reduce the calling of this function because all
         *    GP/PP use the same L2 cache on mali400
         *
         * TODO:
         * 1. move this to task fini to save some wait time?
         * 2. when GP/PP use different l2 cache, need PP wait GP l2
         *    cache flush?
         */
        for (i = 0; i < pipe->num_l2_cache; i++)
                lima_l2_cache_flush(pipe->l2_cache[i]);

        if (task->vm != pipe->current_vm) {
                vm = lima_vm_get(task->vm);
                last_vm = pipe->current_vm;
                pipe->current_vm = task->vm;
        }

        if (pipe->bcast_mmu)
                lima_mmu_switch_vm(pipe->bcast_mmu, vm);
        else {
                for (i = 0; i < pipe->num_mmu; i++)
                        lima_mmu_switch_vm(pipe->mmu[i], vm);
        }

        if (last_vm)
                lima_vm_put(last_vm);

        pipe->error = false;
        pipe->task_run(pipe, task);

        return task->fence;
}
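
/*
 * Timeout/error recovery: stop the scheduler, bump the guilty task's
 * karma, let the pipe backend reset the hardware, resume the MMUs from
 * any outstanding page fault, drop the current VM, then resubmit the
 * remaining jobs and restart the scheduler.
 */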
static void lima_sched_timedout_job(struct drm_sched_job *job)
{
        struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
        struct lima_sched_task *task = to_lima_task(job);

        if (!pipe->error)
                DRM_ERROR("lima job timeout\n");

        drm_sched_stop(&pipe->base, &task->base);

        drm_sched_increase_karma(&task->base);

        pipe->task_error(pipe);

        if (pipe->bcast_mmu)
                lima_mmu_page_fault_resume(pipe->bcast_mmu);
        else {
                int i;

                for (i = 0; i < pipe->num_mmu; i++)
                        lima_mmu_page_fault_resume(pipe->mmu[i]);
        }

        if (pipe->current_vm)
                lima_vm_put(pipe->current_vm);

        pipe->current_vm = NULL;
        pipe->current_task = NULL;

        drm_sched_resubmit_jobs(&pipe->base);
        drm_sched_start(&pipe->base, true);
}
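
/*
 * Called once the scheduler is done with a job: drop the task's
 * reference on its hardware fence, unmap its BOs from the VM and free
 * the task.
 */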
static void lima_sched_free_job(struct drm_sched_job *job)
{
        struct lima_sched_task *task = to_lima_task(job);
        struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
        struct lima_vm *vm = task->vm;
        struct lima_bo **bos = task->bos;
        int i;

        dma_fence_put(task->fence);

        for (i = 0; i < task->num_bos; i++)
                lima_vm_bo_del(vm, bos[i]);

        lima_sched_task_fini(task);
        kmem_cache_free(pipe->task_slab, task);
}
static const struct drm_sched_backend_ops lima_sched_ops = {
        .dependency = lima_sched_dependency,
        .run_job = lima_sched_run_job,
        .timedout_job = lima_sched_timedout_job,
        .free_job = lima_sched_free_job,
};
int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
{
        unsigned int timeout = lima_sched_timeout_ms > 0 ?
                               lima_sched_timeout_ms : 500;

        pipe->fence_context = dma_fence_context_alloc(1);
        spin_lock_init(&pipe->fence_lock);

        return drm_sched_init(&pipe->base, &lima_sched_ops, 1, 0,
                              msecs_to_jiffies(timeout), name);
}
void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
{
        drm_sched_fini(&pipe->base);
}
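
/*
 * Called by the pipe backend when the hardware reports a task as done:
 * on error, trigger the scheduler's timeout/recovery path right away;
 * otherwise finish the task on the hardware side and signal its fence.
 */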
void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe)
{
        if (pipe->error)
                drm_sched_fault(&pipe->base);
        else {
                struct lima_sched_task *task = pipe->current_task;

                pipe->task_fini(pipe);
                dma_fence_signal(task->fence);
        }
}