// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 */
#include "msm_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_fence.h"
#include "msm_gpu_trace.h"
#include "adreno/adreno_gpu.h"

#include <generated/utsrelease.h>
#include <linux/string_helpers.h>
#include <linux/devfreq.h>
#include <linux/devfreq_cooling.h>
#include <linux/devcoredump.h>
#include <linux/sched/task.h>
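
/*
 * Power Management:
 *
 * The devfreq helpers below hook GPU frequency scaling into the devfreq
 * framework (simple_ondemand governor).  Frequency changes go through a
 * target-specific gpu_set_freq()/gpu_get_freq() hook when one is
 * provided, otherwise the core clock is driven directly.
 */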
static int msm_devfreq_target(struct device *dev, unsigned long *freq,
		u32 flags)
{
	struct msm_gpu *gpu = dev_to_gpu(dev);
	struct dev_pm_opp *opp;

	opp = devfreq_recommended_opp(dev, freq, flags);
	if (IS_ERR(opp))
		return PTR_ERR(opp);

	trace_msm_gpu_freq_change(dev_pm_opp_get_freq(opp));

	if (gpu->funcs->gpu_set_freq)
		gpu->funcs->gpu_set_freq(gpu, opp);
	else
		clk_set_rate(gpu->core_clk, *freq);

	dev_pm_opp_put(opp);

	return 0;
}
static int msm_devfreq_get_dev_status(struct device *dev,
		struct devfreq_dev_status *status)
{
	struct msm_gpu *gpu = dev_to_gpu(dev);
	ktime_t time;

	if (gpu->funcs->gpu_get_freq)
		status->current_frequency = gpu->funcs->gpu_get_freq(gpu);
	else
		status->current_frequency = clk_get_rate(gpu->core_clk);

	status->busy_time = gpu->funcs->gpu_busy(gpu);

	time = ktime_get();
	status->total_time = ktime_us_delta(time, gpu->devfreq.time);
	gpu->devfreq.time = time;

	return 0;
}
static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
{
	struct msm_gpu *gpu = dev_to_gpu(dev);

	if (gpu->funcs->gpu_get_freq)
		*freq = gpu->funcs->gpu_get_freq(gpu);
	else
		*freq = clk_get_rate(gpu->core_clk);

	return 0;
}
static struct devfreq_dev_profile msm_devfreq_profile = {
	.target = msm_devfreq_target,
	.get_dev_status = msm_devfreq_get_dev_status,
	.get_cur_freq = msm_devfreq_get_cur_freq,
};
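
/*
 * Note: the devfreq device is created suspended (devfreq_suspend_device()
 * is called right after registration below) and only starts sampling once
 * msm_gpu_resume_devfreq() runs from msm_gpu_pm_resume(), so busy/total
 * time accounting starts from a clean slate.
 */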
static void msm_devfreq_init(struct msm_gpu *gpu)
{
	/* We need target support to do devfreq */
	if (!gpu->funcs->gpu_busy)
		return;

	msm_devfreq_profile.initial_freq = gpu->fast_rate;

	/*
	 * Don't set the freq_table or max_state and let devfreq build the
	 * table from the OPP table.
	 * After a deferred probe, these may have been left at non-zero values,
	 * so set them back to zero before creating the devfreq device.
	 */
	msm_devfreq_profile.freq_table = NULL;
	msm_devfreq_profile.max_state = 0;

	gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev,
			&msm_devfreq_profile, DEVFREQ_GOV_SIMPLE_ONDEMAND,
			NULL);

	if (IS_ERR(gpu->devfreq.devfreq)) {
		DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n");
		gpu->devfreq.devfreq = NULL;
		return;
	}

	devfreq_suspend_device(gpu->devfreq.devfreq);

	gpu->cooling = of_devfreq_cooling_register(gpu->pdev->dev.of_node,
			gpu->devfreq.devfreq);
	if (IS_ERR(gpu->cooling)) {
		DRM_DEV_ERROR(&gpu->pdev->dev,
				"Couldn't register GPU cooling device\n");
		gpu->cooling = NULL;
	}
}
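
/*
 * Power rail, clock and AXI bus helpers used by the runtime PM hooks
 * further down: msm_gpu_pm_resume() brings things up in the order
 * power rails -> core/RBBM clocks -> AXI (ebi1) clock, and
 * msm_gpu_pm_suspend() tears them down in the reverse order.
 */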
static int enable_pwrrail(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret = 0;

	if (gpu->gpu_reg) {
		ret = regulator_enable(gpu->gpu_reg);
		if (ret) {
			DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
			return ret;
		}
	}

	if (gpu->gpu_cx) {
		ret = regulator_enable(gpu->gpu_cx);
		if (ret) {
			DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
			return ret;
		}
	}

	return 0;
}
static int disable_pwrrail(struct msm_gpu *gpu)
{
	if (gpu->gpu_cx)
		regulator_disable(gpu->gpu_cx);
	if (gpu->gpu_reg)
		regulator_disable(gpu->gpu_reg);
	return 0;
}
static int enable_clk(struct msm_gpu *gpu)
{
	if (gpu->core_clk && gpu->fast_rate)
		clk_set_rate(gpu->core_clk, gpu->fast_rate);

	/* Set the RBBM timer rate to 19.2Mhz */
	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 19200000);

	return clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks);
}
static int disable_clk(struct msm_gpu *gpu)
{
	clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);

	/*
	 * Set the clock to a deliberately low rate. On older targets the clock
	 * speed had to be non zero to avoid problems. On newer targets this
	 * will be rounded down to zero anyway so it all works out.
	 */
	if (gpu->core_clk)
		clk_set_rate(gpu->core_clk, 27000000);

	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 0);

	return 0;
}
static int enable_axi(struct msm_gpu *gpu)
{
	return clk_prepare_enable(gpu->ebi1_clk);
}

static int disable_axi(struct msm_gpu *gpu)
{
	clk_disable_unprepare(gpu->ebi1_clk);
	return 0;
}
void msm_gpu_resume_devfreq(struct msm_gpu *gpu)
{
	gpu->devfreq.busy_cycles = 0;
	gpu->devfreq.time = ktime_get();

	devfreq_resume_device(gpu->devfreq.devfreq);
}
int msm_gpu_pm_resume(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);
	trace_msm_gpu_resume(0);

	ret = enable_pwrrail(gpu);
	if (ret)
		return ret;

	ret = enable_clk(gpu);
	if (ret)
		return ret;

	ret = enable_axi(gpu);
	if (ret)
		return ret;

	msm_gpu_resume_devfreq(gpu);

	gpu->needs_hw_init = true;

	return 0;
}
int msm_gpu_pm_suspend(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);
	trace_msm_gpu_suspend(0);

	devfreq_suspend_device(gpu->devfreq.devfreq);

	ret = disable_axi(gpu);
	if (ret)
		return ret;

	ret = disable_clk(gpu);
	if (ret)
		return ret;

	ret = disable_pwrrail(gpu);
	if (ret)
		return ret;

	return 0;
}
int msm_gpu_hw_init(struct msm_gpu *gpu)
{
	int ret;

	WARN_ON(!mutex_is_locked(&gpu->dev->struct_mutex));

	if (!gpu->needs_hw_init)
		return 0;

	disable_irq(gpu->irq);
	ret = gpu->funcs->hw_init(gpu);
	if (!ret)
		gpu->needs_hw_init = false;
	enable_irq(gpu->irq);

	return ret;
}
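
/*
 * update_fences() walks the ring's in-flight submit list and signals the
 * fence of every submit whose sequence number is at or below the given
 * completed fence value; it is called from retire_worker() and from
 * recover_worker() after a hang.
 */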
static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		uint32_t fence)
{
	struct msm_gem_submit *submit;

	spin_lock(&ring->submit_lock);
	list_for_each_entry(submit, &ring->submits, node) {
		if (submit->seqno > fence)
			break;

		msm_update_fence(submit->ring->fctx,
			submit->fence->seqno);
	}
	spin_unlock(&ring->submit_lock);
}
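
/*
 * Crash state capture: when CONFIG_DEV_COREDUMP is enabled, a GPU hang
 * produces a devcoredump blob; msm_gpu_devcoredump_read() formats the
 * saved msm_gpu_state (kernel/module/time header plus the per-target
 * ->show() output) when userspace reads the dump.
 */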
#ifdef CONFIG_DEV_COREDUMP
static ssize_t msm_gpu_devcoredump_read(char *buffer, loff_t offset,
		size_t count, void *data, size_t datalen)
{
	struct msm_gpu *gpu = data;
	struct drm_print_iterator iter;
	struct drm_printer p;
	struct msm_gpu_state *state;

	state = msm_gpu_crashstate_get(gpu);
	if (!state)
		return 0;

	iter.data = buffer;
	iter.offset = 0;
	iter.start = offset;
	iter.remain = count;

	p = drm_coredump_printer(&iter);

	drm_printf(&p, "---\n");
	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
	drm_printf(&p, "time: %lld.%09ld\n",
		state->time.tv_sec, state->time.tv_nsec);
	if (state->comm)
		drm_printf(&p, "comm: %s\n", state->comm);
	if (state->cmd)
		drm_printf(&p, "cmdline: %s\n", state->cmd);

	gpu->funcs->show(gpu, state, &p);

	msm_gpu_crashstate_put(gpu);

	return count - iter.remain;
}
static void msm_gpu_devcoredump_free(void *data)
{
	struct msm_gpu *gpu = data;

	msm_gpu_crashstate_put(gpu);
}
static void msm_gpu_crashstate_get_bo(struct msm_gpu_state *state,
		struct msm_gem_object *obj, u64 iova, u32 flags)
{
	struct msm_gpu_state_bo *state_bo = &state->bos[state->nr_bos];

	/* Don't record write only objects */
	state_bo->size = obj->base.size;
	state_bo->iova = iova;

	/* Only store data for non imported buffer objects marked for read */
	if ((flags & MSM_SUBMIT_BO_READ) && !obj->base.import_attach) {
		void *ptr;

		state_bo->data = kvmalloc(obj->base.size, GFP_KERNEL);
		if (!state_bo->data)
			goto out;

		msm_gem_lock(&obj->base);
		ptr = msm_gem_get_vaddr_active(&obj->base);
		msm_gem_unlock(&obj->base);
		if (IS_ERR(ptr)) {
			kvfree(state_bo->data);
			state_bo->data = NULL;
			goto out;
		}

		memcpy(state_bo->data, ptr, obj->base.size);
		msm_gem_put_vaddr(&obj->base);
	}
out:
	state->nr_bos++;
}
static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
		struct msm_gem_submit *submit, char *comm, char *cmd)
{
	struct msm_gpu_state *state;

	/* Check if the target supports capturing crash state */
	if (!gpu->funcs->gpu_state_get)
		return;

	/* Only save one crash state at a time */
	if (gpu->crashstate)
		return;

	state = gpu->funcs->gpu_state_get(gpu);
	if (IS_ERR_OR_NULL(state))
		return;

	/* Fill in the additional crash state information */
	state->comm = kstrdup(comm, GFP_KERNEL);
	state->cmd = kstrdup(cmd, GFP_KERNEL);

	if (submit) {
		int i, nr = 0;

		/* count # of buffers to dump: */
		for (i = 0; i < submit->nr_bos; i++)
			if (should_dump(submit, i))
				nr++;
		/* always dump cmd bo's, but don't double count them: */
		for (i = 0; i < submit->nr_cmds; i++)
			if (!should_dump(submit, submit->cmd[i].idx))
				nr++;

		state->bos = kcalloc(nr,
			sizeof(struct msm_gpu_state_bo), GFP_KERNEL);

		for (i = 0; i < submit->nr_bos; i++) {
			if (should_dump(submit, i)) {
				msm_gpu_crashstate_get_bo(state, submit->bos[i].obj,
					submit->bos[i].iova, submit->bos[i].flags);
			}
		}

		for (i = 0; state->bos && i < submit->nr_cmds; i++) {
			int idx = submit->cmd[i].idx;

			if (!should_dump(submit, submit->cmd[i].idx)) {
				msm_gpu_crashstate_get_bo(state, submit->bos[idx].obj,
					submit->bos[idx].iova, submit->bos[idx].flags);
			}
		}
	}

	/* Set the active crash state to be dumped on failure */
	gpu->crashstate = state;

	/* FIXME: Release the crashstate if this errors out? */
	dev_coredumpm(gpu->dev->dev, THIS_MODULE, gpu, 0, GFP_KERNEL,
		msm_gpu_devcoredump_read, msm_gpu_devcoredump_free);
}
#else
static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
		struct msm_gem_submit *submit, char *comm, char *cmd)
{
}
#endif
/*
 * Hangcheck detection for locked gpu:
 */

static struct msm_gem_submit *
find_submit(struct msm_ringbuffer *ring, uint32_t fence)
{
	struct msm_gem_submit *submit;

	spin_lock(&ring->submit_lock);
	list_for_each_entry(submit, &ring->submits, node) {
		if (submit->seqno == fence) {
			spin_unlock(&ring->submit_lock);
			return submit;
		}
	}
	spin_unlock(&ring->submit_lock);

	return NULL;
}
static void retire_submits(struct msm_gpu *gpu);
static void recover_worker(struct kthread_work *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_gem_submit *submit;
	struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
	char *comm = NULL, *cmd = NULL;
	int i;

	mutex_lock(&dev->struct_mutex);

	DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n", gpu->name);

	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
	if (submit) {
		struct task_struct *task;

		/* Increment the fault counts */
		gpu->global_faults++;
		submit->queue->faults++;

		task = get_pid_task(submit->pid, PIDTYPE_PID);
		if (task) {
			comm = kstrdup(task->comm, GFP_KERNEL);
			cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
			put_task_struct(task);
		}

		/* msm_rd_dump_submit() needs bo locked to dump: */
		for (i = 0; i < submit->nr_bos; i++)
			msm_gem_lock(&submit->bos[i].obj->base);

		if (comm && cmd) {
			DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
				gpu->name, comm, cmd);

			msm_rd_dump_submit(priv->hangrd, submit,
				"offending task: %s (%s)", comm, cmd);
		} else {
			msm_rd_dump_submit(priv->hangrd, submit, NULL);
		}

		for (i = 0; i < submit->nr_bos; i++)
			msm_gem_unlock(&submit->bos[i].obj->base);
	}

	/* Record the crash state */
	pm_runtime_get_sync(&gpu->pdev->dev);
	msm_gpu_crashstate_capture(gpu, submit, comm, cmd);
	pm_runtime_put_sync(&gpu->pdev->dev);

	kfree(cmd);
	kfree(comm);

	/*
	 * Update all the rings with the latest and greatest fence.. this
	 * needs to happen after msm_rd_dump_submit() to ensure that the
	 * bo's referenced by the offending submit are still around.
	 */
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];
		uint32_t fence = ring->memptrs->fence;

		/*
		 * For the current (faulting?) ring/submit advance the fence by
		 * one more to clear the faulting submit
		 */
		if (ring == cur_ring)
			fence++;

		update_fences(gpu, ring, fence);
	}

	if (msm_gpu_active(gpu)) {
		/* retire completed submits, plus the one that hung: */
		retire_submits(gpu);

		pm_runtime_get_sync(&gpu->pdev->dev);
		gpu->funcs->recover(gpu);
		pm_runtime_put_sync(&gpu->pdev->dev);

		/*
		 * Replay all remaining submits starting with highest priority
		 * ring
		 */
		for (i = 0; i < gpu->nr_rings; i++) {
			struct msm_ringbuffer *ring = gpu->rb[i];

			spin_lock(&ring->submit_lock);
			list_for_each_entry(submit, &ring->submits, node)
				gpu->funcs->submit(gpu, submit);
			spin_unlock(&ring->submit_lock);
		}
	}

	mutex_unlock(&dev->struct_mutex);

	msm_gpu_retire(gpu);
}
static void hangcheck_timer_reset(struct msm_gpu *gpu)
{
	mod_timer(&gpu->hangcheck_timer,
			round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
}
static void hangcheck_handler(struct timer_list *t)
{
	struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer);
	struct drm_device *dev = gpu->dev;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
	uint32_t fence = ring->memptrs->fence;

	if (fence != ring->hangcheck_fence) {
		/* some progress has been made.. ya! */
		ring->hangcheck_fence = fence;
	} else if (fence < ring->seqno) {
		/* no progress and not done.. hung! */
		ring->hangcheck_fence = fence;
		DRM_DEV_ERROR(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
				gpu->name, ring->id);
		DRM_DEV_ERROR(dev->dev, "%s: completed fence: %u\n",
				gpu->name, fence);
		DRM_DEV_ERROR(dev->dev, "%s: submitted fence: %u\n",
				gpu->name, ring->seqno);

		kthread_queue_work(gpu->worker, &gpu->recover_work);
	}

	/* if still more pending work, reset the hangcheck timer: */
	if (ring->seqno > ring->hangcheck_fence)
		hangcheck_timer_reset(gpu);

	/* workaround for missing irq: */
	kthread_queue_work(gpu->worker, &gpu->retire_work);
}
/*
 * Performance Counters:
 */

/* called under perf_lock */
static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs)
{
	uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)];
	int i, n = min(ncntrs, gpu->num_perfcntrs);

	/* read current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);

	/* update cntrs: */
	for (i = 0; i < n; i++)
		cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i];

	/* save current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		gpu->last_cntrs[i] = current_cntrs[i];

	return n;
}
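
/*
 * update_sw_cntrs() tracks GPU active vs. total wall-clock time in
 * software while perfcntr sampling is active; it is sampled on submit
 * (msm_gpu_submit) and on retire (msm_gpu_retire).
 */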
static void update_sw_cntrs(struct msm_gpu *gpu)
{
	ktime_t time;
	uint32_t elapsed;
	unsigned long flags;

	spin_lock_irqsave(&gpu->perf_lock, flags);
	if (!gpu->perfcntr_active)
		goto out;

	time = ktime_get();
	elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time));

	gpu->totaltime += elapsed;
	if (gpu->last_sample.active)
		gpu->activetime += elapsed;

	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = time;

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}
void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
{
	unsigned long flags;

	pm_runtime_get_sync(&gpu->pdev->dev);

	spin_lock_irqsave(&gpu->perf_lock, flags);
	/* we could dynamically enable/disable perfcntr registers too.. */
	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = ktime_get();
	gpu->activetime = gpu->totaltime = 0;
	gpu->perfcntr_active = true;
	update_hw_cntrs(gpu, 0, NULL);
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}
void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
{
	gpu->perfcntr_active = false;
	pm_runtime_put_sync(&gpu->pdev->dev);
}
/* returns -errno or # of cntrs sampled */
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&gpu->perf_lock, flags);

	if (!gpu->perfcntr_active) {
		ret = -EINVAL;
		goto out;
	}

	*activetime = gpu->activetime;
	*totaltime = gpu->totaltime;

	gpu->activetime = gpu->totaltime = 0;

	ret = update_hw_cntrs(gpu, ncntrs, cntrs);

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);

	return ret;
}
/*
 * Cmdstream submission/retirement:
 */

static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		struct msm_gem_submit *submit)
{
	int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
	volatile struct msm_gpu_submit_stats *stats;
	u64 elapsed, clock = 0;
	int i;

	stats = &ring->memptrs->stats[index];
	/* Convert 19.2Mhz alwayson ticks to nanoseconds for elapsed time */
	elapsed = (stats->alwayson_end - stats->alwayson_start) * 10000;
	do_div(elapsed, 192);

	/* Calculate the clock frequency from the number of CP cycles */
	if (elapsed) {
		clock = (stats->cpcycles_end - stats->cpcycles_start) * 1000;
		do_div(clock, elapsed);
	}

	trace_msm_gpu_submit_retired(submit, elapsed, clock,
		stats->alwayson_start, stats->alwayson_end);

	for (i = 0; i < submit->nr_bos; i++) {
		struct drm_gem_object *obj = &submit->bos[i].obj->base;

		msm_gem_lock(obj);
		msm_gem_active_put(obj);
		msm_gem_unpin_iova_locked(obj, submit->aspace);
		msm_gem_unlock(obj);
		drm_gem_object_put(obj);
	}

	pm_runtime_mark_last_busy(&gpu->pdev->dev);
	pm_runtime_put_autosuspend(&gpu->pdev->dev);

	spin_lock(&ring->submit_lock);
	list_del(&submit->node);
	spin_unlock(&ring->submit_lock);

	msm_gem_submit_put(submit);
}
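
/*
 * Submits on a given ring complete in order, so retire_submits() can walk
 * each ring from the head of its submit list and stop at the first submit
 * whose fence has not signaled yet.
 */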
static void retire_submits(struct msm_gpu *gpu)
{
	int i;

	/* Retire the commits starting with highest priority */
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		while (true) {
			struct msm_gem_submit *submit = NULL;

			spin_lock(&ring->submit_lock);
			submit = list_first_entry_or_null(&ring->submits,
					struct msm_gem_submit, node);
			spin_unlock(&ring->submit_lock);

			/*
			 * If no submit, we are done.  If submit->fence hasn't
			 * been signalled, then later submits are not signalled
			 * either, so we are also done.
			 */
			if (submit && dma_fence_is_signaled(submit->fence)) {
				retire_submit(gpu, ring, submit);
			} else {
				break;
			}
		}
	}
}
static void retire_worker(struct kthread_work *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
	int i;

	for (i = 0; i < gpu->nr_rings; i++)
		update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence);

	retire_submits(gpu);
}

/* call from irq handler to schedule work to retire bo's */
void msm_gpu_retire(struct msm_gpu *gpu)
{
	kthread_queue_work(gpu->worker, &gpu->retire_work);
	update_sw_cntrs(gpu);
}
/* add bo's to gpu's ring, and kick gpu: */
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	pm_runtime_get_sync(&gpu->pdev->dev);

	msm_gpu_hw_init(gpu);

	submit->seqno = ++ring->seqno;

	msm_rd_dump_submit(priv->rd, submit, NULL);

	update_sw_cntrs(gpu);

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		struct drm_gem_object *drm_obj = &msm_obj->base;
		uint64_t iova;

		/* submit takes a reference to the bo and iova until retired: */
		drm_gem_object_get(&msm_obj->base);
		msm_gem_get_and_pin_iova_locked(&msm_obj->base, submit->aspace, &iova);

		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
			dma_resv_add_excl_fence(drm_obj->resv, submit->fence);
		else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
			dma_resv_add_shared_fence(drm_obj->resv, submit->fence);

		msm_gem_active_get(drm_obj, gpu);
	}

	/*
	 * ring->submits holds a ref to the submit, to deal with the case
	 * that a submit completes before msm_ioctl_gem_submit() returns.
	 */
	msm_gem_submit_get(submit);

	spin_lock(&ring->submit_lock);
	list_add_tail(&submit->node, &ring->submits);
	spin_unlock(&ring->submit_lock);

	gpu->funcs->submit(gpu, submit);
	priv->lastctx = submit->queue->ctx;

	hangcheck_timer_reset(gpu);
}
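
/*
 * Init/Cleanup:
 */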
static irqreturn_t irq_handler(int irq, void *data)
{
	struct msm_gpu *gpu = data;
	return gpu->funcs->irq(gpu);
}
static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
{
	int ret = devm_clk_bulk_get_all(&pdev->dev, &gpu->grp_clks);

	if (ret < 1) {
		gpu->nr_clocks = 0;
		return ret;
	}

	gpu->nr_clocks = ret;

	gpu->core_clk = msm_clk_bulk_get_clock(gpu->grp_clks,
		gpu->nr_clocks, "core");

	gpu->rbbmtimer_clk = msm_clk_bulk_get_clock(gpu->grp_clks,
		gpu->nr_clocks, "rbbmtimer");

	return 0;
}
/* Return a new address space for a msm_drm_private instance */
struct msm_gem_address_space *
msm_gpu_create_private_address_space(struct msm_gpu *gpu, struct task_struct *task)
{
	struct msm_gem_address_space *aspace = NULL;

	if (!gpu)
		return NULL;

	/*
	 * If the target doesn't support private address spaces then return
	 * the global one
	 */
	if (gpu->funcs->create_private_address_space) {
		aspace = gpu->funcs->create_private_address_space(gpu);
		if (!IS_ERR(aspace))
			aspace->pid = get_pid(task_pid(task));
	}

	if (IS_ERR_OR_NULL(aspace))
		aspace = msm_gem_address_space_get(gpu->aspace);

	return aspace;
}
int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, struct msm_gpu_config *config)
{
	int i, ret, nr_rings = config->nr_rings;
	void *memptrs;
	uint64_t memptrs_iova;

	if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
		gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);

	gpu->dev = drm;
	gpu->funcs = funcs;
	gpu->name = name;

	gpu->worker = kthread_create_worker(0, "%s-worker", gpu->name);
	if (IS_ERR(gpu->worker)) {
		ret = PTR_ERR(gpu->worker);
		gpu->worker = NULL;
		goto fail;
	}

	sched_set_fifo_low(gpu->worker->task);

	INIT_LIST_HEAD(&gpu->active_list);
	kthread_init_work(&gpu->retire_work, retire_worker);
	kthread_init_work(&gpu->recover_work, recover_worker);

	timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0);

	spin_lock_init(&gpu->perf_lock);

	/* Map registers: */
	gpu->mmio = msm_ioremap(pdev, config->ioname, name);
	if (IS_ERR(gpu->mmio)) {
		ret = PTR_ERR(gpu->mmio);
		goto fail;
	}

	/* Get Interrupt: */
	gpu->irq = platform_get_irq(pdev, 0);
	if (gpu->irq < 0) {
		ret = gpu->irq;
		DRM_DEV_ERROR(drm->dev, "failed to get irq: %d\n", ret);
		goto fail;
	}

	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
			IRQF_TRIGGER_HIGH, gpu->name, gpu);
	if (ret) {
		DRM_DEV_ERROR(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
		goto fail;
	}

	ret = get_clocks(pdev, gpu);
	if (ret)
		goto fail;

	gpu->ebi1_clk = msm_clk_get(pdev, "bus");
	DBG("ebi1_clk: %p", gpu->ebi1_clk);
	if (IS_ERR(gpu->ebi1_clk))
		gpu->ebi1_clk = NULL;

	/* Acquire regulators: */
	gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
	DBG("gpu_reg: %p", gpu->gpu_reg);
	if (IS_ERR(gpu->gpu_reg))
		gpu->gpu_reg = NULL;

	gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
	DBG("gpu_cx: %p", gpu->gpu_cx);
	if (IS_ERR(gpu->gpu_cx))
		gpu->gpu_cx = NULL;

	gpu->pdev = pdev;
	platform_set_drvdata(pdev, &gpu->adreno_smmu);

	msm_devfreq_init(gpu);

	gpu->aspace = gpu->funcs->create_address_space(gpu, pdev);

	if (gpu->aspace == NULL)
		DRM_DEV_INFO(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
	else if (IS_ERR(gpu->aspace)) {
		ret = PTR_ERR(gpu->aspace);
		goto fail;
	}

	memptrs = msm_gem_kernel_new(drm,
		sizeof(struct msm_rbmemptrs) * nr_rings,
		check_apriv(gpu, MSM_BO_UNCACHED), gpu->aspace, &gpu->memptrs_bo,
		&memptrs_iova);

	if (IS_ERR(memptrs)) {
		ret = PTR_ERR(memptrs);
		DRM_DEV_ERROR(drm->dev, "could not allocate memptrs: %d\n", ret);
		goto fail;
	}

	msm_gem_object_set_name(gpu->memptrs_bo, "memptrs");

	if (nr_rings > ARRAY_SIZE(gpu->rb)) {
		DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n",
			ARRAY_SIZE(gpu->rb));
		nr_rings = ARRAY_SIZE(gpu->rb);
	}

	/* Create ringbuffer(s): */
	for (i = 0; i < nr_rings; i++) {
		gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova);

		if (IS_ERR(gpu->rb[i])) {
			ret = PTR_ERR(gpu->rb[i]);
			DRM_DEV_ERROR(drm->dev,
				"could not create ringbuffer %d: %d\n", i, ret);
			goto fail;
		}

		memptrs += sizeof(struct msm_rbmemptrs);
		memptrs_iova += sizeof(struct msm_rbmemptrs);
	}

	gpu->nr_rings = nr_rings;

	return 0;

fail:
	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

	msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false);

	platform_set_drvdata(pdev, NULL);
	return ret;
}
void msm_gpu_cleanup(struct msm_gpu *gpu)
{
	int i;

	DBG("%s", gpu->name);

	WARN_ON(!list_empty(&gpu->active_list));

	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

	msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false);

	if (!IS_ERR_OR_NULL(gpu->aspace)) {
		gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu);
		msm_gem_address_space_put(gpu->aspace);
	}

	if (gpu->worker)
		kthread_destroy_worker(gpu->worker);

	devfreq_cooling_unregister(gpu->cooling);
}