/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "msm_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_fence.h"

#include <generated/utsrelease.h>
#include <linux/string_helpers.h>
#include <linux/pm_opp.h>
#include <linux/devfreq.h>
#include <linux/devcoredump.h>
#include <linux/sched/task.h>
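
/*
 * Devfreq integration: target() programs the core clock to the rate picked
 * from the recommended OPP, get_dev_status() reports busy cycles (via the
 * gpu_busy() callback) and elapsed time since the last sample, and
 * get_cur_freq() reads back the current core clock rate.
 */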

static int msm_devfreq_target(struct device *dev, unsigned long *freq,
		u32 flags)
{
	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
	struct dev_pm_opp *opp;

	opp = devfreq_recommended_opp(dev, freq, flags);

	if (IS_ERR(opp))
		return PTR_ERR(opp);

	clk_set_rate(gpu->core_clk, *freq);

	dev_pm_opp_put(opp);

	return 0;
}

static int msm_devfreq_get_dev_status(struct device *dev,
		struct devfreq_dev_status *status)
{
	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
	u64 cycles;
	u32 freq = ((u32) status->current_frequency) / 1000000;
	ktime_t time;

	status->current_frequency = (unsigned long) clk_get_rate(gpu->core_clk);
	gpu->funcs->gpu_busy(gpu, &cycles);

	status->busy_time = ((u32) (cycles - gpu->devfreq.busy_cycles)) / freq;

	gpu->devfreq.busy_cycles = cycles;

	time = ktime_get();
	status->total_time = ktime_us_delta(time, gpu->devfreq.time);
	gpu->devfreq.time = time;

	return 0;
}

static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
{
	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));

	*freq = (unsigned long) clk_get_rate(gpu->core_clk);

	return 0;
}

static struct devfreq_dev_profile msm_devfreq_profile = {
	.target = msm_devfreq_target,
	.get_dev_status = msm_devfreq_get_dev_status,
	.get_cur_freq = msm_devfreq_get_cur_freq,
};

static void msm_devfreq_init(struct msm_gpu *gpu)
{
	/* We need target support to do devfreq */
	if (!gpu->funcs->gpu_busy || !gpu->core_clk)
		return;

	msm_devfreq_profile.initial_freq = gpu->fast_rate;

	/*
	 * Don't set the freq_table or max_state and let devfreq build the table
	 * from the OPP table.
	 */

	gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev,
			&msm_devfreq_profile, "simple_ondemand", NULL);

	if (IS_ERR(gpu->devfreq.devfreq)) {
		dev_err(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n");
		gpu->devfreq.devfreq = NULL;
	}
}
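
/*
 * Power rail, clock and AXI bus helpers, used by the PM suspend/resume
 * paths below:
 */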

static int enable_pwrrail(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret = 0;

	if (gpu->gpu_reg) {
		ret = regulator_enable(gpu->gpu_reg);
		if (ret) {
			dev_err(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
			return ret;
		}
	}

	if (gpu->gpu_cx) {
		ret = regulator_enable(gpu->gpu_cx);
		if (ret) {
			dev_err(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
			return ret;
		}
	}

	return 0;
}

static int disable_pwrrail(struct msm_gpu *gpu)
{
	if (gpu->gpu_cx)
		regulator_disable(gpu->gpu_cx);
	if (gpu->gpu_reg)
		regulator_disable(gpu->gpu_reg);
	return 0;
}

static int enable_clk(struct msm_gpu *gpu)
{
	if (gpu->core_clk && gpu->fast_rate)
		clk_set_rate(gpu->core_clk, gpu->fast_rate);

	/* Set the RBBM timer rate to 19.2MHz */
	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 19200000);

	return clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks);
}

static int disable_clk(struct msm_gpu *gpu)
{
	clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);

	/*
	 * Set the clock to a deliberately low rate. On older targets the clock
	 * speed had to be non zero to avoid problems. On newer targets this
	 * will be rounded down to zero anyway so it all works out.
	 */
	if (gpu->core_clk)
		clk_set_rate(gpu->core_clk, 27000000);

	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 0);

	return 0;
}

static int enable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_prepare_enable(gpu->ebi1_clk);
	return 0;
}

static int disable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_disable_unprepare(gpu->ebi1_clk);
	return 0;
}

int msm_gpu_pm_resume(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);

	ret = enable_pwrrail(gpu);
	if (ret)
		return ret;

	ret = enable_clk(gpu);
	if (ret)
		return ret;

	ret = enable_axi(gpu);
	if (ret)
		return ret;

	if (gpu->devfreq.devfreq) {
		gpu->devfreq.busy_cycles = 0;
		gpu->devfreq.time = ktime_get();

		devfreq_resume_device(gpu->devfreq.devfreq);
	}

	gpu->needs_hw_init = true;

	return 0;
}

int msm_gpu_pm_suspend(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);

	if (gpu->devfreq.devfreq)
		devfreq_suspend_device(gpu->devfreq.devfreq);

	ret = disable_axi(gpu);
	if (ret)
		return ret;

	ret = disable_clk(gpu);
	if (ret)
		return ret;

	ret = disable_pwrrail(gpu);
	if (ret)
		return ret;

	return 0;
}

int msm_gpu_hw_init(struct msm_gpu *gpu)
{
	int ret;

	WARN_ON(!mutex_is_locked(&gpu->dev->struct_mutex));

	if (!gpu->needs_hw_init)
		return 0;

	disable_irq(gpu->irq);
	ret = gpu->funcs->hw_init(gpu);
	if (!ret)
		gpu->needs_hw_init = false;
	enable_irq(gpu->irq);

	return ret;
}
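
/*
 * Crash state capture: on a detected hang the recover worker snapshots the
 * GPU state via gpu_state_get() and exposes it to userspace through the
 * devcoredump interface (dev_coredumpm()).
 */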

#ifdef CONFIG_DEV_COREDUMP
static ssize_t msm_gpu_devcoredump_read(char *buffer, loff_t offset,
		size_t count, void *data, size_t datalen)
{
	struct msm_gpu *gpu = data;
	struct drm_print_iterator iter;
	struct drm_printer p;
	struct msm_gpu_state *state;

	state = msm_gpu_crashstate_get(gpu);
	if (!state)
		return 0;

	iter.data = buffer;
	iter.offset = 0;
	iter.start = offset;
	iter.remain = count;

	p = drm_coredump_printer(&iter);

	drm_printf(&p, "---\n");
	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
	drm_printf(&p, "time: %lld.%09ld\n",
		state->time.tv_sec, state->time.tv_nsec);
	if (state->comm)
		drm_printf(&p, "comm: %s\n", state->comm);
	if (state->cmd)
		drm_printf(&p, "cmdline: %s\n", state->cmd);

	gpu->funcs->show(gpu, state, &p);

	msm_gpu_crashstate_put(gpu);

	return count - iter.remain;
}

static void msm_gpu_devcoredump_free(void *data)
{
	struct msm_gpu *gpu = data;

	msm_gpu_crashstate_put(gpu);
}

static void msm_gpu_crashstate_get_bo(struct msm_gpu_state *state,
		struct msm_gem_object *obj, u64 iova, u32 flags)
{
	struct msm_gpu_state_bo *state_bo = &state->bos[state->nr_bos];

	/* Don't record write only objects */

	state_bo->size = obj->base.size;
	state_bo->iova = iova;

	/* Only store the data for buffer objects marked for read */
	if ((flags & MSM_SUBMIT_BO_READ)) {
		void *ptr;

		state_bo->data = kvmalloc(obj->base.size, GFP_KERNEL);
		if (!state_bo->data)
			return;

		ptr = msm_gem_get_vaddr_active(&obj->base);
		if (IS_ERR(ptr)) {
			kvfree(state_bo->data);
			return;
		}

		memcpy(state_bo->data, ptr, obj->base.size);
		msm_gem_put_vaddr(&obj->base);
	}

	state->nr_bos++;
}

static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
		struct msm_gem_submit *submit, char *comm, char *cmd)
{
	struct msm_gpu_state *state;

	/* Only save one crash state at a time */
	if (gpu->crashstate)
		return;

	state = gpu->funcs->gpu_state_get(gpu);
	if (IS_ERR_OR_NULL(state))
		return;

	/* Fill in the additional crash state information */
	state->comm = kstrdup(comm, GFP_KERNEL);
	state->cmd = kstrdup(cmd, GFP_KERNEL);

	if (submit) {
		int i;

		state->bos = kcalloc(submit->nr_bos,
			sizeof(struct msm_gpu_state_bo), GFP_KERNEL);

		for (i = 0; state->bos && i < submit->nr_bos; i++)
			msm_gpu_crashstate_get_bo(state, submit->bos[i].obj,
				submit->bos[i].iova, submit->bos[i].flags);
	}

	/* Set the active crash state to be dumped on failure */
	gpu->crashstate = state;

	/* FIXME: Release the crashstate if this errors out? */
	dev_coredumpm(gpu->dev->dev, THIS_MODULE, gpu, 0, GFP_KERNEL,
		msm_gpu_devcoredump_read, msm_gpu_devcoredump_free);
}
#else
static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
		struct msm_gem_submit *submit, char *comm, char *cmd)
{
}
#endif

/*
 * Hangcheck detection for locked gpu:
 */

static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		uint32_t fence)
{
	struct msm_gem_submit *submit;

	list_for_each_entry(submit, &ring->submits, node) {
		if (submit->seqno > fence)
			break;

		msm_update_fence(submit->ring->fctx,
			submit->fence->seqno);
	}
}

static struct msm_gem_submit *
find_submit(struct msm_ringbuffer *ring, uint32_t fence)
{
	struct msm_gem_submit *submit;

	WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex));

	list_for_each_entry(submit, &ring->submits, node)
		if (submit->seqno == fence)
			return submit;

	return NULL;
}

static void retire_submits(struct msm_gpu *gpu);

static void recover_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_gem_submit *submit;
	struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
	char *comm = NULL, *cmd = NULL;
	int i;

	mutex_lock(&dev->struct_mutex);

	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);

	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
	if (submit) {
		struct task_struct *task;

		task = get_pid_task(submit->pid, PIDTYPE_PID);
		if (task) {
			comm = kstrdup(task->comm, GFP_KERNEL);

			/*
			 * So slightly annoying, in other paths like
			 * mmap'ing gem buffers, mmap_sem is acquired
			 * before struct_mutex, which means we can't
			 * hold struct_mutex across the call to
			 * get_cmdline().  But submits are retired
			 * from the same in-order workqueue, so we can
			 * safely drop the lock here without worrying
			 * about the submit going away.
			 */
			mutex_unlock(&dev->struct_mutex);
			cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
			put_task_struct(task);
			mutex_lock(&dev->struct_mutex);
		}

		if (comm && cmd) {
			dev_err(dev->dev, "%s: offending task: %s (%s)\n",
					gpu->name, comm, cmd);

			msm_rd_dump_submit(priv->hangrd, submit,
					"offending task: %s (%s)", comm, cmd);
		} else
			msm_rd_dump_submit(priv->hangrd, submit, NULL);
	}

	/* Record the crash state */
	pm_runtime_get_sync(&gpu->pdev->dev);
	msm_gpu_crashstate_capture(gpu, submit, comm, cmd);
	pm_runtime_put_sync(&gpu->pdev->dev);

	kfree(cmd);
	kfree(comm);

	/*
	 * Update all the rings with the latest and greatest fence.. this
	 * needs to happen after msm_rd_dump_submit() to ensure that the
	 * bo's referenced by the offending submit are still around.
	 */
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		uint32_t fence = ring->memptrs->fence;

		/*
		 * For the current (faulting?) ring/submit advance the fence by
		 * one more to clear the faulting submit
		 */
		if (ring == cur_ring)
			fence++;

		update_fences(gpu, ring, fence);
	}

	if (msm_gpu_active(gpu)) {
		/* retire completed submits, plus the one that hung: */
		retire_submits(gpu);

		pm_runtime_get_sync(&gpu->pdev->dev);
		gpu->funcs->recover(gpu);
		pm_runtime_put_sync(&gpu->pdev->dev);

		/*
		 * Replay all remaining submits starting with highest priority
		 * ring
		 */
		for (i = 0; i < gpu->nr_rings; i++) {
			struct msm_ringbuffer *ring = gpu->rb[i];

			list_for_each_entry(submit, &ring->submits, node)
				gpu->funcs->submit(gpu, submit, NULL);
		}
	}

	mutex_unlock(&dev->struct_mutex);
}
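
/*
 * The hangcheck timer periodically compares the last completed fence of the
 * active ring against the last submitted seqno; if no forward progress is
 * seen, the recover worker is scheduled.
 */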

static void hangcheck_timer_reset(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);
	mod_timer(&gpu->hangcheck_timer,
			round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
}

static void hangcheck_handler(struct timer_list *t)
{
	struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer);
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
	uint32_t fence = ring->memptrs->fence;

	if (fence != ring->hangcheck_fence) {
		/* some progress has been made.. ya! */
		ring->hangcheck_fence = fence;
	} else if (fence < ring->seqno) {
		/* no progress and not done.. hung! */
		ring->hangcheck_fence = fence;
		dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
				gpu->name, ring->id);
		dev_err(dev->dev, "%s: completed fence: %u\n",
				gpu->name, fence);
		dev_err(dev->dev, "%s: submitted fence: %u\n",
				gpu->name, ring->seqno);

		queue_work(priv->wq, &gpu->recover_work);
	}

	/* if still more pending work, reset the hangcheck timer: */
	if (ring->seqno > ring->hangcheck_fence)
		hangcheck_timer_reset(gpu);

	/* workaround for missing irq: */
	queue_work(priv->wq, &gpu->retire_work);
}

/*
 * Performance Counters:
 */

/* called under perf_lock */
static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs)
{
	uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)];
	int i, n = min(ncntrs, gpu->num_perfcntrs);

	/* read current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);

	/* update cntrs: */
	for (i = 0; i < n; i++)
		cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i];

	/* save current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		gpu->last_cntrs[i] = current_cntrs[i];

	return n;
}
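
/*
 * Software counters: track active vs. total time between samples, updated
 * under perf_lock from the submit and retire paths.
 */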

static void update_sw_cntrs(struct msm_gpu *gpu)
{
	ktime_t time;
	uint32_t elapsed;
	unsigned long flags;

	spin_lock_irqsave(&gpu->perf_lock, flags);
	if (!gpu->perfcntr_active)
		goto out;

	time = ktime_get();
	elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time));

	gpu->totaltime += elapsed;
	if (gpu->last_sample.active)
		gpu->activetime += elapsed;

	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = time;

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
{
	unsigned long flags;

	pm_runtime_get_sync(&gpu->pdev->dev);

	spin_lock_irqsave(&gpu->perf_lock, flags);
	/* we could dynamically enable/disable perfcntr registers too.. */
	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = ktime_get();
	gpu->activetime = gpu->totaltime = 0;
	gpu->perfcntr_active = true;
	update_hw_cntrs(gpu, 0, NULL);
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
{
	gpu->perfcntr_active = false;
	pm_runtime_put_sync(&gpu->pdev->dev);
}

/* returns -errno or # of cntrs sampled */
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&gpu->perf_lock, flags);

	if (!gpu->perfcntr_active) {
		ret = -EINVAL;
		goto out;
	}

	*activetime = gpu->activetime;
	*totaltime = gpu->totaltime;

	gpu->activetime = gpu->totaltime = 0;

	ret = update_hw_cntrs(gpu, ncntrs, cntrs);

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);

	return ret;
}

/*
 * Cmdstream submission/retirement:
 */

static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	int i;

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		/* move to inactive: */
		msm_gem_move_to_inactive(&msm_obj->base);
		msm_gem_put_iova(&msm_obj->base, gpu->aspace);
		drm_gem_object_put(&msm_obj->base);
	}

	pm_runtime_mark_last_busy(&gpu->pdev->dev);
	pm_runtime_put_autosuspend(&gpu->pdev->dev);
	msm_gem_submit_free(submit);
}

static void retire_submits(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	struct msm_gem_submit *submit, *tmp;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	/* Retire the commits starting with highest priority */
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		list_for_each_entry_safe(submit, tmp, &ring->submits, node) {
			if (dma_fence_is_signaled(submit->fence))
				retire_submit(gpu, submit);
		}
	}
}

static void retire_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
	struct drm_device *dev = gpu->dev;
	int i;

	for (i = 0; i < gpu->nr_rings; i++)
		update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence);

	mutex_lock(&dev->struct_mutex);
	retire_submits(gpu);
	mutex_unlock(&dev->struct_mutex);
}

/* call from irq handler to schedule work to retire bo's */
void msm_gpu_retire(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	queue_work(priv->wq, &gpu->retire_work);
	update_sw_cntrs(gpu);
}

/* add bo's to gpu's ring, and kick gpu: */
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	pm_runtime_get_sync(&gpu->pdev->dev);

	msm_gpu_hw_init(gpu);

	submit->seqno = ++ring->seqno;

	list_add_tail(&submit->node, &ring->submits);

	msm_rd_dump_submit(priv->rd, submit, NULL);

	update_sw_cntrs(gpu);

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		uint64_t iova;

		/* can't happen yet.. but when we add 2d support we'll have
		 * to deal w/ cross-ring synchronization:
		 */
		WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));

		/* submit takes a reference to the bo and iova until retired: */
		drm_gem_object_get(&msm_obj->base);
		msm_gem_get_iova(&msm_obj->base,
				submit->gpu->aspace, &iova);

		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
			msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
		else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
			msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence);
	}

	gpu->funcs->submit(gpu, submit, ctx);

	hangcheck_timer_reset(gpu);
}

static irqreturn_t irq_handler(int irq, void *data)
{
	struct msm_gpu *gpu = data;
	return gpu->funcs->irq(gpu);
}

static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
{
	int ret = msm_clk_bulk_get(&pdev->dev, &gpu->grp_clks);

	if (ret < 1) {
		gpu->nr_clocks = 0;
		return ret;
	}

	gpu->nr_clocks = ret;

	gpu->core_clk = msm_clk_bulk_get_clock(gpu->grp_clks,
		gpu->nr_clocks, "core");

	gpu->rbbmtimer_clk = msm_clk_bulk_get_clock(gpu->grp_clks,
		gpu->nr_clocks, "rbbmtimer");

	return 0;
}

static struct msm_gem_address_space *
msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev,
		uint64_t va_start, uint64_t va_end)
{
	struct iommu_domain *iommu;
	struct msm_gem_address_space *aspace;
	int ret;

	/*
	 * Setup IOMMU.. eventually we will (I think) do this once per context
	 * and have separate page tables per context.  For now, to keep things
	 * simple and to get something working, just use a single address space:
	 */
	iommu = iommu_domain_alloc(&platform_bus_type);
	if (!iommu)
		return NULL;

	iommu->geometry.aperture_start = va_start;
	iommu->geometry.aperture_end = va_end;

	dev_info(gpu->dev->dev, "%s: using IOMMU\n", gpu->name);

	aspace = msm_gem_address_space_create(&pdev->dev, iommu, "gpu");
	if (IS_ERR(aspace)) {
		dev_err(gpu->dev->dev, "failed to init iommu: %ld\n",
				PTR_ERR(aspace));
		iommu_domain_free(iommu);
		return ERR_CAST(aspace);
	}

	ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
	if (ret) {
		msm_gem_address_space_put(aspace);
		return ERR_PTR(ret);
	}

	return aspace;
}
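
/*
 * msm_gpu_init() maps the MMIO region, requests the IRQ, acquires clocks and
 * regulators, sets up devfreq and the GPU address space, and allocates the
 * shared memptrs buffer plus the ringbuffer(s).
 */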

int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, struct msm_gpu_config *config)
{
	int i, ret, nr_rings = config->nr_rings;
	void *memptrs;
	uint64_t memptrs_iova;

	if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
		gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);

	gpu->dev = drm;
	gpu->funcs = funcs;
	gpu->name = name;

	INIT_LIST_HEAD(&gpu->active_list);
	INIT_WORK(&gpu->retire_work, retire_worker);
	INIT_WORK(&gpu->recover_work, recover_worker);

	timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0);

	spin_lock_init(&gpu->perf_lock);

	/* Map registers: */
	gpu->mmio = msm_ioremap(pdev, config->ioname, name);
	if (IS_ERR(gpu->mmio)) {
		ret = PTR_ERR(gpu->mmio);
		goto fail;
	}

	/* Get Interrupt: */
	gpu->irq = platform_get_irq_byname(pdev, config->irqname);
	if (gpu->irq < 0) {
		ret = gpu->irq;
		dev_err(drm->dev, "failed to get irq: %d\n", ret);
		goto fail;
	}

	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
			IRQF_TRIGGER_HIGH, gpu->name, gpu);
	if (ret) {
		dev_err(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
		goto fail;
	}

	ret = get_clocks(pdev, gpu);
	if (ret)
		goto fail;

	gpu->ebi1_clk = msm_clk_get(pdev, "bus");
	DBG("ebi1_clk: %p", gpu->ebi1_clk);
	if (IS_ERR(gpu->ebi1_clk))
		gpu->ebi1_clk = NULL;

	/* Acquire regulators: */
	gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
	DBG("gpu_reg: %p", gpu->gpu_reg);
	if (IS_ERR(gpu->gpu_reg))
		gpu->gpu_reg = NULL;

	gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
	DBG("gpu_cx: %p", gpu->gpu_cx);
	if (IS_ERR(gpu->gpu_cx))
		gpu->gpu_cx = NULL;

	gpu->pdev = pdev;
	platform_set_drvdata(pdev, gpu);

	msm_devfreq_init(gpu);

	gpu->aspace = msm_gpu_create_address_space(gpu, pdev,
		config->va_start, config->va_end);

	if (gpu->aspace == NULL)
		dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
	else if (IS_ERR(gpu->aspace)) {
		ret = PTR_ERR(gpu->aspace);
		goto fail;
	}

	memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo),
		MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo,
		&memptrs_iova);

	if (IS_ERR(memptrs)) {
		ret = PTR_ERR(memptrs);
		dev_err(drm->dev, "could not allocate memptrs: %d\n", ret);
		goto fail;
	}

	if (nr_rings > ARRAY_SIZE(gpu->rb)) {
		DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n",
			ARRAY_SIZE(gpu->rb));
		nr_rings = ARRAY_SIZE(gpu->rb);
	}

	/* Create ringbuffer(s): */
	for (i = 0; i < nr_rings; i++) {
		gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova);

		if (IS_ERR(gpu->rb[i])) {
			ret = PTR_ERR(gpu->rb[i]);
			dev_err(drm->dev,
				"could not create ringbuffer %d: %d\n", i, ret);
			goto fail;
		}

		memptrs += sizeof(struct msm_rbmemptrs);
		memptrs_iova += sizeof(struct msm_rbmemptrs);
	}

	gpu->nr_rings = nr_rings;

	return 0;

fail:
	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

	if (gpu->memptrs_bo) {
		msm_gem_put_vaddr(gpu->memptrs_bo);
		msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace);
		drm_gem_object_put_unlocked(gpu->memptrs_bo);
	}

	platform_set_drvdata(pdev, NULL);
	return ret;
}

void msm_gpu_cleanup(struct msm_gpu *gpu)
{
	int i;

	DBG("%s", gpu->name);

	WARN_ON(!list_empty(&gpu->active_list));

	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

	if (gpu->memptrs_bo) {
		msm_gem_put_vaddr(gpu->memptrs_bo);
		msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace);
		drm_gem_object_put_unlocked(gpu->memptrs_bo);
	}

	if (!IS_ERR_OR_NULL(gpu->aspace)) {
		gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu,
				NULL, 0);
		msm_gem_address_space_put(gpu->aspace);
	}
}