/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "msm_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_fence.h"
/*
 * Power Management:
 */

#ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
#include <mach/board.h>
static void bs_init(struct msm_gpu *gpu)
{
        if (gpu->bus_scale_table) {
                gpu->bsc = msm_bus_scale_register_client(gpu->bus_scale_table);
                DBG("bus scale client: %08x", gpu->bsc);
        }
}

static void bs_fini(struct msm_gpu *gpu)
{
        if (gpu->bsc) {
                msm_bus_scale_unregister_client(gpu->bsc);
                gpu->bsc = 0;
        }
}

static void bs_set(struct msm_gpu *gpu, int idx)
{
        if (gpu->bsc) {
                DBG("set bus scaling: %d", idx);
                msm_bus_scale_client_update_request(gpu->bsc, idx);
        }
}
#else
static void bs_init(struct msm_gpu *gpu) {}
static void bs_fini(struct msm_gpu *gpu) {}
static void bs_set(struct msm_gpu *gpu, int idx) {}
#endif
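/*
 * The helpers below are called from msm_gpu_pm_resume()/msm_gpu_pm_suspend()
 * in a fixed order: power rails first, then core clocks, then the AXI/bus
 * clock (and the reverse on suspend).  Missing resources are simply skipped,
 * since msm_gpu_init() leaves a clock or regulator NULL when it is not
 * provided.
 */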
static int enable_pwrrail(struct msm_gpu *gpu)
{
        struct drm_device *dev = gpu->dev;
        int ret = 0;

        if (gpu->gpu_reg) {
                ret = regulator_enable(gpu->gpu_reg);
                if (ret) {
                        dev_err(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
                        return ret;
                }
        }

        if (gpu->gpu_cx) {
                ret = regulator_enable(gpu->gpu_cx);
                if (ret) {
                        dev_err(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
                        return ret;
                }
        }

        return 0;
}
static int disable_pwrrail(struct msm_gpu *gpu)
{
        if (gpu->gpu_cx)
                regulator_disable(gpu->gpu_cx);
        if (gpu->gpu_reg)
                regulator_disable(gpu->gpu_reg);
        return 0;
}
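/*
 * Group clocks are prepared first, the target rate is set on the
 * lowest-indexed group clock (above index 0) that exists, and only then
 * are the clocks enabled; disable_clk() mirrors this in reverse, dropping
 * to slow_rate before unpreparing.
 */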
static int enable_clk(struct msm_gpu *gpu)
{
        struct clk *rate_clk = NULL;
        int i;

        /* NOTE: kgsl_pwrctrl_clk() ignores grp_clks[0].. */
        for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--) {
                if (gpu->grp_clks[i]) {
                        clk_prepare(gpu->grp_clks[i]);
                        rate_clk = gpu->grp_clks[i];
                }
        }

        if (rate_clk && gpu->fast_rate)
                clk_set_rate(rate_clk, gpu->fast_rate);

        for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--)
                if (gpu->grp_clks[i])
                        clk_enable(gpu->grp_clks[i]);

        return 0;
}
static int disable_clk(struct msm_gpu *gpu)
{
        struct clk *rate_clk = NULL;
        int i;

        /* NOTE: kgsl_pwrctrl_clk() ignores grp_clks[0].. */
        for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--) {
                if (gpu->grp_clks[i]) {
                        clk_disable(gpu->grp_clks[i]);
                        rate_clk = gpu->grp_clks[i];
                }
        }

        if (rate_clk && gpu->slow_rate)
                clk_set_rate(rate_clk, gpu->slow_rate);

        for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--)
                if (gpu->grp_clks[i])
                        clk_unprepare(gpu->grp_clks[i]);

        return 0;
}
static int enable_axi(struct msm_gpu *gpu)
{
        if (gpu->ebi1_clk)
                clk_prepare_enable(gpu->ebi1_clk);
        if (gpu->bus_freq)
                bs_set(gpu, gpu->bus_freq);
        return 0;
}
static int disable_axi(struct msm_gpu *gpu)
{
        if (gpu->ebi1_clk)
                clk_disable_unprepare(gpu->ebi1_clk);
        if (gpu->bus_freq)
                bs_set(gpu, 0);
        return 0;
}
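/*
 * Power state is reference counted via gpu->active_cnt, protected by
 * dev->struct_mutex: only the 0->1 transition in msm_gpu_pm_resume()
 * actually powers things up, and only the final msm_gpu_pm_suspend()
 * powers them back down.
 */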
int msm_gpu_pm_resume(struct msm_gpu *gpu)
{
        struct drm_device *dev = gpu->dev;
        int ret;

        DBG("%s: active_cnt=%d", gpu->name, gpu->active_cnt);

        WARN_ON(!mutex_is_locked(&dev->struct_mutex));

        if (gpu->active_cnt++ > 0)
                return 0;

        if (WARN_ON(gpu->active_cnt <= 0))
                return -EINVAL;

        ret = enable_pwrrail(gpu);
        if (ret)
                return ret;

        ret = enable_clk(gpu);
        if (ret)
                return ret;

        ret = enable_axi(gpu);
        if (ret)
                return ret;

        return 0;
}
int msm_gpu_pm_suspend(struct msm_gpu *gpu)
{
        struct drm_device *dev = gpu->dev;
        int ret;

        DBG("%s: active_cnt=%d", gpu->name, gpu->active_cnt);

        WARN_ON(!mutex_is_locked(&dev->struct_mutex));

        if (--gpu->active_cnt > 0)
                return 0;

        if (WARN_ON(gpu->active_cnt < 0))
                return -EINVAL;

        ret = disable_axi(gpu);
        if (ret)
                return ret;

        ret = disable_clk(gpu);
        if (ret)
                return ret;

        ret = disable_pwrrail(gpu);
        if (ret)
                return ret;

        return 0;
}
/*
 * Inactivity detection (for suspend):
 */
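/*
 * When the GPU goes idle, retire_worker() arms inactive_timer; the timer
 * handler punts to inactive_work, which drops the AXI and core clocks
 * (but not the power rails) once the GPU is confirmed idle.  New submits
 * call inactive_cancel() to re-enable the clocks and kill the timer.
 */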
static void inactive_worker(struct work_struct *work)
{
        struct msm_gpu *gpu = container_of(work, struct msm_gpu, inactive_work);
        struct drm_device *dev = gpu->dev;

        if (gpu->inactive)
                return;

        DBG("%s: inactive!\n", gpu->name);
        mutex_lock(&dev->struct_mutex);
        if (!(msm_gpu_active(gpu) || gpu->inactive)) {
                disable_axi(gpu);
                disable_clk(gpu);
                gpu->inactive = true;
        }
        mutex_unlock(&dev->struct_mutex);
}
static void inactive_handler(unsigned long data)
{
        struct msm_gpu *gpu = (struct msm_gpu *)data;
        struct msm_drm_private *priv = gpu->dev->dev_private;

        queue_work(priv->wq, &gpu->inactive_work);
}
/* cancel inactive timer and make sure we are awake: */
static void inactive_cancel(struct msm_gpu *gpu)
{
        DBG("%s", gpu->name);
        del_timer(&gpu->inactive_timer);
        if (gpu->inactive) {
                enable_clk(gpu);
                enable_axi(gpu);
                gpu->inactive = false;
        }
}
static void inactive_start(struct msm_gpu *gpu)
{
        DBG("%s", gpu->name);
        mod_timer(&gpu->inactive_timer,
                        round_jiffies_up(jiffies + DRM_MSM_INACTIVE_JIFFIES));
}
/*
 * Hangcheck detection for locked gpu:
 */

static void retire_submits(struct msm_gpu *gpu);
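/*
 * Recovery: signal the fence of the hung submit so waiters are not stuck,
 * log the task that submitted it (if known), reset the GPU via
 * funcs->recover(), and then replay the submits that were queued after
 * the hung one.
 */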
static void recover_worker(struct work_struct *work)
{
        struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
        struct drm_device *dev = gpu->dev;
        struct msm_gem_submit *submit;
        uint32_t fence = gpu->funcs->last_fence(gpu);

        msm_update_fence(gpu->fctx, fence + 1);

        mutex_lock(&dev->struct_mutex);

        dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
        list_for_each_entry(submit, &gpu->submit_list, node) {
                if (submit->fence->seqno == (fence + 1)) {
                        struct task_struct *task;

                        rcu_read_lock();
                        task = pid_task(submit->pid, PIDTYPE_PID);
                        if (task) {
                                dev_err(dev->dev, "%s: offending task: %s\n",
                                                gpu->name, task->comm);
                        }
                        rcu_read_unlock();
                        break;
                }
        }

        if (msm_gpu_active(gpu)) {
                /* retire completed submits, plus the one that hung: */
                retire_submits(gpu);

                inactive_cancel(gpu);
                gpu->funcs->recover(gpu);

                /* replay the remaining submits after the one that hung: */
                list_for_each_entry(submit, &gpu->submit_list, node) {
                        gpu->funcs->submit(gpu, submit, NULL);
                }
        }

        mutex_unlock(&dev->struct_mutex);

        msm_gpu_retire(gpu);
}
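/*
 * The hangcheck timer fires periodically while work is outstanding and
 * compares the last completed fence against the value seen on the previous
 * tick; if no progress was made while submitted fences are still pending,
 * recover_work is queued.
 */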
static void hangcheck_timer_reset(struct msm_gpu *gpu)
{
        DBG("%s", gpu->name);
        mod_timer(&gpu->hangcheck_timer,
                        round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
}
static void hangcheck_handler(unsigned long data)
{
        struct msm_gpu *gpu = (struct msm_gpu *)data;
        struct drm_device *dev = gpu->dev;
        struct msm_drm_private *priv = dev->dev_private;
        uint32_t fence = gpu->funcs->last_fence(gpu);

        if (fence != gpu->hangcheck_fence) {
                /* some progress has been made.. ya! */
                gpu->hangcheck_fence = fence;
        } else if (fence < gpu->fctx->last_fence) {
                /* no progress and not done.. hung! */
                gpu->hangcheck_fence = fence;
                dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n",
                                gpu->name);
                dev_err(dev->dev, "%s: completed fence: %u\n",
                                gpu->name, fence);
                dev_err(dev->dev, "%s: submitted fence: %u\n",
                                gpu->name, gpu->fctx->last_fence);
                queue_work(priv->wq, &gpu->recover_work);
        }

        /* if still more pending work, reset the hangcheck timer: */
        if (gpu->fctx->last_fence > gpu->hangcheck_fence)
                hangcheck_timer_reset(gpu);

        /* workaround for missing irq: */
        queue_work(priv->wq, &gpu->retire_work);
}
/*
 * Performance Counters:
 */
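/*
 * Software counters (activetime/totaltime) are accumulated under perf_lock;
 * hardware counters are sampled as deltas of each perfcntr's sample_reg
 * between calls to update_hw_cntrs().
 */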
/* called under perf_lock */
static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs)
{
        uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)];
        int i, n = min(ncntrs, gpu->num_perfcntrs);

        /* read current values: */
        for (i = 0; i < gpu->num_perfcntrs; i++)
                current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);

        /* update cntrs: */
        for (i = 0; i < n; i++)
                cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i];

        /* save current values: */
        for (i = 0; i < gpu->num_perfcntrs; i++)
                gpu->last_cntrs[i] = current_cntrs[i];

        return n;
}
static void update_sw_cntrs(struct msm_gpu *gpu)
{
        ktime_t time;
        uint32_t elapsed;
        unsigned long flags;

        spin_lock_irqsave(&gpu->perf_lock, flags);
        if (!gpu->perfcntr_active)
                goto out;

        time = ktime_get();
        elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time));

        gpu->totaltime += elapsed;
        if (gpu->last_sample.active)
                gpu->activetime += elapsed;

        gpu->last_sample.active = msm_gpu_active(gpu);
        gpu->last_sample.time = time;

out:
        spin_unlock_irqrestore(&gpu->perf_lock, flags);
}
void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
{
        unsigned long flags;

        spin_lock_irqsave(&gpu->perf_lock, flags);
        /* we could dynamically enable/disable perfcntr registers too.. */
        gpu->last_sample.active = msm_gpu_active(gpu);
        gpu->last_sample.time = ktime_get();
        gpu->activetime = gpu->totaltime = 0;
        gpu->perfcntr_active = true;
        update_hw_cntrs(gpu, 0, NULL);
        spin_unlock_irqrestore(&gpu->perf_lock, flags);
}
void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
{
        gpu->perfcntr_active = false;
}
/* returns -errno or # of cntrs sampled */
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
                uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&gpu->perf_lock, flags);

        if (!gpu->perfcntr_active) {
                ret = -EINVAL;
                goto out;
        }

        *activetime = gpu->activetime;
        *totaltime = gpu->totaltime;

        gpu->activetime = gpu->totaltime = 0;

        ret = update_hw_cntrs(gpu, ncntrs, cntrs);

out:
        spin_unlock_irqrestore(&gpu->perf_lock, flags);

        return ret;
}
/*
 * Cmdstream submission/retirement:
 */
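/*
 * Each submit holds a reference and an iova pin on its BOs from
 * msm_gpu_submit() until retire_submit() runs; retire_worker() (queued
 * from the IRQ path via msm_gpu_retire()) updates the fence context and
 * retires submits in queue order, stopping at the first one whose fence
 * has not yet signaled.
 */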
static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
        int i;

        for (i = 0; i < submit->nr_bos; i++) {
                struct msm_gem_object *msm_obj = submit->bos[i].obj;
                /* move to inactive: */
                msm_gem_move_to_inactive(&msm_obj->base);
                msm_gem_put_iova(&msm_obj->base, gpu->id);
                drm_gem_object_unreference(&msm_obj->base);
        }

        msm_gem_submit_free(submit);
}
static void retire_submits(struct msm_gpu *gpu)
{
        struct drm_device *dev = gpu->dev;

        WARN_ON(!mutex_is_locked(&dev->struct_mutex));

        while (!list_empty(&gpu->submit_list)) {
                struct msm_gem_submit *submit;

                submit = list_first_entry(&gpu->submit_list,
                                struct msm_gem_submit, node);

                if (fence_is_signaled(submit->fence)) {
                        retire_submit(gpu, submit);
                } else {
                        break;
                }
        }
}
static void retire_worker(struct work_struct *work)
{
        struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
        struct drm_device *dev = gpu->dev;
        uint32_t fence = gpu->funcs->last_fence(gpu);

        msm_update_fence(gpu->fctx, fence);

        mutex_lock(&dev->struct_mutex);
        retire_submits(gpu);
        mutex_unlock(&dev->struct_mutex);

        if (!msm_gpu_active(gpu))
                inactive_start(gpu);
}
/* call from irq handler to schedule work to retire bo's */
void msm_gpu_retire(struct msm_gpu *gpu)
{
        struct msm_drm_private *priv = gpu->dev->dev_private;
        queue_work(priv->wq, &gpu->retire_work);
        update_sw_cntrs(gpu);
}
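/*
 * msm_gpu_submit() must be called with dev->struct_mutex held; it also
 * cancels any pending inactivity timeout so the clocks stay on while the
 * ring is busy.
 */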
/* add bo's to gpu's ring, and kick gpu: */
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
                struct msm_file_private *ctx)
{
        struct drm_device *dev = gpu->dev;
        struct msm_drm_private *priv = dev->dev_private;
        int i;

        WARN_ON(!mutex_is_locked(&dev->struct_mutex));

        inactive_cancel(gpu);

        list_add_tail(&submit->node, &gpu->submit_list);

        msm_rd_dump_submit(submit);

        update_sw_cntrs(gpu);

        for (i = 0; i < submit->nr_bos; i++) {
                struct msm_gem_object *msm_obj = submit->bos[i].obj;
                uint32_t iova;

                /* can't happen yet.. but when we add 2d support we'll have
                 * to deal w/ cross-ring synchronization:
                 */
                WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));

                /* submit takes a reference to the bo and iova until retired: */
                drm_gem_object_reference(&msm_obj->base);
                msm_gem_get_iova_locked(&msm_obj->base,
                                submit->gpu->id, &iova);

                if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
                        msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
                else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
                        msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence);
        }

        gpu->funcs->submit(gpu, submit, ctx);
        priv->lastctx = ctx;

        hangcheck_timer_reset(gpu);
}
/*
 * Init/Cleanup:
 */

static irqreturn_t irq_handler(int irq, void *data)
{
        struct msm_gpu *gpu = data;
        return gpu->funcs->irq(gpu);
}
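/*
 * Note: this table must stay the same size as gpu->grp_clks[] (see the
 * BUG_ON in msm_gpu_init()); clocks that are absent on a given target are
 * tolerated and simply left NULL.
 */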
static const char *clk_names[] = {
                "src_clk", "core_clk", "iface_clk", "mem_clk", "mem_iface_clk",
};
int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
                struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
                const char *name, const char *ioname, const char *irqname, int ringsz)
{
        struct iommu_domain *iommu;
        int i, ret;

        if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
                gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);

        gpu->dev = drm;
        gpu->funcs = funcs;
        gpu->name = name;
        gpu->inactive = true;
        gpu->fctx = msm_fence_context_alloc(drm, name);
        if (IS_ERR(gpu->fctx)) {
                ret = PTR_ERR(gpu->fctx);
                gpu->fctx = NULL;
                goto fail;
        }

        INIT_LIST_HEAD(&gpu->active_list);
        INIT_WORK(&gpu->retire_work, retire_worker);
        INIT_WORK(&gpu->inactive_work, inactive_worker);
        INIT_WORK(&gpu->recover_work, recover_worker);

        INIT_LIST_HEAD(&gpu->submit_list);

        setup_timer(&gpu->inactive_timer, inactive_handler,
                        (unsigned long)gpu);
        setup_timer(&gpu->hangcheck_timer, hangcheck_handler,
                        (unsigned long)gpu);

        spin_lock_init(&gpu->perf_lock);

        BUG_ON(ARRAY_SIZE(clk_names) != ARRAY_SIZE(gpu->grp_clks));

        /* Map registers: */
        gpu->mmio = msm_ioremap(pdev, ioname, name);
        if (IS_ERR(gpu->mmio)) {
                ret = PTR_ERR(gpu->mmio);
                goto fail;
        }

        /* Get Interrupt: */
        gpu->irq = platform_get_irq_byname(pdev, irqname);
        if (gpu->irq < 0) {
                ret = gpu->irq;
                dev_err(drm->dev, "failed to get irq: %d\n", ret);
                goto fail;
        }

        ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
                        IRQF_TRIGGER_HIGH, gpu->name, gpu);
        if (ret) {
                dev_err(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
                goto fail;
        }

        /* Acquire clocks: */
        for (i = 0; i < ARRAY_SIZE(clk_names); i++) {
                gpu->grp_clks[i] = devm_clk_get(&pdev->dev, clk_names[i]);
                DBG("grp_clks[%s]: %p", clk_names[i], gpu->grp_clks[i]);
                if (IS_ERR(gpu->grp_clks[i]))
                        gpu->grp_clks[i] = NULL;
        }

        gpu->ebi1_clk = devm_clk_get(&pdev->dev, "bus_clk");
        DBG("ebi1_clk: %p", gpu->ebi1_clk);
        if (IS_ERR(gpu->ebi1_clk))
                gpu->ebi1_clk = NULL;

        /* Acquire regulators: */
        gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
        DBG("gpu_reg: %p", gpu->gpu_reg);
        if (IS_ERR(gpu->gpu_reg))
                gpu->gpu_reg = NULL;

        gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
        DBG("gpu_cx: %p", gpu->gpu_cx);
        if (IS_ERR(gpu->gpu_cx))
                gpu->gpu_cx = NULL;

        /* Setup IOMMU.. eventually we will (I think) do this once per context
         * and have separate page tables per context.  For now, to keep things
         * simple and to get something working, just use a single address space:
         */
        iommu = iommu_domain_alloc(&platform_bus_type);
        if (iommu) {
                dev_info(drm->dev, "%s: using IOMMU\n", name);
                gpu->mmu = msm_iommu_new(&pdev->dev, iommu);
                if (IS_ERR(gpu->mmu)) {
                        ret = PTR_ERR(gpu->mmu);
                        dev_err(drm->dev, "failed to init iommu: %d\n", ret);
                        gpu->mmu = NULL;
                        iommu_domain_free(iommu);
                        goto fail;
                }
        } else {
                dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
        }
        gpu->id = msm_register_mmu(drm, gpu->mmu);

        /* Create ringbuffer: */
        mutex_lock(&drm->struct_mutex);
        gpu->rb = msm_ringbuffer_new(gpu, ringsz);
        mutex_unlock(&drm->struct_mutex);
        if (IS_ERR(gpu->rb)) {
                ret = PTR_ERR(gpu->rb);
                gpu->rb = NULL;
                dev_err(drm->dev, "could not create ringbuffer: %d\n", ret);
                goto fail;
        }

        bs_init(gpu);

        return 0;

fail:
        return ret;
}
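/*
 * msm_gpu_cleanup() expects the GPU to be idle (no BOs on active_list) and
 * releases things in roughly the reverse order of msm_gpu_init().
 */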
void msm_gpu_cleanup(struct msm_gpu *gpu)
{
        DBG("%s", gpu->name);

        WARN_ON(!list_empty(&gpu->active_list));

        bs_fini(gpu);

        if (gpu->rb) {
                if (gpu->rb_iova)
                        msm_gem_put_iova(gpu->rb->bo, gpu->id);
                msm_ringbuffer_destroy(gpu->rb);
        }

        if (gpu->mmu)
                gpu->mmu->funcs->destroy(gpu->mmu);

        if (gpu->fctx)
                msm_fence_context_free(gpu->fctx);
}