/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "msm_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"

/*
 * Power Management:
 */

#ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
#include <mach/board.h>
static void bs_init(struct msm_gpu *gpu)
{
	if (gpu->bus_scale_table) {
		gpu->bsc = msm_bus_scale_register_client(gpu->bus_scale_table);
		DBG("bus scale client: %08x", gpu->bsc);
	}
}

static void bs_fini(struct msm_gpu *gpu)
{
	if (gpu->bsc) {
		msm_bus_scale_unregister_client(gpu->bsc);
		gpu->bsc = 0;
	}
}

static void bs_set(struct msm_gpu *gpu, int idx)
{
	if (gpu->bsc) {
		DBG("set bus scaling: %d", idx);
		msm_bus_scale_client_update_request(gpu->bsc, idx);
	}
}
#else
static void bs_init(struct msm_gpu *gpu) {}
static void bs_fini(struct msm_gpu *gpu) {}
static void bs_set(struct msm_gpu *gpu, int idx) {}
#endif

static int enable_pwrrail(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret = 0;

	if (gpu->gpu_reg) {
		ret = regulator_enable(gpu->gpu_reg);
		if (ret) {
			dev_err(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
			return ret;
		}
	}

	if (gpu->gpu_cx) {
		ret = regulator_enable(gpu->gpu_cx);
		if (ret) {
			dev_err(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
			return ret;
		}
	}

	return 0;
}

static int disable_pwrrail(struct msm_gpu *gpu)
{
	if (gpu->gpu_cx)
		regulator_disable(gpu->gpu_cx);
	if (gpu->gpu_reg)
		regulator_disable(gpu->gpu_reg);
	return 0;
}

static int enable_clk(struct msm_gpu *gpu)
{
	struct clk *rate_clk = NULL;
	int i;

	/* NOTE: kgsl_pwrctrl_clk() ignores grp_clks[0].. */
	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--) {
		if (gpu->grp_clks[i]) {
			clk_prepare(gpu->grp_clks[i]);
			rate_clk = gpu->grp_clks[i];
		}
	}

	if (rate_clk && gpu->fast_rate)
		clk_set_rate(rate_clk, gpu->fast_rate);

	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--)
		if (gpu->grp_clks[i])
			clk_enable(gpu->grp_clks[i]);

	return 0;
}

static int disable_clk(struct msm_gpu *gpu)
{
	struct clk *rate_clk = NULL;
	int i;

	/* NOTE: kgsl_pwrctrl_clk() ignores grp_clks[0].. */
	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--) {
		if (gpu->grp_clks[i]) {
			clk_disable(gpu->grp_clks[i]);
			rate_clk = gpu->grp_clks[i];
		}
	}

	if (rate_clk && gpu->slow_rate)
		clk_set_rate(rate_clk, gpu->slow_rate);

	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--)
		if (gpu->grp_clks[i])
			clk_unprepare(gpu->grp_clks[i]);

	return 0;
}

static int enable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_prepare_enable(gpu->ebi1_clk);
	if (gpu->bus_freq)
		bs_set(gpu, gpu->bus_freq);
	return 0;
}

static int disable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_disable_unprepare(gpu->ebi1_clk);
	if (gpu->bus_freq)
		bs_set(gpu, 0);
	return 0;
}

int msm_gpu_pm_resume(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret;

	DBG("%s: active_cnt=%d", gpu->name, gpu->active_cnt);

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	if (gpu->active_cnt++ > 0)
		return 0;

	if (WARN_ON(gpu->active_cnt <= 0))
		return -EINVAL;

	ret = enable_pwrrail(gpu);
	if (ret)
		return ret;

	ret = enable_clk(gpu);
	if (ret)
		return ret;

	ret = enable_axi(gpu);
	if (ret)
		return ret;

	return 0;
}

int msm_gpu_pm_suspend(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret;

	DBG("%s: active_cnt=%d", gpu->name, gpu->active_cnt);

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	if (--gpu->active_cnt > 0)
		return 0;

	if (WARN_ON(gpu->active_cnt < 0))
		return -EINVAL;

	ret = disable_axi(gpu);
	if (ret)
		return ret;

	ret = disable_clk(gpu);
	if (ret)
		return ret;

	ret = disable_pwrrail(gpu);
	if (ret)
		return ret;

	return 0;
}

/*
 * Inactivity detection (for suspend):
 */

static void inactive_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, inactive_work);
	struct drm_device *dev = gpu->dev;

	if (gpu->inactive)
		return;

	DBG("%s: inactive!\n", gpu->name);
	mutex_lock(&dev->struct_mutex);
	if (!(msm_gpu_active(gpu) || gpu->inactive)) {
		disable_axi(gpu);
		disable_clk(gpu);
		gpu->inactive = true;
	}
	mutex_unlock(&dev->struct_mutex);
}

static void inactive_handler(unsigned long data)
{
	struct msm_gpu *gpu = (struct msm_gpu *)data;
	struct msm_drm_private *priv = gpu->dev->dev_private;

	queue_work(priv->wq, &gpu->inactive_work);
}

/* cancel inactive timer and make sure we are awake: */
static void inactive_cancel(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);
	del_timer(&gpu->inactive_timer);
	if (gpu->inactive) {
		enable_clk(gpu);
		enable_axi(gpu);
		gpu->inactive = false;
	}
}

static void inactive_start(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);
	mod_timer(&gpu->inactive_timer,
			round_jiffies_up(jiffies + DRM_MSM_INACTIVE_JIFFIES));
}

/*
 * Hangcheck detection for locked gpu:
 */

static void retire_submits(struct msm_gpu *gpu, uint32_t fence);

static void recover_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
	struct drm_device *dev = gpu->dev;

	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);

	mutex_lock(&dev->struct_mutex);
	if (msm_gpu_active(gpu)) {
		struct msm_gem_submit *submit;
		uint32_t fence = gpu->funcs->last_fence(gpu);

		/* retire completed submits, plus the one that hung: */
		retire_submits(gpu, fence + 1);

		inactive_cancel(gpu);
		gpu->funcs->recover(gpu);

		/* replay the remaining submits after the one that hung: */
		list_for_each_entry(submit, &gpu->submit_list, node) {
			gpu->funcs->submit(gpu, submit, NULL);
		}
	}
	mutex_unlock(&dev->struct_mutex);

	msm_gpu_retire(gpu);
}

static void hangcheck_timer_reset(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);
	mod_timer(&gpu->hangcheck_timer,
			round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
}

static void hangcheck_handler(unsigned long data)
{
	struct msm_gpu *gpu = (struct msm_gpu *)data;
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	uint32_t fence = gpu->funcs->last_fence(gpu);

	if (fence != gpu->hangcheck_fence) {
		/* some progress has been made.. ya! */
		gpu->hangcheck_fence = fence;
	} else if (fence < gpu->submitted_fence) {
		/* no progress and not done.. hung! */
		gpu->hangcheck_fence = fence;
		dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n",
				gpu->name);
		dev_err(dev->dev, "%s: completed fence: %u\n",
				gpu->name, fence);
		dev_err(dev->dev, "%s: submitted fence: %u\n",
				gpu->name, gpu->submitted_fence);
		queue_work(priv->wq, &gpu->recover_work);
	}

	/* if still more pending work, reset the hangcheck timer: */
	if (gpu->submitted_fence > gpu->hangcheck_fence)
		hangcheck_timer_reset(gpu);

	/* workaround for missing irq: */
	queue_work(priv->wq, &gpu->retire_work);
}

/*
 * Performance Counters:
 */

/* called under perf_lock */
static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs)
{
	uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)];
	int i, n = min(ncntrs, gpu->num_perfcntrs);

	/* read current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);

	/* update cntrs: */
	for (i = 0; i < n; i++)
		cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i];

	/* save current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		gpu->last_cntrs[i] = current_cntrs[i];

	return n;
}

static void update_sw_cntrs(struct msm_gpu *gpu)
{
	ktime_t time;
	uint32_t elapsed;
	unsigned long flags;

	spin_lock_irqsave(&gpu->perf_lock, flags);
	if (!gpu->perfcntr_active)
		goto out;

	time = ktime_get();
	elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time));

	gpu->totaltime += elapsed;
	if (gpu->last_sample.active)
		gpu->activetime += elapsed;

	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = time;

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
{
	unsigned long flags;

	spin_lock_irqsave(&gpu->perf_lock, flags);
	/* we could dynamically enable/disable perfcntr registers too.. */
	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = ktime_get();
	gpu->activetime = gpu->totaltime = 0;
	gpu->perfcntr_active = true;
	update_hw_cntrs(gpu, 0, NULL);
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
{
	gpu->perfcntr_active = false;
}

/* returns -errno or # of cntrs sampled */
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&gpu->perf_lock, flags);

	if (!gpu->perfcntr_active) {
		ret = -EINVAL;
		goto out;
	}

	*activetime = gpu->activetime;
	*totaltime = gpu->totaltime;

	gpu->activetime = gpu->totaltime = 0;

	ret = update_hw_cntrs(gpu, ncntrs, cntrs);

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);

	return ret;
}

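/* Editor's illustration (not part of the driver): a rough sketch of how a
 * profiling or debugfs path might drive the counter API above; "active",
 * "total", "cntrs" and "n" are hypothetical locals, and the array size of
 * 5 is arbitrary:
 *
 *	uint32_t active, total, cntrs[5];
 *	int n;
 *
 *	msm_gpu_perfcntr_start(gpu);
 *	...
 *	n = msm_gpu_perfcntr_sample(gpu, &active, &total,
 *			ARRAY_SIZE(cntrs), cntrs);
 *	if (n >= 0)
 *		... active/total gives GPU utilization since the last
 *		    sample, cntrs[0..n-1] hold per-counter deltas ...
 *	msm_gpu_perfcntr_stop(gpu);
 */
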
/*
 * Cmdstream submission/retirement:
 */

static void retire_submits(struct msm_gpu *gpu, uint32_t fence)
{
	struct drm_device *dev = gpu->dev;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	while (!list_empty(&gpu->submit_list)) {
		struct msm_gem_submit *submit;

		submit = list_first_entry(&gpu->submit_list,
				struct msm_gem_submit, node);

		if (submit->fence <= fence) {
			list_del(&submit->node);
			kfree(submit);
		} else {
			break;
		}
	}
}

static void retire_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
	struct drm_device *dev = gpu->dev;
	uint32_t fence = gpu->funcs->last_fence(gpu);

	msm_update_fence(gpu->dev, fence);

	mutex_lock(&dev->struct_mutex);

	retire_submits(gpu, fence);

	while (!list_empty(&gpu->active_list)) {
		struct msm_gem_object *obj;

		obj = list_first_entry(&gpu->active_list,
				struct msm_gem_object, mm_list);

		if ((obj->read_fence <= fence) &&
				(obj->write_fence <= fence)) {
			/* move to inactive: */
			msm_gem_move_to_inactive(&obj->base);
			msm_gem_put_iova(&obj->base, gpu->id);
			drm_gem_object_unreference(&obj->base);
		} else {
			break;
		}
	}

	mutex_unlock(&dev->struct_mutex);

	if (!msm_gpu_active(gpu))
		inactive_start(gpu);
}

/* call from irq handler to schedule work to retire bo's */
void msm_gpu_retire(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	queue_work(priv->wq, &gpu->retire_work);
	update_sw_cntrs(gpu);
}

/* add bo's to gpu's ring, and kick gpu: */
int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	int i, ret;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	submit->fence = ++priv->next_fence;

	gpu->submitted_fence = submit->fence;

	inactive_cancel(gpu);

	list_add_tail(&submit->node, &gpu->submit_list);

	msm_rd_dump_submit(submit);

	gpu->submitted_fence = submit->fence;

	update_sw_cntrs(gpu);

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;

		/* can't happen yet.. but when we add 2d support we'll have
		 * to deal w/ cross-ring synchronization:
		 */
		WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));

		if (!is_active(msm_obj)) {
			uint32_t iova;

			/* ring takes a reference to the bo and iova: */
			drm_gem_object_reference(&msm_obj->base);
			msm_gem_get_iova_locked(&msm_obj->base,
					submit->gpu->id, &iova);
		}

		if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
			msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence);

		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
			msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
	}

	ret = gpu->funcs->submit(gpu, submit, ctx);

	hangcheck_timer_reset(gpu);

	return ret;
}

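/* Editor's illustration (not part of the driver): lifetime of a submit.
 * Each submit gets the next fence number and takes a reference on its BOs;
 * the IRQ handler calls msm_gpu_retire(), and retire_worker() then drops
 * the references for every BO whose read/write fences are <= the last
 * completed fence:
 *
 *	msm_gpu_submit() -> fence N assigned, BOs moved to active_list
 *	... hardware completes N ...
 *	irq -> msm_gpu_retire() -> retire_worker() -> retire_submits(N),
 *		BOs moved back to inactive, references dropped
 */
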
static irqreturn_t irq_handler(int irq, void *data)
{
	struct msm_gpu *gpu = data;
	return gpu->funcs->irq(gpu);
}

static const char *clk_names[] = {
		"src_clk", "core_clk", "iface_clk", "mem_clk", "mem_iface_clk",
};

int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, const char *ioname, const char *irqname, int ringsz)
{
	struct iommu_domain *iommu;
	int i, ret;

	if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
		gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);

	gpu->dev = drm;
	gpu->funcs = funcs;
	gpu->name = name;
	gpu->inactive = true;

	INIT_LIST_HEAD(&gpu->active_list);
	INIT_WORK(&gpu->retire_work, retire_worker);
	INIT_WORK(&gpu->inactive_work, inactive_worker);
	INIT_WORK(&gpu->recover_work, recover_worker);

	INIT_LIST_HEAD(&gpu->submit_list);

	setup_timer(&gpu->inactive_timer, inactive_handler,
			(unsigned long)gpu);
	setup_timer(&gpu->hangcheck_timer, hangcheck_handler,
			(unsigned long)gpu);

	spin_lock_init(&gpu->perf_lock);

	BUG_ON(ARRAY_SIZE(clk_names) != ARRAY_SIZE(gpu->grp_clks));

	/* Map registers: */
	gpu->mmio = msm_ioremap(pdev, ioname, name);
	if (IS_ERR(gpu->mmio)) {
		ret = PTR_ERR(gpu->mmio);
		goto fail;
	}

	/* Get Interrupt: */
	gpu->irq = platform_get_irq_byname(pdev, irqname);
	if (gpu->irq < 0) {
		ret = gpu->irq;
		dev_err(drm->dev, "failed to get irq: %d\n", ret);
		goto fail;
	}

	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
			IRQF_TRIGGER_HIGH, gpu->name, gpu);
	if (ret) {
		dev_err(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
		goto fail;
	}

	/* Acquire clocks: */
	for (i = 0; i < ARRAY_SIZE(clk_names); i++) {
		gpu->grp_clks[i] = devm_clk_get(&pdev->dev, clk_names[i]);
		DBG("grp_clks[%s]: %p", clk_names[i], gpu->grp_clks[i]);
		if (IS_ERR(gpu->grp_clks[i]))
			gpu->grp_clks[i] = NULL;
	}

	gpu->ebi1_clk = devm_clk_get(&pdev->dev, "bus_clk");
	DBG("ebi1_clk: %p", gpu->ebi1_clk);
	if (IS_ERR(gpu->ebi1_clk))
		gpu->ebi1_clk = NULL;

	/* Acquire regulators: */
	gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
	DBG("gpu_reg: %p", gpu->gpu_reg);
	if (IS_ERR(gpu->gpu_reg))
		gpu->gpu_reg = NULL;

	gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
	DBG("gpu_cx: %p", gpu->gpu_cx);
	if (IS_ERR(gpu->gpu_cx))
		gpu->gpu_cx = NULL;

	/* Setup IOMMU.. eventually we will (I think) do this once per context
	 * and have separate page tables per context.  For now, to keep things
	 * simple and to get something working, just use a single address space:
	 */
	iommu = iommu_domain_alloc(&platform_bus_type);
	if (iommu) {
		dev_info(drm->dev, "%s: using IOMMU\n", name);
		gpu->mmu = msm_iommu_new(&pdev->dev, iommu);
		if (IS_ERR(gpu->mmu)) {
			ret = PTR_ERR(gpu->mmu);
			dev_err(drm->dev, "failed to init iommu: %d\n", ret);
			gpu->mmu = NULL;
			iommu_domain_free(iommu);
			goto fail;
		}
	} else {
		dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
	}
	gpu->id = msm_register_mmu(drm, gpu->mmu);

	/* Create ringbuffer: */
	mutex_lock(&drm->struct_mutex);
	gpu->rb = msm_ringbuffer_new(gpu, ringsz);
	mutex_unlock(&drm->struct_mutex);
	if (IS_ERR(gpu->rb)) {
		ret = PTR_ERR(gpu->rb);
		gpu->rb = NULL;
		dev_err(drm->dev, "could not create ringbuffer: %d\n", ret);
		goto fail;
	}

	bs_init(gpu);

	return 0;

fail:
	return ret;
}

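/* Editor's illustration (not part of the driver): a hypothetical backend
 * would embed struct msm_gpu, fill in a msm_gpu_funcs table, and call
 * msm_gpu_init() from its own init path, roughly:
 *
 *	struct foo_gpu *foo = kzalloc(sizeof(*foo), GFP_KERNEL);
 *
 *	ret = msm_gpu_init(drm, pdev, &foo->base, &foo_gpu_funcs,
 *			"foo", "foo_mmio", "foo_irq", RB_SIZE);
 *
 * "foo_gpu_funcs", the resource names, and RB_SIZE are made up for this
 * sketch; the real names come from the backend and its DT/board files.
 */
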
void msm_gpu_cleanup(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);

	WARN_ON(!list_empty(&gpu->active_list));

	bs_fini(gpu);

	if (gpu->rb) {
		if (gpu->rb_iova)
			msm_gem_put_iova(gpu->rb->bo, gpu->id);
		msm_ringbuffer_destroy(gpu->rb);
	}

	if (gpu->mmu)
		gpu->mmu->funcs->destroy(gpu->mmu);
}