/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#ifdef CONFIG_MSM_BUS_SCALING
#include <mach/board.h>

/*
 * Register a bus-scaling client for the GPU when a scale table is present.
 * The handle returned by msm_bus_scale_register_client() is kept in gpu->bsc.
 */
static void bs_init(struct msm_gpu *gpu)
{
	if (!gpu->bus_scale_table)
		return;

	gpu->bsc = msm_bus_scale_register_client(gpu->bus_scale_table);
	DBG("bus scale client: %08x", gpu->bsc);
}

/* Unregister the bus-scaling client, if any, and clear the stored handle. */
static void bs_fini(struct msm_gpu *gpu)
{
	if (!gpu->bsc)
		return;

	msm_bus_scale_unregister_client(gpu->bsc);
	gpu->bsc = 0;
}

/* Request bus-scaling index @idx; does nothing when no client is registered. */
static void bs_set(struct msm_gpu *gpu, int idx)
{
	if (!gpu->bsc)
		return;

	DBG("set bus scaling: %d", idx);
	msm_bus_scale_client_update_request(gpu->bsc, idx);
}
#else
/* Bus scaling not configured: the helpers are empty. */
static void bs_init(struct msm_gpu *gpu) {}
static void bs_fini(struct msm_gpu *gpu) {}
static void bs_set(struct msm_gpu *gpu, int idx) {}
#endif
58 static int enable_pwrrail(struct msm_gpu
*gpu
)
60 struct drm_device
*dev
= gpu
->dev
;
64 ret
= regulator_enable(gpu
->gpu_reg
);
66 dev_err(dev
->dev
, "failed to enable 'gpu_reg': %d\n", ret
);
72 ret
= regulator_enable(gpu
->gpu_cx
);
74 dev_err(dev
->dev
, "failed to enable 'gpu_cx': %d\n", ret
);
82 static int disable_pwrrail(struct msm_gpu
*gpu
)
85 regulator_disable(gpu
->gpu_cx
);
87 regulator_disable(gpu
->gpu_reg
);
91 static int enable_clk(struct msm_gpu
*gpu
)
93 struct clk
*rate_clk
= NULL
;
96 /* NOTE: kgsl_pwrctrl_clk() ignores grp_clks[0].. */
97 for (i
= ARRAY_SIZE(gpu
->grp_clks
) - 1; i
> 0; i
--) {
98 if (gpu
->grp_clks
[i
]) {
99 clk_prepare(gpu
->grp_clks
[i
]);
100 rate_clk
= gpu
->grp_clks
[i
];
104 if (rate_clk
&& gpu
->fast_rate
)
105 clk_set_rate(rate_clk
, gpu
->fast_rate
);
107 for (i
= ARRAY_SIZE(gpu
->grp_clks
) - 1; i
> 0; i
--)
108 if (gpu
->grp_clks
[i
])
109 clk_enable(gpu
->grp_clks
[i
]);
114 static int disable_clk(struct msm_gpu
*gpu
)
116 struct clk
*rate_clk
= NULL
;
119 /* NOTE: kgsl_pwrctrl_clk() ignores grp_clks[0].. */
120 for (i
= ARRAY_SIZE(gpu
->grp_clks
) - 1; i
> 0; i
--) {
121 if (gpu
->grp_clks
[i
]) {
122 clk_disable(gpu
->grp_clks
[i
]);
123 rate_clk
= gpu
->grp_clks
[i
];
127 if (rate_clk
&& gpu
->slow_rate
)
128 clk_set_rate(rate_clk
, gpu
->slow_rate
);
130 for (i
= ARRAY_SIZE(gpu
->grp_clks
) - 1; i
> 0; i
--)
131 if (gpu
->grp_clks
[i
])
132 clk_unprepare(gpu
->grp_clks
[i
]);
137 static int enable_axi(struct msm_gpu
*gpu
)
140 clk_prepare_enable(gpu
->ebi1_clk
);
142 bs_set(gpu
, gpu
->bus_freq
);
146 static int disable_axi(struct msm_gpu
*gpu
)
149 clk_disable_unprepare(gpu
->ebi1_clk
);
155 int msm_gpu_pm_resume(struct msm_gpu
*gpu
)
157 struct drm_device
*dev
= gpu
->dev
;
160 DBG("%s: active_cnt=%d", gpu
->name
, gpu
->active_cnt
);
162 WARN_ON(!mutex_is_locked(&dev
->struct_mutex
));
164 if (gpu
->active_cnt
++ > 0)
167 if (WARN_ON(gpu
->active_cnt
<= 0))
170 ret
= enable_pwrrail(gpu
);
174 ret
= enable_clk(gpu
);
178 ret
= enable_axi(gpu
);
185 int msm_gpu_pm_suspend(struct msm_gpu
*gpu
)
187 struct drm_device
*dev
= gpu
->dev
;
190 DBG("%s: active_cnt=%d", gpu
->name
, gpu
->active_cnt
);
192 WARN_ON(!mutex_is_locked(&dev
->struct_mutex
));
194 if (--gpu
->active_cnt
> 0)
197 if (WARN_ON(gpu
->active_cnt
< 0))
200 ret
= disable_axi(gpu
);
204 ret
= disable_clk(gpu
);
208 ret
= disable_pwrrail(gpu
);
/*
 * Inactivity detection (for suspend):
 */
219 static void inactive_worker(struct work_struct
*work
)
221 struct msm_gpu
*gpu
= container_of(work
, struct msm_gpu
, inactive_work
);
222 struct drm_device
*dev
= gpu
->dev
;
227 DBG("%s: inactive!\n", gpu
->name
);
228 mutex_lock(&dev
->struct_mutex
);
229 if (!(msm_gpu_active(gpu
) || gpu
->inactive
)) {
232 gpu
->inactive
= true;
234 mutex_unlock(&dev
->struct_mutex
);
237 static void inactive_handler(unsigned long data
)
239 struct msm_gpu
*gpu
= (struct msm_gpu
*)data
;
240 struct msm_drm_private
*priv
= gpu
->dev
->dev_private
;
242 queue_work(priv
->wq
, &gpu
->inactive_work
);
245 /* cancel inactive timer and make sure we are awake: */
246 static void inactive_cancel(struct msm_gpu
*gpu
)
248 DBG("%s", gpu
->name
);
249 del_timer(&gpu
->inactive_timer
);
253 gpu
->inactive
= false;
257 static void inactive_start(struct msm_gpu
*gpu
)
259 DBG("%s", gpu
->name
);
260 mod_timer(&gpu
->inactive_timer
,
261 round_jiffies_up(jiffies
+ DRM_MSM_INACTIVE_JIFFIES
));
/*
 * Hangcheck detection for locked gpu:
 */
268 static void recover_worker(struct work_struct
*work
)
270 struct msm_gpu
*gpu
= container_of(work
, struct msm_gpu
, recover_work
);
271 struct drm_device
*dev
= gpu
->dev
;
273 dev_err(dev
->dev
, "%s: hangcheck recover!\n", gpu
->name
);
275 mutex_lock(&dev
->struct_mutex
);
276 if (msm_gpu_active(gpu
)) {
277 inactive_cancel(gpu
);
278 gpu
->funcs
->recover(gpu
);
280 mutex_unlock(&dev
->struct_mutex
);
285 static void hangcheck_timer_reset(struct msm_gpu
*gpu
)
287 DBG("%s", gpu
->name
);
288 mod_timer(&gpu
->hangcheck_timer
,
289 round_jiffies_up(jiffies
+ DRM_MSM_HANGCHECK_JIFFIES
));
292 static void hangcheck_handler(unsigned long data
)
294 struct msm_gpu
*gpu
= (struct msm_gpu
*)data
;
295 struct drm_device
*dev
= gpu
->dev
;
296 struct msm_drm_private
*priv
= dev
->dev_private
;
297 uint32_t fence
= gpu
->funcs
->last_fence(gpu
);
299 if (fence
!= gpu
->hangcheck_fence
) {
300 /* some progress has been made.. ya! */
301 gpu
->hangcheck_fence
= fence
;
302 } else if (fence
< gpu
->submitted_fence
) {
303 /* no progress and not done.. hung! */
304 gpu
->hangcheck_fence
= fence
;
305 dev_err(dev
->dev
, "%s: hangcheck detected gpu lockup!\n",
307 dev_err(dev
->dev
, "%s: completed fence: %u\n",
309 dev_err(dev
->dev
, "%s: submitted fence: %u\n",
310 gpu
->name
, gpu
->submitted_fence
);
311 queue_work(priv
->wq
, &gpu
->recover_work
);
314 /* if still more pending work, reset the hangcheck timer: */
315 if (gpu
->submitted_fence
> gpu
->hangcheck_fence
)
316 hangcheck_timer_reset(gpu
);
318 /* workaround for missing irq: */
319 queue_work(priv
->wq
, &gpu
->retire_work
);
/*
 * Performance Counters:
 */
326 /* called under perf_lock */
327 static int update_hw_cntrs(struct msm_gpu
*gpu
, uint32_t ncntrs
, uint32_t *cntrs
)
329 uint32_t current_cntrs
[ARRAY_SIZE(gpu
->last_cntrs
)];
330 int i
, n
= min(ncntrs
, gpu
->num_perfcntrs
);
332 /* read current values: */
333 for (i
= 0; i
< gpu
->num_perfcntrs
; i
++)
334 current_cntrs
[i
] = gpu_read(gpu
, gpu
->perfcntrs
[i
].sample_reg
);
337 for (i
= 0; i
< n
; i
++)
338 cntrs
[i
] = current_cntrs
[i
] - gpu
->last_cntrs
[i
];
340 /* save current values: */
341 for (i
= 0; i
< gpu
->num_perfcntrs
; i
++)
342 gpu
->last_cntrs
[i
] = current_cntrs
[i
];
347 static void update_sw_cntrs(struct msm_gpu
*gpu
)
353 spin_lock_irqsave(&gpu
->perf_lock
, flags
);
354 if (!gpu
->perfcntr_active
)
358 elapsed
= ktime_to_us(ktime_sub(time
, gpu
->last_sample
.time
));
360 gpu
->totaltime
+= elapsed
;
361 if (gpu
->last_sample
.active
)
362 gpu
->activetime
+= elapsed
;
364 gpu
->last_sample
.active
= msm_gpu_active(gpu
);
365 gpu
->last_sample
.time
= time
;
368 spin_unlock_irqrestore(&gpu
->perf_lock
, flags
);
371 void msm_gpu_perfcntr_start(struct msm_gpu
*gpu
)
375 spin_lock_irqsave(&gpu
->perf_lock
, flags
);
376 /* we could dynamically enable/disable perfcntr registers too.. */
377 gpu
->last_sample
.active
= msm_gpu_active(gpu
);
378 gpu
->last_sample
.time
= ktime_get();
379 gpu
->activetime
= gpu
->totaltime
= 0;
380 gpu
->perfcntr_active
= true;
381 update_hw_cntrs(gpu
, 0, NULL
);
382 spin_unlock_irqrestore(&gpu
->perf_lock
, flags
);
385 void msm_gpu_perfcntr_stop(struct msm_gpu
*gpu
)
387 gpu
->perfcntr_active
= false;
390 /* returns -errno or # of cntrs sampled */
391 int msm_gpu_perfcntr_sample(struct msm_gpu
*gpu
, uint32_t *activetime
,
392 uint32_t *totaltime
, uint32_t ncntrs
, uint32_t *cntrs
)
397 spin_lock_irqsave(&gpu
->perf_lock
, flags
);
399 if (!gpu
->perfcntr_active
) {
404 *activetime
= gpu
->activetime
;
405 *totaltime
= gpu
->totaltime
;
407 gpu
->activetime
= gpu
->totaltime
= 0;
409 ret
= update_hw_cntrs(gpu
, ncntrs
, cntrs
);
412 spin_unlock_irqrestore(&gpu
->perf_lock
, flags
);
/*
 * Cmdstream submission/retirement:
 */
421 static void retire_worker(struct work_struct
*work
)
423 struct msm_gpu
*gpu
= container_of(work
, struct msm_gpu
, retire_work
);
424 struct drm_device
*dev
= gpu
->dev
;
425 uint32_t fence
= gpu
->funcs
->last_fence(gpu
);
427 msm_update_fence(gpu
->dev
, fence
);
429 mutex_lock(&dev
->struct_mutex
);
431 while (!list_empty(&gpu
->active_list
)) {
432 struct msm_gem_object
*obj
;
434 obj
= list_first_entry(&gpu
->active_list
,
435 struct msm_gem_object
, mm_list
);
437 if ((obj
->read_fence
<= fence
) &&
438 (obj
->write_fence
<= fence
)) {
439 /* move to inactive: */
440 msm_gem_move_to_inactive(&obj
->base
);
441 msm_gem_put_iova(&obj
->base
, gpu
->id
);
442 drm_gem_object_unreference(&obj
->base
);
448 mutex_unlock(&dev
->struct_mutex
);
450 if (!msm_gpu_active(gpu
))
454 /* call from irq handler to schedule work to retire bo's */
455 void msm_gpu_retire(struct msm_gpu
*gpu
)
457 struct msm_drm_private
*priv
= gpu
->dev
->dev_private
;
458 queue_work(priv
->wq
, &gpu
->retire_work
);
459 update_sw_cntrs(gpu
);
462 /* add bo's to gpu's ring, and kick gpu: */
463 int msm_gpu_submit(struct msm_gpu
*gpu
, struct msm_gem_submit
*submit
,
464 struct msm_file_private
*ctx
)
466 struct drm_device
*dev
= gpu
->dev
;
467 struct msm_drm_private
*priv
= dev
->dev_private
;
470 submit
->fence
= ++priv
->next_fence
;
472 gpu
->submitted_fence
= submit
->fence
;
474 inactive_cancel(gpu
);
476 msm_rd_dump_submit(submit
);
478 gpu
->submitted_fence
= submit
->fence
;
480 update_sw_cntrs(gpu
);
482 ret
= gpu
->funcs
->submit(gpu
, submit
, ctx
);
485 for (i
= 0; i
< submit
->nr_bos
; i
++) {
486 struct msm_gem_object
*msm_obj
= submit
->bos
[i
].obj
;
488 /* can't happen yet.. but when we add 2d support we'll have
489 * to deal w/ cross-ring synchronization:
491 WARN_ON(is_active(msm_obj
) && (msm_obj
->gpu
!= gpu
));
493 if (!is_active(msm_obj
)) {
496 /* ring takes a reference to the bo and iova: */
497 drm_gem_object_reference(&msm_obj
->base
);
498 msm_gem_get_iova_locked(&msm_obj
->base
,
499 submit
->gpu
->id
, &iova
);
502 if (submit
->bos
[i
].flags
& MSM_SUBMIT_BO_READ
)
503 msm_gem_move_to_active(&msm_obj
->base
, gpu
, false, submit
->fence
);
505 if (submit
->bos
[i
].flags
& MSM_SUBMIT_BO_WRITE
)
506 msm_gem_move_to_active(&msm_obj
->base
, gpu
, true, submit
->fence
);
508 hangcheck_timer_reset(gpu
);
517 static irqreturn_t
irq_handler(int irq
, void *data
)
519 struct msm_gpu
*gpu
= data
;
520 return gpu
->funcs
->irq(gpu
);
/*
 * Names used to look up the GPU group clocks; entry i corresponds to
 * grp_clks[i].  Both the strings and the table itself are read-only.
 */
static const char * const clk_names[] = {
	"src_clk", "core_clk", "iface_clk", "mem_clk", "mem_iface_clk",
};
527 int msm_gpu_init(struct drm_device
*drm
, struct platform_device
*pdev
,
528 struct msm_gpu
*gpu
, const struct msm_gpu_funcs
*funcs
,
529 const char *name
, const char *ioname
, const char *irqname
, int ringsz
)
531 struct iommu_domain
*iommu
;
534 if (WARN_ON(gpu
->num_perfcntrs
> ARRAY_SIZE(gpu
->last_cntrs
)))
535 gpu
->num_perfcntrs
= ARRAY_SIZE(gpu
->last_cntrs
);
540 gpu
->inactive
= true;
542 INIT_LIST_HEAD(&gpu
->active_list
);
543 INIT_WORK(&gpu
->retire_work
, retire_worker
);
544 INIT_WORK(&gpu
->inactive_work
, inactive_worker
);
545 INIT_WORK(&gpu
->recover_work
, recover_worker
);
547 setup_timer(&gpu
->inactive_timer
, inactive_handler
,
549 setup_timer(&gpu
->hangcheck_timer
, hangcheck_handler
,
552 spin_lock_init(&gpu
->perf_lock
);
554 BUG_ON(ARRAY_SIZE(clk_names
) != ARRAY_SIZE(gpu
->grp_clks
));
557 gpu
->mmio
= msm_ioremap(pdev
, ioname
, name
);
558 if (IS_ERR(gpu
->mmio
)) {
559 ret
= PTR_ERR(gpu
->mmio
);
564 gpu
->irq
= platform_get_irq_byname(pdev
, irqname
);
567 dev_err(drm
->dev
, "failed to get irq: %d\n", ret
);
571 ret
= devm_request_irq(&pdev
->dev
, gpu
->irq
, irq_handler
,
572 IRQF_TRIGGER_HIGH
, gpu
->name
, gpu
);
574 dev_err(drm
->dev
, "failed to request IRQ%u: %d\n", gpu
->irq
, ret
);
578 /* Acquire clocks: */
579 for (i
= 0; i
< ARRAY_SIZE(clk_names
); i
++) {
580 gpu
->grp_clks
[i
] = devm_clk_get(&pdev
->dev
, clk_names
[i
]);
581 DBG("grp_clks[%s]: %p", clk_names
[i
], gpu
->grp_clks
[i
]);
582 if (IS_ERR(gpu
->grp_clks
[i
]))
583 gpu
->grp_clks
[i
] = NULL
;
586 gpu
->ebi1_clk
= devm_clk_get(&pdev
->dev
, "bus_clk");
587 DBG("ebi1_clk: %p", gpu
->ebi1_clk
);
588 if (IS_ERR(gpu
->ebi1_clk
))
589 gpu
->ebi1_clk
= NULL
;
591 /* Acquire regulators: */
592 gpu
->gpu_reg
= devm_regulator_get(&pdev
->dev
, "vdd");
593 DBG("gpu_reg: %p", gpu
->gpu_reg
);
594 if (IS_ERR(gpu
->gpu_reg
))
597 gpu
->gpu_cx
= devm_regulator_get(&pdev
->dev
, "vddcx");
598 DBG("gpu_cx: %p", gpu
->gpu_cx
);
599 if (IS_ERR(gpu
->gpu_cx
))
602 /* Setup IOMMU.. eventually we will (I think) do this once per context
603 * and have separate page tables per context. For now, to keep things
604 * simple and to get something working, just use a single address space:
606 iommu
= iommu_domain_alloc(&platform_bus_type
);
608 dev_info(drm
->dev
, "%s: using IOMMU\n", name
);
609 gpu
->mmu
= msm_iommu_new(drm
, iommu
);
611 dev_info(drm
->dev
, "%s: no IOMMU, fallback to VRAM carveout!\n", name
);
613 gpu
->id
= msm_register_mmu(drm
, gpu
->mmu
);
615 /* Create ringbuffer: */
616 gpu
->rb
= msm_ringbuffer_new(gpu
, ringsz
);
617 if (IS_ERR(gpu
->rb
)) {
618 ret
= PTR_ERR(gpu
->rb
);
620 dev_err(drm
->dev
, "could not create ringbuffer: %d\n", ret
);
624 ret
= msm_gem_get_iova_locked(gpu
->rb
->bo
, gpu
->id
, &gpu
->rb_iova
);
627 dev_err(drm
->dev
, "could not map ringbuffer: %d\n", ret
);
639 void msm_gpu_cleanup(struct msm_gpu
*gpu
)
641 DBG("%s", gpu
->name
);
643 WARN_ON(!list_empty(&gpu
->active_list
));
649 msm_gem_put_iova(gpu
->rb
->bo
, gpu
->id
);
650 msm_ringbuffer_destroy(gpu
->rb
);
654 gpu
->mmu
->funcs
->destroy(gpu
->mmu
);