/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#include "msm_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_fence.h"

/*
 * Power Management:
 */
#ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
#include <mach/board.h>
static void bs_init(struct msm_gpu *gpu)
{
	if (gpu->bus_scale_table) {
		gpu->bsc = msm_bus_scale_register_client(gpu->bus_scale_table);
		DBG("bus scale client: %08x", gpu->bsc);
	}
}
static void bs_fini(struct msm_gpu *gpu)
{
	if (gpu->bsc) {
		msm_bus_scale_unregister_client(gpu->bsc);
		gpu->bsc = 0;
	}
}
static void bs_set(struct msm_gpu *gpu, int idx)
{
	if (gpu->bsc) {
		DBG("set bus scaling: %d", idx);
		msm_bus_scale_client_update_request(gpu->bsc, idx);
	}
}
#else
static void bs_init(struct msm_gpu *gpu) {}
static void bs_fini(struct msm_gpu *gpu) {}
static void bs_set(struct msm_gpu *gpu, int idx) {}
#endif
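/* Note: with the downstream bus-scaling support compiled out, the three
 * helpers above become empty stubs, so the rest of this file can call
 * bs_init()/bs_fini()/bs_set() unconditionally.
 */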
static int enable_pwrrail(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret = 0;

	if (gpu->gpu_reg) {
		ret = regulator_enable(gpu->gpu_reg);
		if (ret) {
			dev_err(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
			return ret;
		}
	}

	if (gpu->gpu_cx) {
		ret = regulator_enable(gpu->gpu_cx);
		if (ret) {
			dev_err(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
			return ret;
		}
	}

	return 0;
}
static int disable_pwrrail(struct msm_gpu *gpu)
{
	if (gpu->gpu_cx)
		regulator_disable(gpu->gpu_cx);
	if (gpu->gpu_reg)
		regulator_disable(gpu->gpu_reg);
	return 0;
}
static int enable_clk(struct msm_gpu *gpu)
{
	int i;

	if (gpu->grp_clks[0] && gpu->fast_rate)
		clk_set_rate(gpu->grp_clks[0], gpu->fast_rate);

	/* Set the RBBM timer rate to 19.2Mhz */
	if (gpu->grp_clks[2])
		clk_set_rate(gpu->grp_clks[2], 19200000);

	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_prepare(gpu->grp_clks[i]);

	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_enable(gpu->grp_clks[i]);

	return 0;
}
static int disable_clk(struct msm_gpu *gpu)
{
	int i;

	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_disable(gpu->grp_clks[i]);

	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_unprepare(gpu->grp_clks[i]);

	if (gpu->grp_clks[0] && gpu->slow_rate)
		clk_set_rate(gpu->grp_clks[0], gpu->slow_rate);

	if (gpu->grp_clks[2])
		clk_set_rate(gpu->grp_clks[2], 0);

	return 0;
}
static int enable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_prepare_enable(gpu->ebi1_clk);
	if (gpu->bus_freq)
		bs_set(gpu, gpu->bus_freq);
	return 0;
}
static int disable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_disable_unprepare(gpu->ebi1_clk);
	if (gpu->bus_freq)
		bs_set(gpu, 0);
	return 0;
}
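/* Power sequencing note: msm_gpu_pm_resume() below brings things up in the
 * order rail -> core clocks -> AXI/bus (enable_pwrrail, enable_clk,
 * enable_axi); msm_gpu_pm_suspend() tears them down in the reverse order.
 */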
int msm_gpu_pm_resume(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret;

	DBG("%s: active_cnt=%d", gpu->name, gpu->active_cnt);

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	if (gpu->active_cnt++ > 0)
		return 0;

	if (WARN_ON(gpu->active_cnt <= 0))
		return -EINVAL;

	ret = enable_pwrrail(gpu);
	if (ret)
		return ret;

	ret = enable_clk(gpu);
	if (ret)
		return ret;

	ret = enable_axi(gpu);
	if (ret)
		return ret;

	return 0;
}
int msm_gpu_pm_suspend(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret;

	DBG("%s: active_cnt=%d", gpu->name, gpu->active_cnt);

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	if (--gpu->active_cnt > 0)
		return 0;

	if (WARN_ON(gpu->active_cnt < 0))
		return -EINVAL;

	ret = disable_axi(gpu);
	if (ret)
		return ret;

	ret = disable_clk(gpu);
	if (ret)
		return ret;

	ret = disable_pwrrail(gpu);
	if (ret)
		return ret;

	return 0;
}
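/* Rough usage sketch (hypothetical caller, for illustration only): the
 * resume/suspend pair is refcounted via gpu->active_cnt and must be called
 * with dev->struct_mutex held, e.g.:
 *
 *	mutex_lock(&dev->struct_mutex);
 *	ret = msm_gpu_pm_resume(gpu);
 *	if (!ret) {
 *		... touch the hw ...
 *		msm_gpu_pm_suspend(gpu);
 *	}
 *	mutex_unlock(&dev->struct_mutex);
 */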
/*
 * Inactivity detection (for suspend):
 */
static void inactive_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, inactive_work);
	struct drm_device *dev = gpu->dev;

	if (gpu->inactive)
		return;

	DBG("%s: inactive!\n", gpu->name);
	mutex_lock(&dev->struct_mutex);
	if (!(msm_gpu_active(gpu) || gpu->inactive)) {
		disable_axi(gpu);
		disable_clk(gpu);
		gpu->inactive = true;
	}
	mutex_unlock(&dev->struct_mutex);
}
static void inactive_handler(unsigned long data)
{
	struct msm_gpu *gpu = (struct msm_gpu *)data;
	struct msm_drm_private *priv = gpu->dev->dev_private;

	queue_work(priv->wq, &gpu->inactive_work);
}
/* cancel inactive timer and make sure we are awake: */
static void inactive_cancel(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);
	del_timer(&gpu->inactive_timer);
	if (gpu->inactive) {
		enable_clk(gpu);
		enable_axi(gpu);
		gpu->inactive = false;
	}
}
static void inactive_start(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);
	mod_timer(&gpu->inactive_timer,
			round_jiffies_up(jiffies + DRM_MSM_INACTIVE_JIFFIES));
}
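/* Inactivity flow, as wired up below: msm_gpu_submit() calls
 * inactive_cancel() so the GPU stays clocked while work is queued;
 * retire_worker() calls inactive_start() once the GPU goes idle, arming
 * inactive_timer; the timer handler queues inactive_work, and
 * inactive_worker() finally gates the clocks/bus and marks the GPU inactive.
 */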
/*
 * Hangcheck detection for locked gpu:
 */
static void retire_submits(struct msm_gpu *gpu);
static void recover_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
	struct drm_device *dev = gpu->dev;
	struct msm_gem_submit *submit;
	uint32_t fence = gpu->funcs->last_fence(gpu);

	msm_update_fence(gpu->fctx, fence + 1);

	mutex_lock(&dev->struct_mutex);

	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
	list_for_each_entry(submit, &gpu->submit_list, node) {
		if (submit->fence->seqno == (fence + 1)) {
			struct task_struct *task;

			rcu_read_lock();
			task = pid_task(submit->pid, PIDTYPE_PID);
			if (task) {
				dev_err(dev->dev, "%s: offending task: %s\n",
						gpu->name, task->comm);
			}
			rcu_read_unlock();
			break;
		}
	}

	if (msm_gpu_active(gpu)) {
		/* retire completed submits, plus the one that hung: */
		retire_submits(gpu);

		inactive_cancel(gpu);
		gpu->funcs->recover(gpu);

		/* replay the remaining submits after the one that hung: */
		list_for_each_entry(submit, &gpu->submit_list, node) {
			gpu->funcs->submit(gpu, submit, NULL);
		}
	}

	mutex_unlock(&dev->struct_mutex);

	msm_gpu_retire(gpu);
}
static void hangcheck_timer_reset(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);
	mod_timer(&gpu->hangcheck_timer,
			round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
}
static void hangcheck_handler(unsigned long data)
{
	struct msm_gpu *gpu = (struct msm_gpu *)data;
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	uint32_t fence = gpu->funcs->last_fence(gpu);

	if (fence != gpu->hangcheck_fence) {
		/* some progress has been made.. ya! */
		gpu->hangcheck_fence = fence;
	} else if (fence < gpu->fctx->last_fence) {
		/* no progress and not done.. hung! */
		gpu->hangcheck_fence = fence;
		dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n",
				gpu->name);
		dev_err(dev->dev, "%s: completed fence: %u\n",
				gpu->name, fence);
		dev_err(dev->dev, "%s: submitted fence: %u\n",
				gpu->name, gpu->fctx->last_fence);
		queue_work(priv->wq, &gpu->recover_work);
	}

	/* if still more pending work, reset the hangcheck timer: */
	if (gpu->fctx->last_fence > gpu->hangcheck_fence)
		hangcheck_timer_reset(gpu);

	/* workaround for missing irq: */
	queue_work(priv->wq, &gpu->retire_work);
}
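/* Hangcheck summary: each timer tick compares the last completed fence with
 * the value seen on the previous tick.  Any progress just updates the
 * baseline; no progress while fences are still outstanding
 * (fence < fctx->last_fence) is treated as a lockup and recover_work is
 * queued.  The timer is only re-armed while work is pending, so an idle GPU
 * is never flagged.
 */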
/*
 * Performance Counters:
 */
/* called under perf_lock */
static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs)
{
	uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)];
	int i, n = min(ncntrs, gpu->num_perfcntrs);

	/* read current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);

	/* update cntrs: */
	for (i = 0; i < n; i++)
		cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i];

	/* save current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		gpu->last_cntrs[i] = current_cntrs[i];

	return n;
}
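/* Note: update_hw_cntrs() reports deltas, not raw register values: each
 * sample is the difference between the current sample_reg reading and the
 * reading captured on the previous call, so callers see per-interval counts.
 */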
static void update_sw_cntrs(struct msm_gpu *gpu)
{
	ktime_t time;
	uint32_t elapsed;
	unsigned long flags;

	spin_lock_irqsave(&gpu->perf_lock, flags);
	if (!gpu->perfcntr_active)
		goto out;

	time = ktime_get();
	elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time));

	gpu->totaltime += elapsed;
	if (gpu->last_sample.active)
		gpu->activetime += elapsed;

	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = time;

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}
void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
{
	unsigned long flags;

	spin_lock_irqsave(&gpu->perf_lock, flags);
	/* we could dynamically enable/disable perfcntr registers too.. */
	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = ktime_get();
	gpu->activetime = gpu->totaltime = 0;
	gpu->perfcntr_active = true;
	update_hw_cntrs(gpu, 0, NULL);
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}
void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
{
	gpu->perfcntr_active = false;
}
/* returns -errno or # of cntrs sampled */
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&gpu->perf_lock, flags);

	if (!gpu->perfcntr_active) {
		ret = -EINVAL;
		goto out;
	}

	*activetime = gpu->activetime;
	*totaltime = gpu->totaltime;

	gpu->activetime = gpu->totaltime = 0;

	ret = update_hw_cntrs(gpu, ncntrs, cntrs);

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);

	return ret;
}
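/* Rough usage sketch (hypothetical caller, e.g. a debugfs reader, for
 * illustration only):
 *
 *	uint32_t active, total, cntrs[4];
 *	int n;
 *
 *	msm_gpu_perfcntr_start(gpu);
 *	...
 *	n = msm_gpu_perfcntr_sample(gpu, &active, &total,
 *			ARRAY_SIZE(cntrs), cntrs);
 *	if (n >= 0)
 *		... consume 'n' counter deltas plus active/total time ...
 *	...
 *	msm_gpu_perfcntr_stop(gpu);
 */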
/*
 * Cmdstream submission/retirement:
 */
static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	int i;

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		/* move to inactive: */
		msm_gem_move_to_inactive(&msm_obj->base);
		msm_gem_put_iova(&msm_obj->base, gpu->id);
		drm_gem_object_unreference(&msm_obj->base);
	}

	msm_gem_submit_free(submit);
}
static void retire_submits(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	while (!list_empty(&gpu->submit_list)) {
		struct msm_gem_submit *submit;

		submit = list_first_entry(&gpu->submit_list,
				struct msm_gem_submit, node);

		if (dma_fence_is_signaled(submit->fence)) {
			retire_submit(gpu, submit);
		} else {
			break;
		}
	}
}
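/* Note: submits are retired strictly in queue order; the loop above stops at
 * the first submit whose fence has not signaled yet, so later (still pending)
 * submits keep their references and iovas.
 */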
static void retire_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
	struct drm_device *dev = gpu->dev;
	uint32_t fence = gpu->funcs->last_fence(gpu);

	msm_update_fence(gpu->fctx, fence);

	mutex_lock(&dev->struct_mutex);
	retire_submits(gpu);
	mutex_unlock(&dev->struct_mutex);

	if (!msm_gpu_active(gpu))
		inactive_start(gpu);
}
/* call from irq handler to schedule work to retire bo's */
void msm_gpu_retire(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	queue_work(priv->wq, &gpu->retire_work);
	update_sw_cntrs(gpu);
}
/* add bo's to gpu's ring, and kick gpu: */
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	inactive_cancel(gpu);

	list_add_tail(&submit->node, &gpu->submit_list);

	msm_rd_dump_submit(submit);

	update_sw_cntrs(gpu);

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		uint64_t iova;

		/* can't happen yet.. but when we add 2d support we'll have
		 * to deal w/ cross-ring synchronization:
		 */
		WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));

		/* submit takes a reference to the bo and iova until retired: */
		drm_gem_object_reference(&msm_obj->base);
		msm_gem_get_iova_locked(&msm_obj->base,
				submit->gpu->id, &iova);

		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
			msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
		else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
			msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence);
	}

	gpu->funcs->submit(gpu, submit, ctx);
	priv->lastctx = ctx;

	hangcheck_timer_reset(gpu);
}
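/* Submission path summary: the caller must hold dev->struct_mutex.  Each
 * submit is appended to gpu->submit_list, every referenced BO gains a
 * reference and a pinned iova until retire_submit() drops them, and the
 * hangcheck timer is (re)armed after the cmdstream is handed to the backend
 * via gpu->funcs->submit().
 */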
static irqreturn_t irq_handler(int irq, void *data)
{
	struct msm_gpu *gpu = data;
	return gpu->funcs->irq(gpu);
}
static const char *clk_names[] = {
	"core_clk", "iface_clk", "rbbmtimer_clk", "mem_clk",
	"mem_iface_clk", "alt_mem_iface_clk",
};
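/* Note: enable_clk()/disable_clk() above assume this ordering: grp_clks[0]
 * (core_clk) is the clock whose rate follows fast_rate/slow_rate, and
 * grp_clks[2] (rbbmtimer_clk) is the clock pinned to 19.2MHz while the GPU
 * is up.
 */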
int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, const char *ioname, const char *irqname, int ringsz)
{
	struct iommu_domain *iommu;
	int i, ret;

	if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
		gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);

	gpu->dev = drm;
	gpu->funcs = funcs;
	gpu->name = name;
	gpu->inactive = true;
	gpu->fctx = msm_fence_context_alloc(drm, name);
	if (IS_ERR(gpu->fctx)) {
		ret = PTR_ERR(gpu->fctx);
		gpu->fctx = NULL;
		goto fail;
	}

	INIT_LIST_HEAD(&gpu->active_list);
	INIT_WORK(&gpu->retire_work, retire_worker);
	INIT_WORK(&gpu->inactive_work, inactive_worker);
	INIT_WORK(&gpu->recover_work, recover_worker);

	INIT_LIST_HEAD(&gpu->submit_list);

	setup_timer(&gpu->inactive_timer, inactive_handler,
			(unsigned long)gpu);
	setup_timer(&gpu->hangcheck_timer, hangcheck_handler,
			(unsigned long)gpu);

	spin_lock_init(&gpu->perf_lock);

	BUG_ON(ARRAY_SIZE(clk_names) != ARRAY_SIZE(gpu->grp_clks));

	/* Map registers: */
	gpu->mmio = msm_ioremap(pdev, ioname, name);
	if (IS_ERR(gpu->mmio)) {
		ret = PTR_ERR(gpu->mmio);
		goto fail;
	}

	/* Get Interrupt: */
	gpu->irq = platform_get_irq_byname(pdev, irqname);
	if (gpu->irq < 0) {
		ret = gpu->irq;
		dev_err(drm->dev, "failed to get irq: %d\n", ret);
		goto fail;
	}

	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
			IRQF_TRIGGER_HIGH, gpu->name, gpu);
	if (ret) {
		dev_err(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
		goto fail;
	}

	/* Acquire clocks: */
	for (i = 0; i < ARRAY_SIZE(clk_names); i++) {
		gpu->grp_clks[i] = devm_clk_get(&pdev->dev, clk_names[i]);
		DBG("grp_clks[%s]: %p", clk_names[i], gpu->grp_clks[i]);
		if (IS_ERR(gpu->grp_clks[i]))
			gpu->grp_clks[i] = NULL;
	}

	gpu->ebi1_clk = devm_clk_get(&pdev->dev, "bus_clk");
	DBG("ebi1_clk: %p", gpu->ebi1_clk);
	if (IS_ERR(gpu->ebi1_clk))
		gpu->ebi1_clk = NULL;

	/* Acquire regulators: */
	gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
	DBG("gpu_reg: %p", gpu->gpu_reg);
	if (IS_ERR(gpu->gpu_reg))
		gpu->gpu_reg = NULL;

	gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
	DBG("gpu_cx: %p", gpu->gpu_cx);
	if (IS_ERR(gpu->gpu_cx))
		gpu->gpu_cx = NULL;

	/* Setup IOMMU.. eventually we will (I think) do this once per context
	 * and have separate page tables per context.  For now, to keep things
	 * simple and to get something working, just use a single address space:
	 */
	iommu = iommu_domain_alloc(&platform_bus_type);
	if (iommu) {
		/* TODO 32b vs 64b address space.. */
		iommu->geometry.aperture_start = SZ_16M;
		iommu->geometry.aperture_end = 0xffffffff;

		dev_info(drm->dev, "%s: using IOMMU\n", name);
		gpu->aspace = msm_gem_address_space_create(&pdev->dev,
				iommu, "gpu");
		if (IS_ERR(gpu->aspace)) {
			ret = PTR_ERR(gpu->aspace);
			dev_err(drm->dev, "failed to init iommu: %d\n", ret);
			gpu->aspace = NULL;
			iommu_domain_free(iommu);
			goto fail;
		}
	} else {
		dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
	}
	gpu->id = msm_register_address_space(drm, gpu->aspace);

	/* Create ringbuffer: */
	mutex_lock(&drm->struct_mutex);
	gpu->rb = msm_ringbuffer_new(gpu, ringsz);
	mutex_unlock(&drm->struct_mutex);
	if (IS_ERR(gpu->rb)) {
		ret = PTR_ERR(gpu->rb);
		gpu->rb = NULL;
		dev_err(drm->dev, "could not create ringbuffer: %d\n", ret);
		goto fail;
	}

	bs_init(gpu);

	return 0;

fail:
	return ret;
}
void msm_gpu_cleanup(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);

	WARN_ON(!list_empty(&gpu->active_list));

	bs_fini(gpu);

	if (gpu->rb) {
		if (gpu->rb_iova)
			msm_gem_put_iova(gpu->rb->bo, gpu->id);
		msm_ringbuffer_destroy(gpu->rb);
	}

	if (gpu->aspace)
		msm_gem_address_space_destroy(gpu->aspace);

	if (gpu->fctx)
		msm_fence_context_free(gpu->fctx);
}