/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/pm_opp.h>
#include "adreno_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"

int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	switch (param) {
	case MSM_PARAM_GPU_ID:
		*value = adreno_gpu->info->revn;
		return 0;
	case MSM_PARAM_GMEM_SIZE:
		*value = adreno_gpu->gmem;
		return 0;
	case MSM_PARAM_GMEM_BASE:
		*value = 0x100000;
		return 0;
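	/*
	 * The chip id packs the rev fields as core.major.minor.patchid, one
	 * byte each; e.g. rev 3.3.0.2 (an A330) encodes to 0x03030002:
	 */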
	case MSM_PARAM_CHIP_ID:
		*value = adreno_gpu->rev.patchid |
				(adreno_gpu->rev.minor << 8) |
				(adreno_gpu->rev.major << 16) |
				(adreno_gpu->rev.core << 24);
		return 0;
	case MSM_PARAM_MAX_FREQ:
		*value = adreno_gpu->base.fast_rate;
		return 0;
	case MSM_PARAM_TIMESTAMP:
		if (adreno_gpu->funcs->get_timestamp) {
			int ret;

			pm_runtime_get_sync(&gpu->pdev->dev);
			ret = adreno_gpu->funcs->get_timestamp(gpu, value);
			pm_runtime_put_autosuspend(&gpu->pdev->dev);

			return ret;
		}
		return -EINVAL;
	case MSM_PARAM_NR_RINGS:
		*value = gpu->nr_rings;
		return 0;
	default:
		DBG("%s: invalid param: %u", gpu->name, param);
		return -EINVAL;
	}
}

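/*
 * Firmware is looked for in up to three locations; the first location that
 * works gets latched in adreno_gpu->fwloc, so later requests only try that
 * same place:
 */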
const struct firmware *
adreno_request_fw(struct adreno_gpu *adreno_gpu, const char *fwname)
{
	struct drm_device *drm = adreno_gpu->base.dev;
	const struct firmware *fw = NULL;
	char newname[strlen("qcom/") + strlen(fwname) + 1];
	int ret;

	sprintf(newname, "qcom/%s", fwname);

	/*
	 * Try first to load from qcom/$fwfile using a direct load (to avoid
	 * a potential timeout waiting for usermode helper)
	 */
	if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) ||
	    (adreno_gpu->fwloc == FW_LOCATION_NEW)) {

		ret = request_firmware_direct(&fw, newname, drm->dev);
		if (!ret) {
			dev_info(drm->dev, "loaded %s from new location\n",
				newname);
			adreno_gpu->fwloc = FW_LOCATION_NEW;
			return fw;
		} else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) {
			dev_err(drm->dev, "failed to load %s: %d\n",
				newname, ret);
			return ERR_PTR(ret);
		}
	}

	/*
	 * Then try the legacy location without qcom/ prefix
	 */
	if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) ||
	    (adreno_gpu->fwloc == FW_LOCATION_LEGACY)) {

		ret = request_firmware_direct(&fw, fwname, drm->dev);
		if (!ret) {
			dev_info(drm->dev, "loaded %s from legacy location\n",
				newname);
			adreno_gpu->fwloc = FW_LOCATION_LEGACY;
			return fw;
		} else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) {
			dev_err(drm->dev, "failed to load %s: %d\n",
				fwname, ret);
			return ERR_PTR(ret);
		}
	}

	/*
	 * Finally fall back to request_firmware() for cases where the
	 * usermode helper is needed (I think mainly android)
	 */
	if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) ||
	    (adreno_gpu->fwloc == FW_LOCATION_HELPER)) {

		ret = request_firmware(&fw, newname, drm->dev);
		if (!ret) {
			dev_info(drm->dev, "loaded %s with helper\n",
				newname);
			adreno_gpu->fwloc = FW_LOCATION_HELPER;
			return fw;
		} else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) {
			dev_err(drm->dev, "failed to load %s: %d\n",
				newname, ret);
			return ERR_PTR(ret);
		}
	}

	dev_err(drm->dev, "failed to load %s\n", fwname);
	return ERR_PTR(-ENOENT);
}

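/*
 * Fetch the PM4 and PFP microcode for the CP; the images stay cached in
 * adreno_gpu until adreno_gpu_cleanup(), so re-running hw_init does not
 * re-request firmware:
 */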
static int adreno_load_fw(struct adreno_gpu *adreno_gpu)
{
	const struct firmware *fw;

	if (adreno_gpu->pm4)
		return 0;

	fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pm4fw);
	if (IS_ERR(fw))
		return PTR_ERR(fw);
	adreno_gpu->pm4 = fw;

	fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pfpfw);
	if (IS_ERR(fw)) {
		release_firmware(adreno_gpu->pm4);
		adreno_gpu->pm4 = NULL;
		return PTR_ERR(fw);
	}
	adreno_gpu->pfp = fw;

	return 0;
}

int adreno_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret, i;

	DBG("%s", gpu->name);

	ret = adreno_load_fw(adreno_gpu);
	if (ret)
		return ret;

	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		if (!ring)
			continue;

		ret = msm_gem_get_iova(ring->bo, gpu->aspace, &ring->iova);
		if (ret) {
			ring->iova = 0;
			dev_err(gpu->dev->dev,
				"could not map ringbuffer %d: %d\n", i, ret);
			return ret;
		}

		ring->cur = ring->start;
		ring->next = ring->start;

		/* reset completed fence seqno: */
		ring->memptrs->fence = ring->seqno;
		ring->memptrs->rptr = 0;
	}

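	/*
	 * MSM_GPU_RB_CNTL_DEFAULT comes from msm_gpu.h, roughly:
	 *   AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) |
	 *   AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8))
	 */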
	/*
	 * Setup REG_CP_RB_CNTL.  The same value is used across targets (with
	 * the exception of A430 that disables the RPTR shadow) - the calculation
	 * for the ringbuffer size and block size is moved to msm_gpu.h for the
	 * pre-processor to deal with and the A430 variant is ORed in here
	 */
	adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT |
		(adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0));

	/* Setup ringbuffer address - use ringbuffer[0] for GPU init */
	adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE,
		REG_ADRENO_CP_RB_BASE_HI, gpu->rb[0]->iova);

	if (!adreno_is_a430(adreno_gpu)) {
		adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR,
			REG_ADRENO_CP_RB_RPTR_ADDR_HI,
			rbmemptr(gpu->rb[0], rptr));
	}

	return 0;
}

/* Use this helper to read rptr, since a430 doesn't update rptr in memory */
static uint32_t get_rptr(struct adreno_gpu *adreno_gpu,
		struct msm_ringbuffer *ring)
{
	if (adreno_is_a430(adreno_gpu))
		return ring->memptrs->rptr = adreno_gpu_read(
			adreno_gpu, REG_ADRENO_CP_RB_RPTR);
	else
		return ring->memptrs->rptr;
}

struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu)
{
	return gpu->rb[0];
}

void adreno_recover(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret;

	// XXX pm-runtime??  we *need* the device to be off after this
	// so maybe continuing to call ->pm_suspend/resume() is better?

	gpu->funcs->pm_suspend(gpu);
	gpu->funcs->pm_resume(gpu);

	ret = msm_gpu_hw_init(gpu);
	if (ret) {
		dev_err(dev->dev, "gpu hw init failed: %d\n", ret);
		/* hmm, oh well? */
	}
}

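/*
 * Emit the submit's IBs into its ringbuffer, then a CACHE_FLUSH_TS event
 * that writes the seqno to the per-ring fence memptr, and a CP_INTERRUPT
 * so the CPU is told about the completion:
 */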
void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (priv->lastctx == ctx)
				break;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ?
				CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			OUT_PKT2(ring);
			break;
		}
	}

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->seqno);

	if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) {
		/* Flush HLSQ lazy updates to make sure there is nothing
		 * pending for indirect loads after the timestamp has
		 * passed:
		 */
		OUT_PKT3(ring, CP_EVENT_WRITE, 1);
		OUT_RING(ring, HLSQ_FLUSH);

		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
		OUT_RING(ring, 0x00000000);
	}

	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS);
	OUT_RING(ring, rbmemptr(ring, fence));
	OUT_RING(ring, submit->seqno);

	/* we could maybe be clever and only CP_COND_EXEC the interrupt: */
	OUT_PKT3(ring, CP_INTERRUPT, 1);
	OUT_RING(ring, 0x80000000);

	/* Workaround for missing irq issue on 8x16/a306.  Unsure if the
	 * root cause is a platform issue or some a306 quirk, but this
	 * keeps things humming along:
	 */
	if (adreno_is_a306(adreno_gpu)) {
		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
		OUT_RING(ring, 0x00000000);
		OUT_PKT3(ring, CP_INTERRUPT, 1);
		OUT_RING(ring, 0x80000000);
	}

#if 0
	if (adreno_is_a3xx(adreno_gpu)) {
		/* Dummy set-constant to trigger context rollover */
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
		OUT_RING(ring, 0x00000000);
	}
#endif

	gpu->funcs->flush(gpu, ring);
}

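/*
 * A sketch of get_wptr() as assumed from adreno_gpu.h: the write pointer
 * is the dword offset of ring->cur, wrapped to the ring size:
 *
 *   (ring->cur - ring->start) % (MSM_GPU_RINGBUFFER_SZ >> 2)
 */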
void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	uint32_t wptr;

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/*
	 * Mask wptr value that we calculate to fit in the HW range. This is
	 * to account for the possibility that the last command fit exactly into
	 * the ringbuffer and rb->next hasn't wrapped to zero yet
	 */
	wptr = get_wptr(ring);

	/* ensure writes to ringbuffer have hit system memory: */
	mb();

	adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_WPTR, wptr);
}

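/*
 * spin_until() (adreno_gpu.h) busy-waits on its condition with a timeout,
 * evaluating to 0 on success and nonzero if the timeout expired first.
 */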
bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	uint32_t wptr = get_wptr(ring);

	/* wait for CP to drain ringbuffer: */
	if (!spin_until(get_rptr(adreno_gpu, ring) == wptr))
		return true;

	/* TODO maybe we need to reset GPU here to recover from hang? */
	DRM_ERROR("%s: timeout waiting to drain ringbuffer %d rptr/wptr = %X/%X\n",
		gpu->name, ring->id, get_rptr(adreno_gpu, ring), wptr);

	return false;
}

#ifdef CONFIG_DEBUG_FS
void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;

	seq_printf(m, "revision: %d (%d.%d.%d.%d)\n",
			adreno_gpu->info->revn, adreno_gpu->rev.core,
			adreno_gpu->rev.major, adreno_gpu->rev.minor,
			adreno_gpu->rev.patchid);

	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		seq_printf(m, "rb %d: fence:    %d/%d\n", i,
			ring->memptrs->fence, ring->seqno);

		seq_printf(m, "      rptr:     %d\n",
			get_rptr(adreno_gpu, ring));
		seq_printf(m, "rb wptr:  %d\n", get_wptr(ring));
	}

	/* dump these out in a form that can be parsed by demsm: */
	seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name);
	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
		uint32_t start = adreno_gpu->registers[i];
		uint32_t end   = adreno_gpu->registers[i+1];
		uint32_t addr;

		for (addr = start; addr <= end; addr++) {
			uint32_t val = gpu_read(gpu, addr);
			seq_printf(m, "IO:R %08x %08x\n", addr<<2, val);
		}
	}
}
#endif

/* Dump common gpu status and scratch registers on any hang, to make
 * the hangcheck logs more useful.  The scratch registers seem always
 * safe to read when GPU has hung (unlike some other regs, depending
 * on how the GPU hung), and they are useful to match up to cmdstream
 * dumps when debugging hangs:
 */
void adreno_dump_info(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;

	printk("revision: %d (%d.%d.%d.%d)\n",
			adreno_gpu->info->revn, adreno_gpu->rev.core,
			adreno_gpu->rev.major, adreno_gpu->rev.minor,
			adreno_gpu->rev.patchid);

	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		printk("rb %d: fence:    %d/%d\n", i,
			ring->memptrs->fence,
			ring->seqno);

		printk("rptr:     %d\n", get_rptr(adreno_gpu, ring));
		printk("rb wptr:  %d\n", get_wptr(ring));
	}
}

/* would be nice to not have to duplicate the _show() stuff with printk(): */
void adreno_dump(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;

	/* dump these out in a form that can be parsed by demsm: */
	printk("IO:region %s 00000000 00020000\n", gpu->name);
	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
		uint32_t start = adreno_gpu->registers[i];
		uint32_t end   = adreno_gpu->registers[i+1];
		uint32_t addr;

		for (addr = start; addr <= end; addr++) {
			uint32_t val = gpu_read(gpu, addr);
			printk("IO:R %08x %08x\n", addr<<2, val);
		}
	}
}

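/*
 * Free dwords between wptr and rptr, keeping one dword in reserve so a
 * completely full ring can't be confused with an empty one; e.g. size=8,
 * rptr=2, wptr=5 gives (2 + 7 - 5) % 8 = 4 free dwords:
 */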
static uint32_t ring_freewords(struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(ring->gpu);
	uint32_t size = MSM_GPU_RINGBUFFER_SZ >> 2;
	/* Use ring->next to calculate free size */
	uint32_t wptr = ring->next - ring->start;
	uint32_t rptr = get_rptr(adreno_gpu, ring);
	return (rptr + (size - 1) - wptr) % size;
}

void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords)
{
	if (spin_until(ring_freewords(ring) >= ndwords))
		DRM_DEV_ERROR(ring->gpu->dev->dev,
			"timeout waiting for space in ringbuffer %d\n",
			ring->id);
}

/* Get legacy powerlevels from qcom,gpu-pwrlevels and populate the opp table */
static int adreno_get_legacy_pwrlevels(struct device *dev)
{
	struct device_node *child, *node;
	int ret;

	node = of_find_compatible_node(dev->of_node, NULL,
		"qcom,gpu-pwrlevels");
	if (!node) {
		dev_err(dev, "Could not find the GPU powerlevels\n");
		return -ENXIO;
	}

	for_each_child_of_node(node, child) {
		unsigned int val;

		ret = of_property_read_u32(child, "qcom,gpu-freq", &val);
		if (ret)
			continue;

		/*
		 * Skip the intentionally bogus clock value found at the bottom
		 * of most legacy frequency tables
		 */
		if (val != 27000000)
			dev_pm_opp_add(dev, val, 0);
	}

	return 0;
}

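/*
 * A minimal sketch of the legacy DT layout parsed above (node and property
 * names from the code; child names and frequencies are illustrative only):
 *
 *	gpu-pwrlevels {
 *		compatible = "qcom,gpu-pwrlevels";
 *		qcom,gpu-pwrlevel@0 {
 *			qcom,gpu-freq = <450000000>;
 *		};
 *		qcom,gpu-pwrlevel@1 {
 *			qcom,gpu-freq = <27000000>;
 *		};
 *	};
 */
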
static int adreno_get_pwrlevels(struct device *dev,
		struct msm_gpu *gpu)
{
	unsigned long freq = ULONG_MAX;
	struct dev_pm_opp *opp;
	int ret;

	gpu->fast_rate = 0;

	/* You down with OPP? */
	if (!of_find_property(dev->of_node, "operating-points-v2", NULL))
		ret = adreno_get_legacy_pwrlevels(dev);
	else {
		ret = dev_pm_opp_of_add_table(dev);
		if (ret)
			dev_err(dev, "Unable to set the OPP table\n");
	}

	if (!ret) {
		/* Find the fastest defined rate */
		opp = dev_pm_opp_find_freq_floor(dev, &freq);
		if (!IS_ERR(opp)) {
			gpu->fast_rate = freq;
			dev_pm_opp_put(opp);
		}
	}

	if (!gpu->fast_rate) {
		dev_warn(dev,
			"Could not find a clock rate. Using a reasonable default\n");
		/* Pick a suitably safe clock speed for any target */
		gpu->fast_rate = 200000000;
	}

	DBG("fast_rate=%u, slow_rate=27000000", gpu->fast_rate);

	return 0;
}

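/*
 * Common init for all adreno generations: capture target info from the
 * platform config, describe the register/irq resources and GPU VA range,
 * set up power levels and runtime PM, then hand off to msm_gpu_init():
 */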
int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct adreno_gpu *adreno_gpu,
		const struct adreno_gpu_funcs *funcs, int nr_rings)
{
	struct adreno_platform_config *config = pdev->dev.platform_data;
	struct msm_gpu_config adreno_gpu_config  = { 0 };
	struct msm_gpu *gpu = &adreno_gpu->base;

	adreno_gpu->funcs = funcs;
	adreno_gpu->info = adreno_info(config->rev);
	adreno_gpu->gmem = adreno_gpu->info->gmem;
	adreno_gpu->revn = adreno_gpu->info->revn;
	adreno_gpu->rev = config->rev;

	adreno_gpu_config.ioname = "kgsl_3d0_reg_memory";
	adreno_gpu_config.irqname = "kgsl_3d0_irq";

	adreno_gpu_config.va_start = SZ_16M;
	adreno_gpu_config.va_end = 0xffffffff;

	adreno_gpu_config.nr_rings = nr_rings;

	adreno_get_pwrlevels(&pdev->dev, gpu);

	pm_runtime_set_autosuspend_delay(&pdev->dev, DRM_MSM_INACTIVE_PERIOD);
	pm_runtime_use_autosuspend(&pdev->dev);
	pm_runtime_enable(&pdev->dev);

	return msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base,
			adreno_gpu->info->name, &adreno_gpu_config);
}

void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu)
{
	release_firmware(adreno_gpu->pm4);
	release_firmware(adreno_gpu->pfp);

	msm_gpu_cleanup(&adreno_gpu->base);
}