// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
 * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include <linux/cleanup.h>
#include <linux/device.h>
#include <linux/interconnect.h>
#include <linux/firmware/qcom/qcom_scm.h>
#include <linux/iopoll.h>
#include <linux/list.h>
#include <linux/mod_devicetable.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>
#include <linux/ratelimit.h>
#include <linux/spinlock.h>

#include "arm-smmu.h"
#include "arm-smmu-qcom.h"

#define TBU_DBG_TIMEOUT_US		100
#define DEBUG_AXUSER_REG		0x30
#define DEBUG_AXUSER_CDMID		GENMASK_ULL(43, 36)
#define DEBUG_AXUSER_CDMID_VAL		0xff
#define DEBUG_PAR_REG			0x28
#define DEBUG_PAR_FAULT_VAL		BIT(0)
#define DEBUG_PAR_PA			GENMASK_ULL(47, 12)
#define DEBUG_SID_HALT_REG		0x0
#define DEBUG_SID_HALT_VAL		BIT(16)
#define DEBUG_SID_HALT_SID		GENMASK(9, 0)
#define DEBUG_SR_HALT_ACK_REG		0x20
#define DEBUG_SR_HALT_ACK_VAL		BIT(1)
#define DEBUG_SR_ECATS_RUNNING_VAL	BIT(0)
#define DEBUG_TXN_AXCACHE		GENMASK(5, 2)
#define DEBUG_TXN_AXPROT		GENMASK(8, 6)
#define DEBUG_TXN_AXPROT_PRIV		0x1
#define DEBUG_TXN_AXPROT_NSEC		0x2
#define DEBUG_TXN_TRIGG_REG		0x18
#define DEBUG_TXN_TRIGGER		BIT(0)
#define DEBUG_VA_ADDR_REG		0x8

static LIST_HEAD(tbu_list);
static DEFINE_MUTEX(tbu_list_lock);
static DEFINE_SPINLOCK(atos_lock);
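
/*
 * Per-TBU (Translation Buffer Unit) state. Each TBU has its own device tree
 * node and registers itself on tbu_list at probe time; lookups match on the
 * parent SMMU node and the stream-id range the TBU serves.
 */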
struct qcom_tbu {
	struct device *dev;
	struct device_node *smmu_np;
	u32 sid_range[2];
	struct list_head list;
	struct clk *clk;
	struct icc_path *path;
	void __iomem *base;
	spinlock_t halt_lock; /* multiple halt or resume can't execute concurrently */
	int halt_count;
};

static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
{
	return container_of(smmu, struct qcom_smmu, smmu);
}

void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu)
{
	int ret;
	u32 tbu_pwr_status, sync_inv_ack, sync_inv_progress;
	struct qcom_smmu *qsmmu = container_of(smmu, struct qcom_smmu, smmu);
	const struct qcom_smmu_config *cfg;
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	if (__ratelimit(&rs)) {
		dev_err(smmu->dev, "TLB sync timed out -- SMMU may be deadlocked\n");

		cfg = qsmmu->cfg;
		if (!cfg)
			return;

		ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_TBU_PWR_STATUS],
					&tbu_pwr_status);
		if (ret)
			dev_err(smmu->dev,
				"Failed to read TBU power status: %d\n", ret);

		ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK],
					&sync_inv_ack);
		if (ret)
			dev_err(smmu->dev,
				"Failed to read TBU sync/inv ack status: %d\n", ret);

		ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR],
					&sync_inv_progress);
		if (ret)
			dev_err(smmu->dev,
				"Failed to read TCU sync/inv progress: %d\n", ret);

		dev_err(smmu->dev,
			"TBU: power_status %#x sync_inv_ack %#x sync_inv_progress %#x\n",
			tbu_pwr_status, sync_inv_ack, sync_inv_progress);
	}
}
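
/*
 * Find the TBU that serves @sid on the SMMU instance backing @qsmmu by
 * walking tbu_list under tbu_list_lock.
 */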
static struct qcom_tbu *qcom_find_tbu(struct qcom_smmu *qsmmu, u32 sid)
{
	struct qcom_tbu *tbu;
	u32 start, end;

	guard(mutex)(&tbu_list_lock);

	if (list_empty(&tbu_list))
		return NULL;

	list_for_each_entry(tbu, &tbu_list, list) {
		start = tbu->sid_range[0];
		end = start + tbu->sid_range[1];

		if (qsmmu->smmu.dev->of_node == tbu->smmu_np &&
		    start <= sid && sid < end)
			return tbu;
	}
	dev_err(qsmmu->smmu.dev, "Unable to find TBU for sid 0x%x\n", sid);

	return NULL;
}
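
/*
 * Halt incoming traffic on the TBU so a debug (ECATS) translation can be
 * issued. Halts are refcounted via halt_count; if the context bank is
 * stalled on a fault, the fault is terminated first so the halt request
 * can complete.
 */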
static int qcom_tbu_halt(struct qcom_tbu *tbu, struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	int ret = 0, idx = smmu_domain->cfg.cbndx;
	u32 val, fsr, status;

	guard(spinlock_irqsave)(&tbu->halt_lock);
	if (tbu->halt_count) {
		tbu->halt_count++;
		return ret;
	}

	val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
	val |= DEBUG_SID_HALT_VAL;
	writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);

	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
	if ((fsr & ARM_SMMU_CB_FSR_FAULT) && (fsr & ARM_SMMU_CB_FSR_SS)) {
		u32 sctlr_orig, sctlr;

		/*
		 * We are in a fault. Our request to halt the bus will not
		 * complete until transactions in front of us (such as the fault
		 * itself) have completed. Disable iommu faults and terminate
		 * any existing transactions.
		 */
		sctlr_orig = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_SCTLR);
		sctlr = sctlr_orig & ~(ARM_SMMU_SCTLR_CFCFG | ARM_SMMU_SCTLR_CFIE);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, ARM_SMMU_RESUME_TERMINATE);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr_orig);
	}

	if (readl_poll_timeout_atomic(tbu->base + DEBUG_SR_HALT_ACK_REG, status,
				      (status & DEBUG_SR_HALT_ACK_VAL),
				      0, TBU_DBG_TIMEOUT_US)) {
		dev_err(tbu->dev, "Timeout while trying to halt TBU!\n");
		ret = -ETIMEDOUT;

		val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
		val &= ~DEBUG_SID_HALT_VAL;
		writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);

		return ret;
	}

	tbu->halt_count = 1;

	return ret;
}
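
/* Drop one halt reference and un-halt the TBU when the count reaches zero. */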
static void qcom_tbu_resume(struct qcom_tbu *tbu)
{
	u32 val;

	guard(spinlock_irqsave)(&tbu->halt_lock);
	if (!tbu->halt_count) {
		WARN(1, "%s: halt_count is 0", dev_name(tbu->dev));
		return;
	}

	if (tbu->halt_count > 1) {
		tbu->halt_count--;
		return;
	}

	val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
	val &= ~DEBUG_SID_HALT_VAL;
	writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);

	tbu->halt_count = 0;
}
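
/*
 * Issue a debug translation (ATOS/ECATS) of @iova for stream-id @sid through
 * the TBU debug registers and return the translated physical address, or 0
 * on fault or timeout.
 */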
static phys_addr_t qcom_tbu_trigger_atos(struct arm_smmu_domain *smmu_domain,
					 struct qcom_tbu *tbu, dma_addr_t iova, u32 sid)
{
	bool atos_timedout = false;
	phys_addr_t phys = 0;
	ktime_t timeout;
	u64 val;

	/* Set address and stream-id */
	val = readq_relaxed(tbu->base + DEBUG_SID_HALT_REG);
	val &= ~DEBUG_SID_HALT_SID;
	val |= FIELD_PREP(DEBUG_SID_HALT_SID, sid);
	writeq_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);
	writeq_relaxed(iova, tbu->base + DEBUG_VA_ADDR_REG);
	val = FIELD_PREP(DEBUG_AXUSER_CDMID, DEBUG_AXUSER_CDMID_VAL);
	writeq_relaxed(val, tbu->base + DEBUG_AXUSER_REG);

	/* Write-back read and write-allocate */
	val = FIELD_PREP(DEBUG_TXN_AXCACHE, 0xf);

	/* Non-secure access */
	val |= FIELD_PREP(DEBUG_TXN_AXPROT, DEBUG_TXN_AXPROT_NSEC);

	/* Privileged access */
	val |= FIELD_PREP(DEBUG_TXN_AXPROT, DEBUG_TXN_AXPROT_PRIV);

	val |= DEBUG_TXN_TRIGGER;
	writeq_relaxed(val, tbu->base + DEBUG_TXN_TRIGG_REG);

	timeout = ktime_add_us(ktime_get(), TBU_DBG_TIMEOUT_US);
	for (;;) {
		val = readl_relaxed(tbu->base + DEBUG_SR_HALT_ACK_REG);
		if (!(val & DEBUG_SR_ECATS_RUNNING_VAL))
			break;
		val = readl_relaxed(tbu->base + DEBUG_PAR_REG);
		if (val & DEBUG_PAR_FAULT_VAL)
			break;
		if (ktime_compare(ktime_get(), timeout) > 0) {
			atos_timedout = true;
			break;
		}
	}

	val = readq_relaxed(tbu->base + DEBUG_PAR_REG);
	if (val & DEBUG_PAR_FAULT_VAL)
		dev_err(tbu->dev, "ATOS generated a fault interrupt! PAR = %llx, SID=0x%x\n",
			val, sid);
	else if (atos_timedout)
		dev_err_ratelimited(tbu->dev, "ATOS translation timed out!\n");
	else
		phys = FIELD_GET(DEBUG_PAR_PA, val);

	/* Reset hardware */
	writeq_relaxed(0, tbu->base + DEBUG_TXN_TRIGG_REG);
	writeq_relaxed(0, tbu->base + DEBUG_VA_ADDR_REG);
	val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
	val &= ~DEBUG_SID_HALT_SID;
	writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);

	return phys;
}
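
/*
 * Translate @iova for @sid via the TBU debug interface: vote for interconnect
 * bandwidth, enable the TBU clock, halt the TBU, run the ATOS lookup (retried
 * a couple of times under atos_lock), then restore the context bank and
 * release the TBU.
 */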
static phys_addr_t qcom_iova_to_phys(struct arm_smmu_domain *smmu_domain,
				     dma_addr_t iova, u32 sid)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
	int idx = smmu_domain->cfg.cbndx;
	struct qcom_tbu *tbu;
	u32 sctlr_orig, sctlr;
	phys_addr_t phys = 0;
	int attempt = 0;
	int ret;
	u32 fsr;

	tbu = qcom_find_tbu(qsmmu, sid);
	if (!tbu)
		return 0;

	ret = icc_set_bw(tbu->path, 0, UINT_MAX);
	if (ret)
		return ret;

	ret = clk_prepare_enable(tbu->clk);
	if (ret)
		goto disable_icc;

	ret = qcom_tbu_halt(tbu, smmu_domain);
	if (ret)
		goto disable_clk;

	/*
	 * ATOS/ECATS can trigger the fault interrupt, so disable it temporarily
	 * and check for an interrupt manually.
	 */
	sctlr_orig = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_SCTLR);
	sctlr = sctlr_orig & ~(ARM_SMMU_SCTLR_CFCFG | ARM_SMMU_SCTLR_CFIE);
	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr);

	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
	if (fsr & ARM_SMMU_CB_FSR_FAULT) {
		/* Clear pending interrupts */
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);

		/*
		 * TBU halt takes care of resuming any stalled transaction.
		 * Kept here for completeness' sake.
		 */
		if (fsr & ARM_SMMU_CB_FSR_SS)
			arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME,
					  ARM_SMMU_RESUME_TERMINATE);
	}

	/* Only one concurrent atos operation */
	scoped_guard(spinlock_irqsave, &atos_lock) {
		/*
		 * If the translation fails, attempt the lookup one more time.
		 */
		do {
			phys = qcom_tbu_trigger_atos(smmu_domain, tbu, iova, sid);

			fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
			if (fsr & ARM_SMMU_CB_FSR_FAULT) {
				/* Clear pending interrupts */
				arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);

				if (fsr & ARM_SMMU_CB_FSR_SS)
					arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME,
							  ARM_SMMU_RESUME_TERMINATE);
			}
		} while (!phys && attempt++ < 2);

		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr_orig);
	}

	qcom_tbu_resume(tbu);

	/* Read to complete prior write transactions */
	readl_relaxed(tbu->base + DEBUG_SR_HALT_ACK_REG);

disable_clk:
	clk_disable_unprepare(tbu->clk);
disable_icc:
	icc_set_bw(tbu->path, 0, 0);

	return phys;
}
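
/*
 * Resolve the faulting stream-id from CBFRSYNRA and perform a hardware
 * (ATOS) translation of @iova for it.
 */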
static phys_addr_t qcom_smmu_iova_to_phys_hard(struct arm_smmu_domain *smmu_domain, dma_addr_t iova)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	int idx = smmu_domain->cfg.cbndx;
	u32 frsynra;
	u16 sid;

	frsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
	sid = FIELD_GET(ARM_SMMU_CBFRSYNRA_SID, frsynra);

	return qcom_iova_to_phys(smmu_domain, iova, sid);
}
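
/*
 * Run the hardware translation before and after a TLBIALL to help tell a
 * genuinely unmapped address apart from a stale TLB entry.
 */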
static phys_addr_t qcom_smmu_verify_fault(struct arm_smmu_domain *smmu_domain, dma_addr_t iova, u32 fsr)
{
	struct io_pgtable *iop = io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	phys_addr_t phys_post_tlbiall;
	phys_addr_t phys;

	phys = qcom_smmu_iova_to_phys_hard(smmu_domain, iova);
	io_pgtable_tlb_flush_all(iop);
	phys_post_tlbiall = qcom_smmu_iova_to_phys_hard(smmu_domain, iova);

	if (phys != phys_post_tlbiall) {
		dev_err(smmu->dev,
			"ATOS results differed across TLBIALL... (before: %pa after: %pa)\n",
			&phys, &phys_post_tlbiall);
	}

	return (phys == 0 ? phys_post_tlbiall : phys);
}
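
/*
 * Qualcomm-specific context fault handler: reports the fault to the client
 * and, when TBUs are available, prints both the software page-table walk and
 * the hardware (ATOS) translation to aid debugging.
 */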
irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
{
	struct arm_smmu_domain *smmu_domain = dev;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_context_fault_info cfi;
	u32 resume = 0;
	int idx = smmu_domain->cfg.cbndx;
	phys_addr_t phys_soft;
	int ret, tmp;

	static DEFINE_RATELIMIT_STATE(_rs,
				      DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	arm_smmu_read_context_fault_info(smmu, idx, &cfi);

	if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT))
		return IRQ_NONE;

	if (list_empty(&tbu_list)) {
		ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
					 cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);

		if (ret == -ENOSYS)
			arm_smmu_print_context_fault_info(smmu, idx, &cfi);

		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr);
		return IRQ_HANDLED;
	}

	phys_soft = ops->iova_to_phys(ops, cfi.iova);

	tmp = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
				 cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
	if (!tmp || tmp == -EBUSY) {
		ret = IRQ_HANDLED;
		resume = ARM_SMMU_RESUME_TERMINATE;
	} else {
		phys_addr_t phys_atos = qcom_smmu_verify_fault(smmu_domain, cfi.iova, cfi.fsr);

		if (__ratelimit(&_rs)) {
			arm_smmu_print_context_fault_info(smmu, idx, &cfi);

			dev_err(smmu->dev,
				"soft iova-to-phys=%pa\n", &phys_soft);
			if (!phys_soft)
				dev_err(smmu->dev,
					"SOFTWARE TABLE WALK FAILED! Looks like %s accessed an unmapped address!\n",
					dev_name(smmu->dev));
			if (phys_atos)
				dev_err(smmu->dev, "hard iova-to-phys (ATOS)=%pa\n",
					&phys_atos);
			else
				dev_err(smmu->dev, "hard iova-to-phys (ATOS) failed\n");
		}
		ret = IRQ_NONE;
		resume = ARM_SMMU_RESUME_TERMINATE;
	}

	/*
	 * If the client returns -EBUSY, do not clear FSR and do not RESUME
	 * if stalled. This is required to keep the IOMMU client stalled on
	 * the outstanding fault. This gives the client a chance to take any
	 * debug action and then terminate the stalled transaction.
	 * So, the sequence in case of stall on fault should be:
	 * 1) Do not clear FSR or write to RESUME here
	 * 2) Client takes any debug action
	 * 3) Client terminates the stalled transaction and resumes the IOMMU
	 * 4) Client clears FSR. The FSR should only be cleared after 3) and
	 *    not before so that the fault remains outstanding. This ensures
	 *    SCTLR.HUPCF has the desired effect if subsequent transactions
	 *    also need to be terminated.
	 */
	if (tmp != -EBUSY) {
		/* Clear the faulting FSR */
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr);

		/* Retry or terminate any stalled transactions */
		if (cfi.fsr & ARM_SMMU_CB_FSR_SS)
			arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, resume);
	}

	return ret;
}
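
/*
 * Probe a TBU node: parse the "qcom,stream-id-range" phandle, map the debug
 * registers, get the optional clock and interconnect path, and add the TBU
 * to the global list.
 */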
int qcom_tbu_probe(struct platform_device *pdev)
{
	struct of_phandle_args args = { .args_count = 2 };
	struct device_node *np = pdev->dev.of_node;
	struct device *dev = &pdev->dev;
	struct qcom_tbu *tbu;

	tbu = devm_kzalloc(dev, sizeof(*tbu), GFP_KERNEL);
	if (!tbu)
		return -ENOMEM;

	tbu->dev = dev;
	INIT_LIST_HEAD(&tbu->list);
	spin_lock_init(&tbu->halt_lock);

	if (of_parse_phandle_with_args(np, "qcom,stream-id-range", "#iommu-cells", 0, &args)) {
		dev_err(dev, "Cannot parse the 'qcom,stream-id-range' DT property\n");
		return -ENOENT;
	}

	tbu->smmu_np = args.np;
	tbu->sid_range[0] = args.args[0];
	tbu->sid_range[1] = args.args[1];
	of_node_put(args.np);

	tbu->base = devm_of_iomap(dev, np, 0, NULL);
	if (IS_ERR(tbu->base))
		return PTR_ERR(tbu->base);

	tbu->clk = devm_clk_get_optional(dev, NULL);
	if (IS_ERR(tbu->clk))
		return PTR_ERR(tbu->clk);

	tbu->path = devm_of_icc_get(dev, NULL);
	if (IS_ERR(tbu->path))
		return PTR_ERR(tbu->path);

	guard(mutex)(&tbu_list_lock);
	list_add_tail(&tbu->list, &tbu_list);

	return 0;
}