1 // SPDX-License-Identifier: GPL-2.0
3 * IOMMU API for ARM architected SMMUv3 implementations.
5 * Copyright (C) 2015 ARM Limited
7 * Author: Will Deacon <will.deacon@arm.com>
9 * This driver is powered by bad coffee and bombay mix.
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/err.h>
18 #include <linux/interrupt.h>
19 #include <linux/io-pgtable.h>
20 #include <linux/iopoll.h>
21 #include <linux/module.h>
22 #include <linux/msi.h>
24 #include <linux/of_address.h>
25 #include <linux/of_platform.h>
26 #include <linux/pci.h>
27 #include <linux/pci-ats.h>
28 #include <linux/platform_device.h>
29 #include <kunit/visibility.h>
30 #include <uapi/linux/iommufd.h>
32 #include "arm-smmu-v3.h"
33 #include "../../dma-iommu.h"
35 static bool disable_msipolling
;
36 module_param(disable_msipolling
, bool, 0444);
37 MODULE_PARM_DESC(disable_msipolling
,
38 "Disable MSI-based polling for CMD_SYNC completion.");
40 static struct iommu_ops arm_smmu_ops
;
41 static struct iommu_dirty_ops arm_smmu_dirty_ops
;
43 enum arm_smmu_msi_index
{
50 #define NUM_ENTRY_QWORDS 8
51 static_assert(sizeof(struct arm_smmu_ste
) == NUM_ENTRY_QWORDS
* sizeof(u64
));
52 static_assert(sizeof(struct arm_smmu_cd
) == NUM_ENTRY_QWORDS
* sizeof(u64
));
54 static phys_addr_t arm_smmu_msi_cfg
[ARM_SMMU_MAX_MSIS
][3] = {
56 ARM_SMMU_EVTQ_IRQ_CFG0
,
57 ARM_SMMU_EVTQ_IRQ_CFG1
,
58 ARM_SMMU_EVTQ_IRQ_CFG2
,
60 [GERROR_MSI_INDEX
] = {
61 ARM_SMMU_GERROR_IRQ_CFG0
,
62 ARM_SMMU_GERROR_IRQ_CFG1
,
63 ARM_SMMU_GERROR_IRQ_CFG2
,
66 ARM_SMMU_PRIQ_IRQ_CFG0
,
67 ARM_SMMU_PRIQ_IRQ_CFG1
,
68 ARM_SMMU_PRIQ_IRQ_CFG2
,
72 struct arm_smmu_option_prop
{
77 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa
);
78 DEFINE_MUTEX(arm_smmu_asid_lock
);
80 static struct arm_smmu_option_prop arm_smmu_options
[] = {
81 { ARM_SMMU_OPT_SKIP_PREFETCH
, "hisilicon,broken-prefetch-cmd" },
82 { ARM_SMMU_OPT_PAGE0_REGS_ONLY
, "cavium,cn9900-broken-page1-regspace"},
86 static int arm_smmu_domain_finalise(struct arm_smmu_domain
*smmu_domain
,
87 struct arm_smmu_device
*smmu
, u32 flags
);
88 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master
*master
);
90 static void parse_driver_options(struct arm_smmu_device
*smmu
)
95 if (of_property_read_bool(smmu
->dev
->of_node
,
96 arm_smmu_options
[i
].prop
)) {
97 smmu
->options
|= arm_smmu_options
[i
].opt
;
98 dev_notice(smmu
->dev
, "option %s\n",
99 arm_smmu_options
[i
].prop
);
101 } while (arm_smmu_options
[++i
].opt
);
104 /* Low-level queue manipulation functions */
105 static bool queue_has_space(struct arm_smmu_ll_queue
*q
, u32 n
)
107 u32 space
, prod
, cons
;
109 prod
= Q_IDX(q
, q
->prod
);
110 cons
= Q_IDX(q
, q
->cons
);
112 if (Q_WRP(q
, q
->prod
) == Q_WRP(q
, q
->cons
))
113 space
= (1 << q
->max_n_shift
) - (prod
- cons
);
120 static bool queue_full(struct arm_smmu_ll_queue
*q
)
122 return Q_IDX(q
, q
->prod
) == Q_IDX(q
, q
->cons
) &&
123 Q_WRP(q
, q
->prod
) != Q_WRP(q
, q
->cons
);
126 static bool queue_empty(struct arm_smmu_ll_queue
*q
)
128 return Q_IDX(q
, q
->prod
) == Q_IDX(q
, q
->cons
) &&
129 Q_WRP(q
, q
->prod
) == Q_WRP(q
, q
->cons
);
132 static bool queue_consumed(struct arm_smmu_ll_queue
*q
, u32 prod
)
134 return ((Q_WRP(q
, q
->cons
) == Q_WRP(q
, prod
)) &&
135 (Q_IDX(q
, q
->cons
) > Q_IDX(q
, prod
))) ||
136 ((Q_WRP(q
, q
->cons
) != Q_WRP(q
, prod
)) &&
137 (Q_IDX(q
, q
->cons
) <= Q_IDX(q
, prod
)));
140 static void queue_sync_cons_out(struct arm_smmu_queue
*q
)
143 * Ensure that all CPU accesses (reads and writes) to the queue
144 * are complete before we update the cons pointer.
147 writel_relaxed(q
->llq
.cons
, q
->cons_reg
);
150 static void queue_inc_cons(struct arm_smmu_ll_queue
*q
)
152 u32 cons
= (Q_WRP(q
, q
->cons
) | Q_IDX(q
, q
->cons
)) + 1;
153 q
->cons
= Q_OVF(q
->cons
) | Q_WRP(q
, cons
) | Q_IDX(q
, cons
);
156 static void queue_sync_cons_ovf(struct arm_smmu_queue
*q
)
158 struct arm_smmu_ll_queue
*llq
= &q
->llq
;
160 if (likely(Q_OVF(llq
->prod
) == Q_OVF(llq
->cons
)))
163 llq
->cons
= Q_OVF(llq
->prod
) | Q_WRP(llq
, llq
->cons
) |
164 Q_IDX(llq
, llq
->cons
);
165 queue_sync_cons_out(q
);
168 static int queue_sync_prod_in(struct arm_smmu_queue
*q
)
174 * We can't use the _relaxed() variant here, as we must prevent
175 * speculative reads of the queue before we have determined that
176 * prod has indeed moved.
178 prod
= readl(q
->prod_reg
);
180 if (Q_OVF(prod
) != Q_OVF(q
->llq
.prod
))
187 static u32
queue_inc_prod_n(struct arm_smmu_ll_queue
*q
, int n
)
189 u32 prod
= (Q_WRP(q
, q
->prod
) | Q_IDX(q
, q
->prod
)) + n
;
190 return Q_OVF(q
->prod
) | Q_WRP(q
, prod
) | Q_IDX(q
, prod
);
193 static void queue_poll_init(struct arm_smmu_device
*smmu
,
194 struct arm_smmu_queue_poll
*qp
)
198 qp
->wfe
= !!(smmu
->features
& ARM_SMMU_FEAT_SEV
);
199 qp
->timeout
= ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US
);
202 static int queue_poll(struct arm_smmu_queue_poll
*qp
)
204 if (ktime_compare(ktime_get(), qp
->timeout
) > 0)
209 } else if (++qp
->spin_cnt
< ARM_SMMU_POLL_SPIN_COUNT
) {
220 static void queue_write(__le64
*dst
, u64
*src
, size_t n_dwords
)
224 for (i
= 0; i
< n_dwords
; ++i
)
225 *dst
++ = cpu_to_le64(*src
++);
228 static void queue_read(u64
*dst
, __le64
*src
, size_t n_dwords
)
232 for (i
= 0; i
< n_dwords
; ++i
)
233 *dst
++ = le64_to_cpu(*src
++);
236 static int queue_remove_raw(struct arm_smmu_queue
*q
, u64
*ent
)
238 if (queue_empty(&q
->llq
))
241 queue_read(ent
, Q_ENT(q
, q
->llq
.cons
), q
->ent_dwords
);
242 queue_inc_cons(&q
->llq
);
243 queue_sync_cons_out(q
);
247 /* High-level queue accessors */
248 static int arm_smmu_cmdq_build_cmd(u64
*cmd
, struct arm_smmu_cmdq_ent
*ent
)
250 memset(cmd
, 0, 1 << CMDQ_ENT_SZ_SHIFT
);
251 cmd
[0] |= FIELD_PREP(CMDQ_0_OP
, ent
->opcode
);
253 switch (ent
->opcode
) {
254 case CMDQ_OP_TLBI_EL2_ALL
:
255 case CMDQ_OP_TLBI_NSNH_ALL
:
257 case CMDQ_OP_PREFETCH_CFG
:
258 cmd
[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID
, ent
->prefetch
.sid
);
260 case CMDQ_OP_CFGI_CD
:
261 cmd
[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID
, ent
->cfgi
.ssid
);
263 case CMDQ_OP_CFGI_STE
:
264 cmd
[0] |= FIELD_PREP(CMDQ_CFGI_0_SID
, ent
->cfgi
.sid
);
265 cmd
[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF
, ent
->cfgi
.leaf
);
267 case CMDQ_OP_CFGI_CD_ALL
:
268 cmd
[0] |= FIELD_PREP(CMDQ_CFGI_0_SID
, ent
->cfgi
.sid
);
270 case CMDQ_OP_CFGI_ALL
:
271 /* Cover the entire SID range */
272 cmd
[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE
, 31);
274 case CMDQ_OP_TLBI_NH_VA
:
275 cmd
[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID
, ent
->tlbi
.vmid
);
277 case CMDQ_OP_TLBI_EL2_VA
:
278 cmd
[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM
, ent
->tlbi
.num
);
279 cmd
[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE
, ent
->tlbi
.scale
);
280 cmd
[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID
, ent
->tlbi
.asid
);
281 cmd
[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF
, ent
->tlbi
.leaf
);
282 cmd
[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL
, ent
->tlbi
.ttl
);
283 cmd
[1] |= FIELD_PREP(CMDQ_TLBI_1_TG
, ent
->tlbi
.tg
);
284 cmd
[1] |= ent
->tlbi
.addr
& CMDQ_TLBI_1_VA_MASK
;
286 case CMDQ_OP_TLBI_S2_IPA
:
287 cmd
[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM
, ent
->tlbi
.num
);
288 cmd
[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE
, ent
->tlbi
.scale
);
289 cmd
[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID
, ent
->tlbi
.vmid
);
290 cmd
[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF
, ent
->tlbi
.leaf
);
291 cmd
[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL
, ent
->tlbi
.ttl
);
292 cmd
[1] |= FIELD_PREP(CMDQ_TLBI_1_TG
, ent
->tlbi
.tg
);
293 cmd
[1] |= ent
->tlbi
.addr
& CMDQ_TLBI_1_IPA_MASK
;
295 case CMDQ_OP_TLBI_NH_ASID
:
296 cmd
[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID
, ent
->tlbi
.asid
);
298 case CMDQ_OP_TLBI_NH_ALL
:
299 case CMDQ_OP_TLBI_S12_VMALL
:
300 cmd
[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID
, ent
->tlbi
.vmid
);
302 case CMDQ_OP_TLBI_EL2_ASID
:
303 cmd
[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID
, ent
->tlbi
.asid
);
305 case CMDQ_OP_ATC_INV
:
306 cmd
[0] |= FIELD_PREP(CMDQ_0_SSV
, ent
->substream_valid
);
307 cmd
[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL
, ent
->atc
.global
);
308 cmd
[0] |= FIELD_PREP(CMDQ_ATC_0_SSID
, ent
->atc
.ssid
);
309 cmd
[0] |= FIELD_PREP(CMDQ_ATC_0_SID
, ent
->atc
.sid
);
310 cmd
[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE
, ent
->atc
.size
);
311 cmd
[1] |= ent
->atc
.addr
& CMDQ_ATC_1_ADDR_MASK
;
313 case CMDQ_OP_PRI_RESP
:
314 cmd
[0] |= FIELD_PREP(CMDQ_0_SSV
, ent
->substream_valid
);
315 cmd
[0] |= FIELD_PREP(CMDQ_PRI_0_SSID
, ent
->pri
.ssid
);
316 cmd
[0] |= FIELD_PREP(CMDQ_PRI_0_SID
, ent
->pri
.sid
);
317 cmd
[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID
, ent
->pri
.grpid
);
318 switch (ent
->pri
.resp
) {
326 cmd
[1] |= FIELD_PREP(CMDQ_PRI_1_RESP
, ent
->pri
.resp
);
329 cmd
[0] |= FIELD_PREP(CMDQ_RESUME_0_SID
, ent
->resume
.sid
);
330 cmd
[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP
, ent
->resume
.resp
);
331 cmd
[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG
, ent
->resume
.stag
);
333 case CMDQ_OP_CMD_SYNC
:
334 if (ent
->sync
.msiaddr
) {
335 cmd
[0] |= FIELD_PREP(CMDQ_SYNC_0_CS
, CMDQ_SYNC_0_CS_IRQ
);
336 cmd
[1] |= ent
->sync
.msiaddr
& CMDQ_SYNC_1_MSIADDR_MASK
;
338 cmd
[0] |= FIELD_PREP(CMDQ_SYNC_0_CS
, CMDQ_SYNC_0_CS_SEV
);
340 cmd
[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH
, ARM_SMMU_SH_ISH
);
341 cmd
[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR
, ARM_SMMU_MEMATTR_OIWB
);
350 static struct arm_smmu_cmdq
*arm_smmu_get_cmdq(struct arm_smmu_device
*smmu
,
351 struct arm_smmu_cmdq_ent
*ent
)
353 struct arm_smmu_cmdq
*cmdq
= NULL
;
355 if (smmu
->impl_ops
&& smmu
->impl_ops
->get_secondary_cmdq
)
356 cmdq
= smmu
->impl_ops
->get_secondary_cmdq(smmu
, ent
);
358 return cmdq
?: &smmu
->cmdq
;
361 static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device
*smmu
,
362 struct arm_smmu_cmdq
*cmdq
)
364 if (cmdq
== &smmu
->cmdq
)
367 return smmu
->options
& ARM_SMMU_OPT_TEGRA241_CMDQV
;
370 static void arm_smmu_cmdq_build_sync_cmd(u64
*cmd
, struct arm_smmu_device
*smmu
,
371 struct arm_smmu_cmdq
*cmdq
, u32 prod
)
373 struct arm_smmu_queue
*q
= &cmdq
->q
;
374 struct arm_smmu_cmdq_ent ent
= {
375 .opcode
= CMDQ_OP_CMD_SYNC
,
379 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
380 * payload, so the write will zero the entire command on that platform.
382 if (smmu
->options
& ARM_SMMU_OPT_MSIPOLL
) {
383 ent
.sync
.msiaddr
= q
->base_dma
+ Q_IDX(&q
->llq
, prod
) *
387 arm_smmu_cmdq_build_cmd(cmd
, &ent
);
388 if (arm_smmu_cmdq_needs_busy_polling(smmu
, cmdq
))
389 u64p_replace_bits(cmd
, CMDQ_SYNC_0_CS_NONE
, CMDQ_SYNC_0_CS
);
392 void __arm_smmu_cmdq_skip_err(struct arm_smmu_device
*smmu
,
393 struct arm_smmu_cmdq
*cmdq
)
395 static const char * const cerror_str
[] = {
396 [CMDQ_ERR_CERROR_NONE_IDX
] = "No error",
397 [CMDQ_ERR_CERROR_ILL_IDX
] = "Illegal command",
398 [CMDQ_ERR_CERROR_ABT_IDX
] = "Abort on command fetch",
399 [CMDQ_ERR_CERROR_ATC_INV_IDX
] = "ATC invalidate timeout",
401 struct arm_smmu_queue
*q
= &cmdq
->q
;
404 u64 cmd
[CMDQ_ENT_DWORDS
];
405 u32 cons
= readl_relaxed(q
->cons_reg
);
406 u32 idx
= FIELD_GET(CMDQ_CONS_ERR
, cons
);
407 struct arm_smmu_cmdq_ent cmd_sync
= {
408 .opcode
= CMDQ_OP_CMD_SYNC
,
411 dev_err(smmu
->dev
, "CMDQ error (cons 0x%08x): %s\n", cons
,
412 idx
< ARRAY_SIZE(cerror_str
) ? cerror_str
[idx
] : "Unknown");
415 case CMDQ_ERR_CERROR_ABT_IDX
:
416 dev_err(smmu
->dev
, "retrying command fetch\n");
418 case CMDQ_ERR_CERROR_NONE_IDX
:
420 case CMDQ_ERR_CERROR_ATC_INV_IDX
:
422 * ATC Invalidation Completion timeout. CONS is still pointing
423 * at the CMD_SYNC. Attempt to complete other pending commands
424 * by repeating the CMD_SYNC, though we might well end up back
425 * here since the ATC invalidation may still be pending.
428 case CMDQ_ERR_CERROR_ILL_IDX
:
434 * We may have concurrent producers, so we need to be careful
435 * not to touch any of the shadow cmdq state.
437 queue_read(cmd
, Q_ENT(q
, cons
), q
->ent_dwords
);
438 dev_err(smmu
->dev
, "skipping command in error state:\n");
439 for (i
= 0; i
< ARRAY_SIZE(cmd
); ++i
)
440 dev_err(smmu
->dev
, "\t0x%016llx\n", (unsigned long long)cmd
[i
]);
442 /* Convert the erroneous command into a CMD_SYNC */
443 arm_smmu_cmdq_build_cmd(cmd
, &cmd_sync
);
444 if (arm_smmu_cmdq_needs_busy_polling(smmu
, cmdq
))
445 u64p_replace_bits(cmd
, CMDQ_SYNC_0_CS_NONE
, CMDQ_SYNC_0_CS
);
447 queue_write(Q_ENT(q
, cons
), cmd
, q
->ent_dwords
);
450 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device
*smmu
)
452 __arm_smmu_cmdq_skip_err(smmu
, &smmu
->cmdq
);
456 * Command queue locking.
457 * This is a form of bastardised rwlock with the following major changes:
459 * - The only LOCK routines are exclusive_trylock() and shared_lock().
460 * Neither have barrier semantics, and instead provide only a control
463 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
464 * fails if the caller appears to be the last lock holder (yes, this is
465 * racy). All successful UNLOCK routines have RELEASE semantics.
467 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq
*cmdq
)
472 * We can try to avoid the cmpxchg() loop by simply incrementing the
473 * lock counter. When held in exclusive state, the lock counter is set
474 * to INT_MIN so these increments won't hurt as the value will remain
477 if (atomic_fetch_inc_relaxed(&cmdq
->lock
) >= 0)
481 val
= atomic_cond_read_relaxed(&cmdq
->lock
, VAL
>= 0);
482 } while (atomic_cmpxchg_relaxed(&cmdq
->lock
, val
, val
+ 1) != val
);
485 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq
*cmdq
)
487 (void)atomic_dec_return_release(&cmdq
->lock
);
490 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq
*cmdq
)
492 if (atomic_read(&cmdq
->lock
) == 1)
495 arm_smmu_cmdq_shared_unlock(cmdq
);
499 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags) \
502 local_irq_save(flags); \
503 __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN); \
505 local_irq_restore(flags); \
509 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags) \
511 atomic_set_release(&cmdq->lock, 0); \
512 local_irq_restore(flags); \
517 * Command queue insertion.
518 * This is made fiddly by our attempts to achieve some sort of scalability
519 * since there is one queue shared amongst all of the CPUs in the system. If
520 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
521 * then you'll *love* this monstrosity.
523 * The basic idea is to split the queue up into ranges of commands that are
524 * owned by a given CPU; the owner may not have written all of the commands
525 * itself, but is responsible for advancing the hardware prod pointer when
526 * the time comes. The algorithm is roughly:
528 * 1. Allocate some space in the queue. At this point we also discover
529 * whether the head of the queue is currently owned by another CPU,
530 * or whether we are the owner.
532 * 2. Write our commands into our allocated slots in the queue.
534 * 3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
536 * 4. If we are an owner:
537 * a. Wait for the previous owner to finish.
538 * b. Mark the queue head as unowned, which tells us the range
539 * that we are responsible for publishing.
540 * c. Wait for all commands in our owned range to become valid.
541 * d. Advance the hardware prod pointer.
542 * e. Tell the next owner we've finished.
544 * 5. If we are inserting a CMD_SYNC (we may or may not have been an
545 * owner), then we need to stick around until it has completed:
546 * a. If we have MSIs, the SMMU can write back into the CMD_SYNC
547 * to clear the first 4 bytes.
548 * b. Otherwise, we spin waiting for the hardware cons pointer to
549 * advance past our command.
551 * The devil is in the details, particularly the use of locking for handling
552 * SYNC completion and freeing up space in the queue before we think that it is
555 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq
*cmdq
,
556 u32 sprod
, u32 eprod
, bool set
)
558 u32 swidx
, sbidx
, ewidx
, ebidx
;
559 struct arm_smmu_ll_queue llq
= {
560 .max_n_shift
= cmdq
->q
.llq
.max_n_shift
,
564 ewidx
= BIT_WORD(Q_IDX(&llq
, eprod
));
565 ebidx
= Q_IDX(&llq
, eprod
) % BITS_PER_LONG
;
567 while (llq
.prod
!= eprod
) {
570 u32 limit
= BITS_PER_LONG
;
572 swidx
= BIT_WORD(Q_IDX(&llq
, llq
.prod
));
573 sbidx
= Q_IDX(&llq
, llq
.prod
) % BITS_PER_LONG
;
575 ptr
= &cmdq
->valid_map
[swidx
];
577 if ((swidx
== ewidx
) && (sbidx
< ebidx
))
580 mask
= GENMASK(limit
- 1, sbidx
);
583 * The valid bit is the inverse of the wrap bit. This means
584 * that a zero-initialised queue is invalid and, after marking
585 * all entries as valid, they become invalid again when we
589 atomic_long_xor(mask
, ptr
);
593 valid
= (ULONG_MAX
+ !!Q_WRP(&llq
, llq
.prod
)) & mask
;
594 atomic_long_cond_read_relaxed(ptr
, (VAL
& mask
) == valid
);
597 llq
.prod
= queue_inc_prod_n(&llq
, limit
- sbidx
);
601 /* Mark all entries in the range [sprod, eprod) as valid */
602 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq
*cmdq
,
603 u32 sprod
, u32 eprod
)
605 __arm_smmu_cmdq_poll_set_valid_map(cmdq
, sprod
, eprod
, true);
608 /* Wait for all entries in the range [sprod, eprod) to become valid */
609 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq
*cmdq
,
610 u32 sprod
, u32 eprod
)
612 __arm_smmu_cmdq_poll_set_valid_map(cmdq
, sprod
, eprod
, false);
615 /* Wait for the command queue to become non-full */
616 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device
*smmu
,
617 struct arm_smmu_cmdq
*cmdq
,
618 struct arm_smmu_ll_queue
*llq
)
621 struct arm_smmu_queue_poll qp
;
625 * Try to update our copy of cons by grabbing exclusive cmdq access. If
626 * that fails, spin until somebody else updates it for us.
628 if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq
, flags
)) {
629 WRITE_ONCE(cmdq
->q
.llq
.cons
, readl_relaxed(cmdq
->q
.cons_reg
));
630 arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq
, flags
);
631 llq
->val
= READ_ONCE(cmdq
->q
.llq
.val
);
635 queue_poll_init(smmu
, &qp
);
637 llq
->val
= READ_ONCE(cmdq
->q
.llq
.val
);
638 if (!queue_full(llq
))
641 ret
= queue_poll(&qp
);
648 * Wait until the SMMU signals a CMD_SYNC completion MSI.
649 * Must be called with the cmdq lock held in some capacity.
651 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device
*smmu
,
652 struct arm_smmu_cmdq
*cmdq
,
653 struct arm_smmu_ll_queue
*llq
)
656 struct arm_smmu_queue_poll qp
;
657 u32
*cmd
= (u32
*)(Q_ENT(&cmdq
->q
, llq
->prod
));
659 queue_poll_init(smmu
, &qp
);
662 * The MSI won't generate an event, since it's being written back
663 * into the command queue.
666 smp_cond_load_relaxed(cmd
, !VAL
|| (ret
= queue_poll(&qp
)));
667 llq
->cons
= ret
? llq
->prod
: queue_inc_prod_n(llq
, 1);
672 * Wait until the SMMU cons index passes llq->prod.
673 * Must be called with the cmdq lock held in some capacity.
675 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device
*smmu
,
676 struct arm_smmu_cmdq
*cmdq
,
677 struct arm_smmu_ll_queue
*llq
)
679 struct arm_smmu_queue_poll qp
;
680 u32 prod
= llq
->prod
;
683 queue_poll_init(smmu
, &qp
);
684 llq
->val
= READ_ONCE(cmdq
->q
.llq
.val
);
686 if (queue_consumed(llq
, prod
))
689 ret
= queue_poll(&qp
);
692 * This needs to be a readl() so that our subsequent call
693 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
695 * Specifically, we need to ensure that we observe all
696 * shared_lock()s by other CMD_SYNCs that share our owner,
697 * so that a failing call to tryunlock() means that we're
698 * the last one out and therefore we can safely advance
699 * cmdq->q.llq.cons. Roughly speaking:
701 * CPU 0 CPU1 CPU2 (us)
711 * <control dependency>
717 * Requires us to see CPU 0's shared_lock() acquisition.
719 llq
->cons
= readl(cmdq
->q
.cons_reg
);
725 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device
*smmu
,
726 struct arm_smmu_cmdq
*cmdq
,
727 struct arm_smmu_ll_queue
*llq
)
729 if (smmu
->options
& ARM_SMMU_OPT_MSIPOLL
&&
730 !arm_smmu_cmdq_needs_busy_polling(smmu
, cmdq
))
731 return __arm_smmu_cmdq_poll_until_msi(smmu
, cmdq
, llq
);
733 return __arm_smmu_cmdq_poll_until_consumed(smmu
, cmdq
, llq
);
736 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq
*cmdq
, u64
*cmds
,
740 struct arm_smmu_ll_queue llq
= {
741 .max_n_shift
= cmdq
->q
.llq
.max_n_shift
,
745 for (i
= 0; i
< n
; ++i
) {
746 u64
*cmd
= &cmds
[i
* CMDQ_ENT_DWORDS
];
748 prod
= queue_inc_prod_n(&llq
, i
);
749 queue_write(Q_ENT(&cmdq
->q
, prod
), cmd
, CMDQ_ENT_DWORDS
);
754 * This is the actual insertion function, and provides the following
755 * ordering guarantees to callers:
757 * - There is a dma_wmb() before publishing any commands to the queue.
758 * This can be relied upon to order prior writes to data structures
759 * in memory (such as a CD or an STE) before the command.
761 * - On completion of a CMD_SYNC, there is a control dependency.
762 * This can be relied upon to order subsequent writes to memory (e.g.
763 * freeing an IOVA) after completion of the CMD_SYNC.
765 * - Command insertion is totally ordered, so if two CPUs each race to
766 * insert their own list of commands then all of the commands from one
767 * CPU will appear before any of the commands from the other CPU.
769 int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device
*smmu
,
770 struct arm_smmu_cmdq
*cmdq
, u64
*cmds
, int n
,
773 u64 cmd_sync
[CMDQ_ENT_DWORDS
];
777 struct arm_smmu_ll_queue llq
, head
;
780 llq
.max_n_shift
= cmdq
->q
.llq
.max_n_shift
;
782 /* 1. Allocate some space in the queue */
783 local_irq_save(flags
);
784 llq
.val
= READ_ONCE(cmdq
->q
.llq
.val
);
788 while (!queue_has_space(&llq
, n
+ sync
)) {
789 local_irq_restore(flags
);
790 if (arm_smmu_cmdq_poll_until_not_full(smmu
, cmdq
, &llq
))
791 dev_err_ratelimited(smmu
->dev
, "CMDQ timeout\n");
792 local_irq_save(flags
);
795 head
.cons
= llq
.cons
;
796 head
.prod
= queue_inc_prod_n(&llq
, n
+ sync
) |
797 CMDQ_PROD_OWNED_FLAG
;
799 old
= cmpxchg_relaxed(&cmdq
->q
.llq
.val
, llq
.val
, head
.val
);
805 owner
= !(llq
.prod
& CMDQ_PROD_OWNED_FLAG
);
806 head
.prod
&= ~CMDQ_PROD_OWNED_FLAG
;
807 llq
.prod
&= ~CMDQ_PROD_OWNED_FLAG
;
810 * 2. Write our commands into the queue
811 * Dependency ordering from the cmpxchg() loop above.
813 arm_smmu_cmdq_write_entries(cmdq
, cmds
, llq
.prod
, n
);
815 prod
= queue_inc_prod_n(&llq
, n
);
816 arm_smmu_cmdq_build_sync_cmd(cmd_sync
, smmu
, cmdq
, prod
);
817 queue_write(Q_ENT(&cmdq
->q
, prod
), cmd_sync
, CMDQ_ENT_DWORDS
);
820 * In order to determine completion of our CMD_SYNC, we must
821 * ensure that the queue can't wrap twice without us noticing.
822 * We achieve that by taking the cmdq lock as shared before
823 * marking our slot as valid.
825 arm_smmu_cmdq_shared_lock(cmdq
);
828 /* 3. Mark our slots as valid, ensuring commands are visible first */
830 arm_smmu_cmdq_set_valid_map(cmdq
, llq
.prod
, head
.prod
);
832 /* 4. If we are the owner, take control of the SMMU hardware */
834 /* a. Wait for previous owner to finish */
835 atomic_cond_read_relaxed(&cmdq
->owner_prod
, VAL
== llq
.prod
);
837 /* b. Stop gathering work by clearing the owned flag */
838 prod
= atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG
,
839 &cmdq
->q
.llq
.atomic
.prod
);
840 prod
&= ~CMDQ_PROD_OWNED_FLAG
;
843 * c. Wait for any gathered work to be written to the queue.
844 * Note that we read our own entries so that we have the control
845 * dependency required by (d).
847 arm_smmu_cmdq_poll_valid_map(cmdq
, llq
.prod
, prod
);
850 * d. Advance the hardware prod pointer
851 * Control dependency ordering from the entries becoming valid.
853 writel_relaxed(prod
, cmdq
->q
.prod_reg
);
856 * e. Tell the next owner we're done
857 * Make sure we've updated the hardware first, so that we don't
858 * race to update prod and potentially move it backwards.
860 atomic_set_release(&cmdq
->owner_prod
, prod
);
863 /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
865 llq
.prod
= queue_inc_prod_n(&llq
, n
);
866 ret
= arm_smmu_cmdq_poll_until_sync(smmu
, cmdq
, &llq
);
868 dev_err_ratelimited(smmu
->dev
,
869 "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
871 readl_relaxed(cmdq
->q
.prod_reg
),
872 readl_relaxed(cmdq
->q
.cons_reg
));
876 * Try to unlock the cmdq lock. This will fail if we're the last
877 * reader, in which case we can safely update cmdq->q.llq.cons
879 if (!arm_smmu_cmdq_shared_tryunlock(cmdq
)) {
880 WRITE_ONCE(cmdq
->q
.llq
.cons
, llq
.cons
);
881 arm_smmu_cmdq_shared_unlock(cmdq
);
885 local_irq_restore(flags
);
889 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device
*smmu
,
890 struct arm_smmu_cmdq_ent
*ent
,
893 u64 cmd
[CMDQ_ENT_DWORDS
];
895 if (unlikely(arm_smmu_cmdq_build_cmd(cmd
, ent
))) {
896 dev_warn(smmu
->dev
, "ignoring unknown CMDQ opcode 0x%x\n",
901 return arm_smmu_cmdq_issue_cmdlist(
902 smmu
, arm_smmu_get_cmdq(smmu
, ent
), cmd
, 1, sync
);
905 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device
*smmu
,
906 struct arm_smmu_cmdq_ent
*ent
)
908 return __arm_smmu_cmdq_issue_cmd(smmu
, ent
, false);
911 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device
*smmu
,
912 struct arm_smmu_cmdq_ent
*ent
)
914 return __arm_smmu_cmdq_issue_cmd(smmu
, ent
, true);
917 static void arm_smmu_cmdq_batch_init(struct arm_smmu_device
*smmu
,
918 struct arm_smmu_cmdq_batch
*cmds
,
919 struct arm_smmu_cmdq_ent
*ent
)
922 cmds
->cmdq
= arm_smmu_get_cmdq(smmu
, ent
);
925 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device
*smmu
,
926 struct arm_smmu_cmdq_batch
*cmds
,
927 struct arm_smmu_cmdq_ent
*cmd
)
929 bool unsupported_cmd
= !arm_smmu_cmdq_supports_cmd(cmds
->cmdq
, cmd
);
930 bool force_sync
= (cmds
->num
== CMDQ_BATCH_ENTRIES
- 1) &&
931 (smmu
->options
& ARM_SMMU_OPT_CMDQ_FORCE_SYNC
);
934 if (force_sync
|| unsupported_cmd
) {
935 arm_smmu_cmdq_issue_cmdlist(smmu
, cmds
->cmdq
, cmds
->cmds
,
937 arm_smmu_cmdq_batch_init(smmu
, cmds
, cmd
);
940 if (cmds
->num
== CMDQ_BATCH_ENTRIES
) {
941 arm_smmu_cmdq_issue_cmdlist(smmu
, cmds
->cmdq
, cmds
->cmds
,
943 arm_smmu_cmdq_batch_init(smmu
, cmds
, cmd
);
946 index
= cmds
->num
* CMDQ_ENT_DWORDS
;
947 if (unlikely(arm_smmu_cmdq_build_cmd(&cmds
->cmds
[index
], cmd
))) {
948 dev_warn(smmu
->dev
, "ignoring unknown CMDQ opcode 0x%x\n",
956 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device
*smmu
,
957 struct arm_smmu_cmdq_batch
*cmds
)
959 return arm_smmu_cmdq_issue_cmdlist(smmu
, cmds
->cmdq
, cmds
->cmds
,
963 static void arm_smmu_page_response(struct device
*dev
, struct iopf_fault
*unused
,
964 struct iommu_page_response
*resp
)
966 struct arm_smmu_cmdq_ent cmd
= {0};
967 struct arm_smmu_master
*master
= dev_iommu_priv_get(dev
);
968 int sid
= master
->streams
[0].id
;
970 if (WARN_ON(!master
->stall_enabled
))
973 cmd
.opcode
= CMDQ_OP_RESUME
;
974 cmd
.resume
.sid
= sid
;
975 cmd
.resume
.stag
= resp
->grpid
;
976 switch (resp
->code
) {
977 case IOMMU_PAGE_RESP_INVALID
:
978 case IOMMU_PAGE_RESP_FAILURE
:
979 cmd
.resume
.resp
= CMDQ_RESUME_0_RESP_ABORT
;
981 case IOMMU_PAGE_RESP_SUCCESS
:
982 cmd
.resume
.resp
= CMDQ_RESUME_0_RESP_RETRY
;
988 arm_smmu_cmdq_issue_cmd(master
->smmu
, &cmd
);
990 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
991 * RESUME consumption guarantees that the stalled transaction will be
992 * terminated... at some point in the future. PRI_RESP is fire and
997 /* Context descriptor manipulation functions */
998 void arm_smmu_tlb_inv_asid(struct arm_smmu_device
*smmu
, u16 asid
)
1000 struct arm_smmu_cmdq_ent cmd
= {
1001 .opcode
= smmu
->features
& ARM_SMMU_FEAT_E2H
?
1002 CMDQ_OP_TLBI_EL2_ASID
: CMDQ_OP_TLBI_NH_ASID
,
1006 arm_smmu_cmdq_issue_cmd_with_sync(smmu
, &cmd
);
1010 * Based on the value of ent report which bits of the STE the HW will access. It
1011 * would be nice if this was complete according to the spec, but minimally it
1012 * has to capture the bits this driver uses.
1015 void arm_smmu_get_ste_used(const __le64
*ent
, __le64
*used_bits
)
1017 unsigned int cfg
= FIELD_GET(STRTAB_STE_0_CFG
, le64_to_cpu(ent
[0]));
1019 used_bits
[0] = cpu_to_le64(STRTAB_STE_0_V
);
1020 if (!(ent
[0] & cpu_to_le64(STRTAB_STE_0_V
)))
1023 used_bits
[0] |= cpu_to_le64(STRTAB_STE_0_CFG
);
1027 used_bits
[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT
|
1028 STRTAB_STE_0_S1CTXPTR_MASK
|
1029 STRTAB_STE_0_S1CDMAX
);
1031 cpu_to_le64(STRTAB_STE_1_S1DSS
| STRTAB_STE_1_S1CIR
|
1032 STRTAB_STE_1_S1COR
| STRTAB_STE_1_S1CSH
|
1033 STRTAB_STE_1_S1STALLD
| STRTAB_STE_1_STRW
|
1035 used_bits
[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID
);
1038 * See 13.5 Summary of attribute/permission configuration fields
1039 * for the SHCFG behavior.
1041 if (FIELD_GET(STRTAB_STE_1_S1DSS
, le64_to_cpu(ent
[1])) ==
1042 STRTAB_STE_1_S1DSS_BYPASS
)
1043 used_bits
[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG
);
1049 cpu_to_le64(STRTAB_STE_1_S2FWB
| STRTAB_STE_1_EATS
|
1050 STRTAB_STE_1_SHCFG
);
1052 cpu_to_le64(STRTAB_STE_2_S2VMID
| STRTAB_STE_2_VTCR
|
1053 STRTAB_STE_2_S2AA64
| STRTAB_STE_2_S2ENDI
|
1054 STRTAB_STE_2_S2PTW
| STRTAB_STE_2_S2S
|
1056 used_bits
[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK
);
1059 if (cfg
== STRTAB_STE_0_CFG_BYPASS
)
1060 used_bits
[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG
);
1062 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_used
);
1065 * Figure out if we can do a hitless update of entry to become target. Returns a
1066 * bit mask where 1 indicates that qword needs to be set disruptively.
1067 * unused_update is an intermediate value of entry that has unused bits set to
1070 static u8
arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer
*writer
,
1071 const __le64
*entry
, const __le64
*target
,
1072 __le64
*unused_update
)
1074 __le64 target_used
[NUM_ENTRY_QWORDS
] = {};
1075 __le64 cur_used
[NUM_ENTRY_QWORDS
] = {};
1076 u8 used_qword_diff
= 0;
1079 writer
->ops
->get_used(entry
, cur_used
);
1080 writer
->ops
->get_used(target
, target_used
);
1082 for (i
= 0; i
!= NUM_ENTRY_QWORDS
; i
++) {
1084 * Check that masks are up to date, the make functions are not
1085 * allowed to set a bit to 1 if the used function doesn't say it
1088 WARN_ON_ONCE(target
[i
] & ~target_used
[i
]);
1090 /* Bits can change because they are not currently being used */
1091 unused_update
[i
] = (entry
[i
] & cur_used
[i
]) |
1092 (target
[i
] & ~cur_used
[i
]);
1094 * Each bit indicates that a used bit in a qword needs to be
1095 * changed after unused_update is applied.
1097 if ((unused_update
[i
] & target_used
[i
]) != target
[i
])
1098 used_qword_diff
|= 1 << i
;
1100 return used_qword_diff
;
1103 static bool entry_set(struct arm_smmu_entry_writer
*writer
, __le64
*entry
,
1104 const __le64
*target
, unsigned int start
,
1107 bool changed
= false;
1110 for (i
= start
; len
!= 0; len
--, i
++) {
1111 if (entry
[i
] != target
[i
]) {
1112 WRITE_ONCE(entry
[i
], target
[i
]);
1118 writer
->ops
->sync(writer
);
1123 * Update the STE/CD to the target configuration. The transition from the
1124 * current entry to the target entry takes place over multiple steps that
1125 * attempt to make the transition hitless if possible. This function takes care
1126 * not to create a situation where the HW can perceive a corrupted entry. HW is
1127 * only required to have a 64 bit atomicity with stores from the CPU, while
1128 * entries are many 64 bit values big.
1130 * The difference between the current value and the target value is analyzed to
1131 * determine which of three updates are required - disruptive, hitless or no
1134 * In the most general disruptive case we can make any update in three steps:
1135 * - Disrupting the entry (V=0)
1136 * - Fill now unused qwords, except qword 0 which contains V
1137 * - Make qword 0 have the final value and valid (V=1) with a single 64
1140 * However this disrupts the HW while it is happening. There are several
1141 * interesting cases where a STE/CD can be updated without disturbing the HW
1142 * because only a small number of bits are changing (S1DSS, CONFIG, etc) or
1143 * because the used bits don't intersect. We can detect this by calculating how
1144 * many 64 bit values need update after adjusting the unused bits and skip the
1145 * V=0 process. This relies on the IGNORED behavior described in the
1149 void arm_smmu_write_entry(struct arm_smmu_entry_writer
*writer
, __le64
*entry
,
1150 const __le64
*target
)
1152 __le64 unused_update
[NUM_ENTRY_QWORDS
];
1156 arm_smmu_entry_qword_diff(writer
, entry
, target
, unused_update
);
1157 if (hweight8(used_qword_diff
) == 1) {
1159 * Only one qword needs its used bits to be changed. This is a
1160 * hitless update, update all bits the current STE/CD is
1161 * ignoring to their new values, then update a single "critical
1162 * qword" to change the STE/CD and finally 0 out any bits that
1163 * are now unused in the target configuration.
1165 unsigned int critical_qword_index
= ffs(used_qword_diff
) - 1;
1168 * Skip writing unused bits in the critical qword since we'll be
1169 * writing it in the next step anyways. This can save a sync
1170 * when the only change is in that qword.
1172 unused_update
[critical_qword_index
] =
1173 entry
[critical_qword_index
];
1174 entry_set(writer
, entry
, unused_update
, 0, NUM_ENTRY_QWORDS
);
1175 entry_set(writer
, entry
, target
, critical_qword_index
, 1);
1176 entry_set(writer
, entry
, target
, 0, NUM_ENTRY_QWORDS
);
1177 } else if (used_qword_diff
) {
1179 * At least two qwords need their inuse bits to be changed. This
1180 * requires a breaking update, zero the V bit, write all qwords
1181 * but 0, then set qword 0
1183 unused_update
[0] = 0;
1184 entry_set(writer
, entry
, unused_update
, 0, 1);
1185 entry_set(writer
, entry
, target
, 1, NUM_ENTRY_QWORDS
- 1);
1186 entry_set(writer
, entry
, target
, 0, 1);
1189 * No inuse bit changed. Sanity check that all unused bits are 0
1190 * in the entry. The target was already sanity checked by
1191 * compute_qword_diff().
1194 entry_set(writer
, entry
, target
, 0, NUM_ENTRY_QWORDS
));
1197 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_write_entry
);
1199 static void arm_smmu_sync_cd(struct arm_smmu_master
*master
,
1200 int ssid
, bool leaf
)
1203 struct arm_smmu_cmdq_batch cmds
;
1204 struct arm_smmu_device
*smmu
= master
->smmu
;
1205 struct arm_smmu_cmdq_ent cmd
= {
1206 .opcode
= CMDQ_OP_CFGI_CD
,
1213 arm_smmu_cmdq_batch_init(smmu
, &cmds
, &cmd
);
1214 for (i
= 0; i
< master
->num_streams
; i
++) {
1215 cmd
.cfgi
.sid
= master
->streams
[i
].id
;
1216 arm_smmu_cmdq_batch_add(smmu
, &cmds
, &cmd
);
1219 arm_smmu_cmdq_batch_submit(smmu
, &cmds
);
1222 static void arm_smmu_write_cd_l1_desc(struct arm_smmu_cdtab_l1
*dst
,
1223 dma_addr_t l2ptr_dma
)
1225 u64 val
= (l2ptr_dma
& CTXDESC_L1_DESC_L2PTR_MASK
) | CTXDESC_L1_DESC_V
;
1227 /* The HW has 64 bit atomicity with stores to the L2 CD table */
1228 WRITE_ONCE(dst
->l2ptr
, cpu_to_le64(val
));
1231 static dma_addr_t
arm_smmu_cd_l1_get_desc(const struct arm_smmu_cdtab_l1
*src
)
1233 return le64_to_cpu(src
->l2ptr
) & CTXDESC_L1_DESC_L2PTR_MASK
;
1236 struct arm_smmu_cd
*arm_smmu_get_cd_ptr(struct arm_smmu_master
*master
,
1239 struct arm_smmu_cdtab_l2
*l2
;
1240 struct arm_smmu_ctx_desc_cfg
*cd_table
= &master
->cd_table
;
1242 if (!arm_smmu_cdtab_allocated(cd_table
))
1245 if (cd_table
->s1fmt
== STRTAB_STE_0_S1FMT_LINEAR
)
1246 return &cd_table
->linear
.table
[ssid
];
1248 l2
= cd_table
->l2
.l2ptrs
[arm_smmu_cdtab_l1_idx(ssid
)];
1251 return &l2
->cds
[arm_smmu_cdtab_l2_idx(ssid
)];
1254 static struct arm_smmu_cd
*arm_smmu_alloc_cd_ptr(struct arm_smmu_master
*master
,
1257 struct arm_smmu_ctx_desc_cfg
*cd_table
= &master
->cd_table
;
1258 struct arm_smmu_device
*smmu
= master
->smmu
;
1261 iommu_group_mutex_assert(master
->dev
);
1263 if (!arm_smmu_cdtab_allocated(cd_table
)) {
1264 if (arm_smmu_alloc_cd_tables(master
))
1268 if (cd_table
->s1fmt
== STRTAB_STE_0_S1FMT_64K_L2
) {
1269 unsigned int idx
= arm_smmu_cdtab_l1_idx(ssid
);
1270 struct arm_smmu_cdtab_l2
**l2ptr
= &cd_table
->l2
.l2ptrs
[idx
];
1273 dma_addr_t l2ptr_dma
;
1275 *l2ptr
= dma_alloc_coherent(smmu
->dev
, sizeof(**l2ptr
),
1276 &l2ptr_dma
, GFP_KERNEL
);
1280 arm_smmu_write_cd_l1_desc(&cd_table
->l2
.l1tab
[idx
],
1282 /* An invalid L1CD can be cached */
1283 arm_smmu_sync_cd(master
, ssid
, false);
1286 return arm_smmu_get_cd_ptr(master
, ssid
);
1289 struct arm_smmu_cd_writer
{
1290 struct arm_smmu_entry_writer writer
;
1295 void arm_smmu_get_cd_used(const __le64
*ent
, __le64
*used_bits
)
1297 used_bits
[0] = cpu_to_le64(CTXDESC_CD_0_V
);
1298 if (!(ent
[0] & cpu_to_le64(CTXDESC_CD_0_V
)))
1300 memset(used_bits
, 0xFF, sizeof(struct arm_smmu_cd
));
1303 * If EPD0 is set by the make function it means
1304 * T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED
1306 if (ent
[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0
)) {
1307 used_bits
[0] &= ~cpu_to_le64(
1308 CTXDESC_CD_0_TCR_T0SZ
| CTXDESC_CD_0_TCR_TG0
|
1309 CTXDESC_CD_0_TCR_IRGN0
| CTXDESC_CD_0_TCR_ORGN0
|
1310 CTXDESC_CD_0_TCR_SH0
);
1311 used_bits
[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK
);
1314 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_cd_used
);
1316 static void arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer
*writer
)
1318 struct arm_smmu_cd_writer
*cd_writer
=
1319 container_of(writer
, struct arm_smmu_cd_writer
, writer
);
1321 arm_smmu_sync_cd(writer
->master
, cd_writer
->ssid
, true);
1324 static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops
= {
1325 .sync
= arm_smmu_cd_writer_sync_entry
,
1326 .get_used
= arm_smmu_get_cd_used
,
1329 void arm_smmu_write_cd_entry(struct arm_smmu_master
*master
, int ssid
,
1330 struct arm_smmu_cd
*cdptr
,
1331 const struct arm_smmu_cd
*target
)
1333 bool target_valid
= target
->data
[0] & cpu_to_le64(CTXDESC_CD_0_V
);
1334 bool cur_valid
= cdptr
->data
[0] & cpu_to_le64(CTXDESC_CD_0_V
);
1335 struct arm_smmu_cd_writer cd_writer
= {
1337 .ops
= &arm_smmu_cd_writer_ops
,
1343 if (ssid
!= IOMMU_NO_PASID
&& cur_valid
!= target_valid
) {
1345 master
->cd_table
.used_ssids
--;
1347 master
->cd_table
.used_ssids
++;
1350 arm_smmu_write_entry(&cd_writer
.writer
, cdptr
->data
, target
->data
);
1353 void arm_smmu_make_s1_cd(struct arm_smmu_cd
*target
,
1354 struct arm_smmu_master
*master
,
1355 struct arm_smmu_domain
*smmu_domain
)
1357 struct arm_smmu_ctx_desc
*cd
= &smmu_domain
->cd
;
1358 const struct io_pgtable_cfg
*pgtbl_cfg
=
1359 &io_pgtable_ops_to_pgtable(smmu_domain
->pgtbl_ops
)->cfg
;
1360 typeof(&pgtbl_cfg
->arm_lpae_s1_cfg
.tcr
) tcr
=
1361 &pgtbl_cfg
->arm_lpae_s1_cfg
.tcr
;
1363 memset(target
, 0, sizeof(*target
));
1365 target
->data
[0] = cpu_to_le64(
1366 FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ
, tcr
->tsz
) |
1367 FIELD_PREP(CTXDESC_CD_0_TCR_TG0
, tcr
->tg
) |
1368 FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0
, tcr
->irgn
) |
1369 FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0
, tcr
->orgn
) |
1370 FIELD_PREP(CTXDESC_CD_0_TCR_SH0
, tcr
->sh
) |
1374 CTXDESC_CD_0_TCR_EPD1
|
1376 FIELD_PREP(CTXDESC_CD_0_TCR_IPS
, tcr
->ips
) |
1378 (master
->stall_enabled
? CTXDESC_CD_0_S
: 0) |
1382 FIELD_PREP(CTXDESC_CD_0_ASID
, cd
->asid
)
1385 /* To enable dirty flag update, set both Access flag and dirty state update */
1386 if (pgtbl_cfg
->quirks
& IO_PGTABLE_QUIRK_ARM_HD
)
1387 target
->data
[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA
|
1388 CTXDESC_CD_0_TCR_HD
);
1390 target
->data
[1] = cpu_to_le64(pgtbl_cfg
->arm_lpae_s1_cfg
.ttbr
&
1391 CTXDESC_CD_1_TTB0_MASK
);
1392 target
->data
[3] = cpu_to_le64(pgtbl_cfg
->arm_lpae_s1_cfg
.mair
);
1394 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s1_cd
);
1396 void arm_smmu_clear_cd(struct arm_smmu_master
*master
, ioasid_t ssid
)
1398 struct arm_smmu_cd target
= {};
1399 struct arm_smmu_cd
*cdptr
;
1401 if (!arm_smmu_cdtab_allocated(&master
->cd_table
))
1403 cdptr
= arm_smmu_get_cd_ptr(master
, ssid
);
1404 if (WARN_ON(!cdptr
))
1406 arm_smmu_write_cd_entry(master
, ssid
, cdptr
, &target
);
1409 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master
*master
)
1413 size_t max_contexts
;
1414 struct arm_smmu_device
*smmu
= master
->smmu
;
1415 struct arm_smmu_ctx_desc_cfg
*cd_table
= &master
->cd_table
;
1417 cd_table
->s1cdmax
= master
->ssid_bits
;
1418 max_contexts
= 1 << cd_table
->s1cdmax
;
1420 if (!(smmu
->features
& ARM_SMMU_FEAT_2_LVL_CDTAB
) ||
1421 max_contexts
<= CTXDESC_L2_ENTRIES
) {
1422 cd_table
->s1fmt
= STRTAB_STE_0_S1FMT_LINEAR
;
1423 cd_table
->linear
.num_ents
= max_contexts
;
1425 l1size
= max_contexts
* sizeof(struct arm_smmu_cd
);
1426 cd_table
->linear
.table
= dma_alloc_coherent(smmu
->dev
, l1size
,
1427 &cd_table
->cdtab_dma
,
1429 if (!cd_table
->linear
.table
)
1432 cd_table
->s1fmt
= STRTAB_STE_0_S1FMT_64K_L2
;
1433 cd_table
->l2
.num_l1_ents
=
1434 DIV_ROUND_UP(max_contexts
, CTXDESC_L2_ENTRIES
);
1436 cd_table
->l2
.l2ptrs
= kcalloc(cd_table
->l2
.num_l1_ents
,
1437 sizeof(*cd_table
->l2
.l2ptrs
),
1439 if (!cd_table
->l2
.l2ptrs
)
1442 l1size
= cd_table
->l2
.num_l1_ents
* sizeof(struct arm_smmu_cdtab_l1
);
1443 cd_table
->l2
.l1tab
= dma_alloc_coherent(smmu
->dev
, l1size
,
1444 &cd_table
->cdtab_dma
,
1446 if (!cd_table
->l2
.l2ptrs
) {
1448 goto err_free_l2ptrs
;
1454 kfree(cd_table
->l2
.l2ptrs
);
1455 cd_table
->l2
.l2ptrs
= NULL
;
1459 static void arm_smmu_free_cd_tables(struct arm_smmu_master
*master
)
1462 struct arm_smmu_device
*smmu
= master
->smmu
;
1463 struct arm_smmu_ctx_desc_cfg
*cd_table
= &master
->cd_table
;
1465 if (cd_table
->s1fmt
!= STRTAB_STE_0_S1FMT_LINEAR
) {
1466 for (i
= 0; i
< cd_table
->l2
.num_l1_ents
; i
++) {
1467 if (!cd_table
->l2
.l2ptrs
[i
])
1470 dma_free_coherent(smmu
->dev
,
1471 sizeof(*cd_table
->l2
.l2ptrs
[i
]),
1472 cd_table
->l2
.l2ptrs
[i
],
1473 arm_smmu_cd_l1_get_desc(&cd_table
->l2
.l1tab
[i
]));
1475 kfree(cd_table
->l2
.l2ptrs
);
1477 dma_free_coherent(smmu
->dev
,
1478 cd_table
->l2
.num_l1_ents
*
1479 sizeof(struct arm_smmu_cdtab_l1
),
1480 cd_table
->l2
.l1tab
, cd_table
->cdtab_dma
);
1482 dma_free_coherent(smmu
->dev
,
1483 cd_table
->linear
.num_ents
*
1484 sizeof(struct arm_smmu_cd
),
1485 cd_table
->linear
.table
, cd_table
->cdtab_dma
);
1489 /* Stream table manipulation functions */
1490 static void arm_smmu_write_strtab_l1_desc(struct arm_smmu_strtab_l1
*dst
,
1491 dma_addr_t l2ptr_dma
)
1495 val
|= FIELD_PREP(STRTAB_L1_DESC_SPAN
, STRTAB_SPLIT
+ 1);
1496 val
|= l2ptr_dma
& STRTAB_L1_DESC_L2PTR_MASK
;
1498 /* The HW has 64 bit atomicity with stores to the L2 STE table */
1499 WRITE_ONCE(dst
->l2ptr
, cpu_to_le64(val
));
1502 struct arm_smmu_ste_writer
{
1503 struct arm_smmu_entry_writer writer
;
1507 static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer
*writer
)
1509 struct arm_smmu_ste_writer
*ste_writer
=
1510 container_of(writer
, struct arm_smmu_ste_writer
, writer
);
1511 struct arm_smmu_cmdq_ent cmd
= {
1512 .opcode
= CMDQ_OP_CFGI_STE
,
1514 .sid
= ste_writer
->sid
,
1519 arm_smmu_cmdq_issue_cmd_with_sync(writer
->master
->smmu
, &cmd
);
1522 static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops
= {
1523 .sync
= arm_smmu_ste_writer_sync_entry
,
1524 .get_used
= arm_smmu_get_ste_used
,
1527 static void arm_smmu_write_ste(struct arm_smmu_master
*master
, u32 sid
,
1528 struct arm_smmu_ste
*ste
,
1529 const struct arm_smmu_ste
*target
)
1531 struct arm_smmu_device
*smmu
= master
->smmu
;
1532 struct arm_smmu_ste_writer ste_writer
= {
1534 .ops
= &arm_smmu_ste_writer_ops
,
1540 arm_smmu_write_entry(&ste_writer
.writer
, ste
->data
, target
->data
);
1542 /* It's likely that we'll want to use the new STE soon */
1543 if (!(smmu
->options
& ARM_SMMU_OPT_SKIP_PREFETCH
)) {
1544 struct arm_smmu_cmdq_ent
1545 prefetch_cmd
= { .opcode
= CMDQ_OP_PREFETCH_CFG
,
1550 arm_smmu_cmdq_issue_cmd(smmu
, &prefetch_cmd
);
1554 void arm_smmu_make_abort_ste(struct arm_smmu_ste
*target
)
1556 memset(target
, 0, sizeof(*target
));
1557 target
->data
[0] = cpu_to_le64(
1559 FIELD_PREP(STRTAB_STE_0_CFG
, STRTAB_STE_0_CFG_ABORT
));
1561 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_abort_ste
);
1564 void arm_smmu_make_bypass_ste(struct arm_smmu_device
*smmu
,
1565 struct arm_smmu_ste
*target
)
1567 memset(target
, 0, sizeof(*target
));
1568 target
->data
[0] = cpu_to_le64(
1570 FIELD_PREP(STRTAB_STE_0_CFG
, STRTAB_STE_0_CFG_BYPASS
));
1572 if (smmu
->features
& ARM_SMMU_FEAT_ATTR_TYPES_OVR
)
1573 target
->data
[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG
,
1574 STRTAB_STE_1_SHCFG_INCOMING
));
1576 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste
);
1579 void arm_smmu_make_cdtable_ste(struct arm_smmu_ste
*target
,
1580 struct arm_smmu_master
*master
, bool ats_enabled
,
1583 struct arm_smmu_ctx_desc_cfg
*cd_table
= &master
->cd_table
;
1584 struct arm_smmu_device
*smmu
= master
->smmu
;
1586 memset(target
, 0, sizeof(*target
));
1587 target
->data
[0] = cpu_to_le64(
1589 FIELD_PREP(STRTAB_STE_0_CFG
, STRTAB_STE_0_CFG_S1_TRANS
) |
1590 FIELD_PREP(STRTAB_STE_0_S1FMT
, cd_table
->s1fmt
) |
1591 (cd_table
->cdtab_dma
& STRTAB_STE_0_S1CTXPTR_MASK
) |
1592 FIELD_PREP(STRTAB_STE_0_S1CDMAX
, cd_table
->s1cdmax
));
1594 target
->data
[1] = cpu_to_le64(
1595 FIELD_PREP(STRTAB_STE_1_S1DSS
, s1dss
) |
1596 FIELD_PREP(STRTAB_STE_1_S1CIR
, STRTAB_STE_1_S1C_CACHE_WBRA
) |
1597 FIELD_PREP(STRTAB_STE_1_S1COR
, STRTAB_STE_1_S1C_CACHE_WBRA
) |
1598 FIELD_PREP(STRTAB_STE_1_S1CSH
, ARM_SMMU_SH_ISH
) |
1599 ((smmu
->features
& ARM_SMMU_FEAT_STALLS
&&
1600 !master
->stall_enabled
) ?
1601 STRTAB_STE_1_S1STALLD
:
1603 FIELD_PREP(STRTAB_STE_1_EATS
,
1604 ats_enabled
? STRTAB_STE_1_EATS_TRANS
: 0));
1606 if ((smmu
->features
& ARM_SMMU_FEAT_ATTR_TYPES_OVR
) &&
1607 s1dss
== STRTAB_STE_1_S1DSS_BYPASS
)
1608 target
->data
[1] |= cpu_to_le64(FIELD_PREP(
1609 STRTAB_STE_1_SHCFG
, STRTAB_STE_1_SHCFG_INCOMING
));
1611 if (smmu
->features
& ARM_SMMU_FEAT_E2H
) {
1613 * To support BTM the streamworld needs to match the
1614 * configuration of the CPU so that the ASID broadcasts are
1615 * properly matched. This means either S/NS-EL2-E2H (hypervisor)
1616 * or NS-EL1 (guest). Since an SVA domain can be installed in a
1617 * PASID this should always use a BTM compatible configuration
1618 * if the HW supports it.
1620 target
->data
[1] |= cpu_to_le64(
1621 FIELD_PREP(STRTAB_STE_1_STRW
, STRTAB_STE_1_STRW_EL2
));
1623 target
->data
[1] |= cpu_to_le64(
1624 FIELD_PREP(STRTAB_STE_1_STRW
, STRTAB_STE_1_STRW_NSEL1
));
1627 * VMID 0 is reserved for stage-2 bypass EL1 STEs, see
1628 * arm_smmu_domain_alloc_id()
1631 cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID
, 0));
1634 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste
);
1636 void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste
*target
,
1637 struct arm_smmu_master
*master
,
1638 struct arm_smmu_domain
*smmu_domain
,
1641 struct arm_smmu_s2_cfg
*s2_cfg
= &smmu_domain
->s2_cfg
;
1642 const struct io_pgtable_cfg
*pgtbl_cfg
=
1643 &io_pgtable_ops_to_pgtable(smmu_domain
->pgtbl_ops
)->cfg
;
1644 typeof(&pgtbl_cfg
->arm_lpae_s2_cfg
.vtcr
) vtcr
=
1645 &pgtbl_cfg
->arm_lpae_s2_cfg
.vtcr
;
1647 struct arm_smmu_device
*smmu
= master
->smmu
;
1649 memset(target
, 0, sizeof(*target
));
1650 target
->data
[0] = cpu_to_le64(
1652 FIELD_PREP(STRTAB_STE_0_CFG
, STRTAB_STE_0_CFG_S2_TRANS
));
1654 target
->data
[1] = cpu_to_le64(
1655 FIELD_PREP(STRTAB_STE_1_EATS
,
1656 ats_enabled
? STRTAB_STE_1_EATS_TRANS
: 0));
1658 if (pgtbl_cfg
->quirks
& IO_PGTABLE_QUIRK_ARM_S2FWB
)
1659 target
->data
[1] |= cpu_to_le64(STRTAB_STE_1_S2FWB
);
1660 if (smmu
->features
& ARM_SMMU_FEAT_ATTR_TYPES_OVR
)
1661 target
->data
[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG
,
1662 STRTAB_STE_1_SHCFG_INCOMING
));
1664 vtcr_val
= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ
, vtcr
->tsz
) |
1665 FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0
, vtcr
->sl
) |
1666 FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0
, vtcr
->irgn
) |
1667 FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0
, vtcr
->orgn
) |
1668 FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0
, vtcr
->sh
) |
1669 FIELD_PREP(STRTAB_STE_2_VTCR_S2TG
, vtcr
->tg
) |
1670 FIELD_PREP(STRTAB_STE_2_VTCR_S2PS
, vtcr
->ps
);
1671 target
->data
[2] = cpu_to_le64(
1672 FIELD_PREP(STRTAB_STE_2_S2VMID
, s2_cfg
->vmid
) |
1673 FIELD_PREP(STRTAB_STE_2_VTCR
, vtcr_val
) |
1674 STRTAB_STE_2_S2AA64
|
1676 STRTAB_STE_2_S2ENDI
|
1678 STRTAB_STE_2_S2PTW
|
1679 (master
->stall_enabled
? STRTAB_STE_2_S2S
: 0) |
1682 target
->data
[3] = cpu_to_le64(pgtbl_cfg
->arm_lpae_s2_cfg
.vttbr
&
1683 STRTAB_STE_3_S2TTB_MASK
);
1685 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s2_domain_ste
);
1688 * This can safely directly manipulate the STE memory without a sync sequence
1689 * because the STE table has not been installed in the SMMU yet.
1691 static void arm_smmu_init_initial_stes(struct arm_smmu_ste
*strtab
,
1696 for (i
= 0; i
< nent
; ++i
) {
1697 arm_smmu_make_abort_ste(strtab
);
1702 static int arm_smmu_init_l2_strtab(struct arm_smmu_device
*smmu
, u32 sid
)
1704 dma_addr_t l2ptr_dma
;
1705 struct arm_smmu_strtab_cfg
*cfg
= &smmu
->strtab_cfg
;
1706 struct arm_smmu_strtab_l2
**l2table
;
1708 l2table
= &cfg
->l2
.l2ptrs
[arm_smmu_strtab_l1_idx(sid
)];
1712 *l2table
= dmam_alloc_coherent(smmu
->dev
, sizeof(**l2table
),
1713 &l2ptr_dma
, GFP_KERNEL
);
1716 "failed to allocate l2 stream table for SID %u\n",
1721 arm_smmu_init_initial_stes((*l2table
)->stes
,
1722 ARRAY_SIZE((*l2table
)->stes
));
1723 arm_smmu_write_strtab_l1_desc(&cfg
->l2
.l1tab
[arm_smmu_strtab_l1_idx(sid
)],
1728 static int arm_smmu_streams_cmp_key(const void *lhs
, const struct rb_node
*rhs
)
1730 struct arm_smmu_stream
*stream_rhs
=
1731 rb_entry(rhs
, struct arm_smmu_stream
, node
);
1732 const u32
*sid_lhs
= lhs
;
1734 if (*sid_lhs
< stream_rhs
->id
)
1736 if (*sid_lhs
> stream_rhs
->id
)
1741 static int arm_smmu_streams_cmp_node(struct rb_node
*lhs
,
1742 const struct rb_node
*rhs
)
1744 return arm_smmu_streams_cmp_key(
1745 &rb_entry(lhs
, struct arm_smmu_stream
, node
)->id
, rhs
);
1748 static struct arm_smmu_master
*
1749 arm_smmu_find_master(struct arm_smmu_device
*smmu
, u32 sid
)
1751 struct rb_node
*node
;
1753 lockdep_assert_held(&smmu
->streams_mutex
);
1755 node
= rb_find(&sid
, &smmu
->streams
, arm_smmu_streams_cmp_key
);
1758 return rb_entry(node
, struct arm_smmu_stream
, node
)->master
;
1761 /* IRQ and event handlers */
1762 static int arm_smmu_handle_evt(struct arm_smmu_device
*smmu
, u64
*evt
)
1766 struct arm_smmu_master
*master
;
1767 bool ssid_valid
= evt
[0] & EVTQ_0_SSV
;
1768 u32 sid
= FIELD_GET(EVTQ_0_SID
, evt
[0]);
1769 struct iopf_fault fault_evt
= { };
1770 struct iommu_fault
*flt
= &fault_evt
.fault
;
1772 switch (FIELD_GET(EVTQ_0_ID
, evt
[0])) {
1773 case EVT_ID_TRANSLATION_FAULT
:
1774 case EVT_ID_ADDR_SIZE_FAULT
:
1775 case EVT_ID_ACCESS_FAULT
:
1776 case EVT_ID_PERMISSION_FAULT
:
1782 if (!(evt
[1] & EVTQ_1_STALL
))
1785 if (evt
[1] & EVTQ_1_RnW
)
1786 perm
|= IOMMU_FAULT_PERM_READ
;
1788 perm
|= IOMMU_FAULT_PERM_WRITE
;
1790 if (evt
[1] & EVTQ_1_InD
)
1791 perm
|= IOMMU_FAULT_PERM_EXEC
;
1793 if (evt
[1] & EVTQ_1_PnU
)
1794 perm
|= IOMMU_FAULT_PERM_PRIV
;
1796 flt
->type
= IOMMU_FAULT_PAGE_REQ
;
1797 flt
->prm
= (struct iommu_fault_page_request
) {
1798 .flags
= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE
,
1799 .grpid
= FIELD_GET(EVTQ_1_STAG
, evt
[1]),
1801 .addr
= FIELD_GET(EVTQ_2_ADDR
, evt
[2]),
1805 flt
->prm
.flags
|= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID
;
1806 flt
->prm
.pasid
= FIELD_GET(EVTQ_0_SSID
, evt
[0]);
1809 mutex_lock(&smmu
->streams_mutex
);
1810 master
= arm_smmu_find_master(smmu
, sid
);
1816 ret
= iommu_report_device_fault(master
->dev
, &fault_evt
);
1818 mutex_unlock(&smmu
->streams_mutex
);
1822 static irqreturn_t
arm_smmu_evtq_thread(int irq
, void *dev
)
1825 struct arm_smmu_device
*smmu
= dev
;
1826 struct arm_smmu_queue
*q
= &smmu
->evtq
.q
;
1827 struct arm_smmu_ll_queue
*llq
= &q
->llq
;
1828 static DEFINE_RATELIMIT_STATE(rs
, DEFAULT_RATELIMIT_INTERVAL
,
1829 DEFAULT_RATELIMIT_BURST
);
1830 u64 evt
[EVTQ_ENT_DWORDS
];
1833 while (!queue_remove_raw(q
, evt
)) {
1834 u8 id
= FIELD_GET(EVTQ_0_ID
, evt
[0]);
1836 ret
= arm_smmu_handle_evt(smmu
, evt
);
1837 if (!ret
|| !__ratelimit(&rs
))
1840 dev_info(smmu
->dev
, "event 0x%02x received:\n", id
);
1841 for (i
= 0; i
< ARRAY_SIZE(evt
); ++i
)
1842 dev_info(smmu
->dev
, "\t0x%016llx\n",
1843 (unsigned long long)evt
[i
]);
1849 * Not much we can do on overflow, so scream and pretend we're
1852 if (queue_sync_prod_in(q
) == -EOVERFLOW
)
1853 dev_err(smmu
->dev
, "EVTQ overflow detected -- events lost\n");
1854 } while (!queue_empty(llq
));
1856 /* Sync our overflow flag, as we believe we're up to speed */
1857 queue_sync_cons_ovf(q
);
1861 static void arm_smmu_handle_ppr(struct arm_smmu_device
*smmu
, u64
*evt
)
1867 sid
= FIELD_GET(PRIQ_0_SID
, evt
[0]);
1868 ssv
= FIELD_GET(PRIQ_0_SSID_V
, evt
[0]);
1869 ssid
= ssv
? FIELD_GET(PRIQ_0_SSID
, evt
[0]) : IOMMU_NO_PASID
;
1870 last
= FIELD_GET(PRIQ_0_PRG_LAST
, evt
[0]);
1871 grpid
= FIELD_GET(PRIQ_1_PRG_IDX
, evt
[1]);
1873 dev_info(smmu
->dev
, "unexpected PRI request received:\n");
1875 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1876 sid
, ssid
, grpid
, last
? "L" : "",
1877 evt
[0] & PRIQ_0_PERM_PRIV
? "" : "un",
1878 evt
[0] & PRIQ_0_PERM_READ
? "R" : "",
1879 evt
[0] & PRIQ_0_PERM_WRITE
? "W" : "",
1880 evt
[0] & PRIQ_0_PERM_EXEC
? "X" : "",
1881 evt
[1] & PRIQ_1_ADDR_MASK
);
1884 struct arm_smmu_cmdq_ent cmd
= {
1885 .opcode
= CMDQ_OP_PRI_RESP
,
1886 .substream_valid
= ssv
,
1891 .resp
= PRI_RESP_DENY
,
1895 arm_smmu_cmdq_issue_cmd(smmu
, &cmd
);
1899 static irqreturn_t
arm_smmu_priq_thread(int irq
, void *dev
)
1901 struct arm_smmu_device
*smmu
= dev
;
1902 struct arm_smmu_queue
*q
= &smmu
->priq
.q
;
1903 struct arm_smmu_ll_queue
*llq
= &q
->llq
;
1904 u64 evt
[PRIQ_ENT_DWORDS
];
1907 while (!queue_remove_raw(q
, evt
))
1908 arm_smmu_handle_ppr(smmu
, evt
);
1910 if (queue_sync_prod_in(q
) == -EOVERFLOW
)
1911 dev_err(smmu
->dev
, "PRIQ overflow detected -- requests lost\n");
1912 } while (!queue_empty(llq
));
1914 /* Sync our overflow flag, as we believe we're up to speed */
1915 queue_sync_cons_ovf(q
);
1919 static int arm_smmu_device_disable(struct arm_smmu_device
*smmu
);
1921 static irqreturn_t
arm_smmu_gerror_handler(int irq
, void *dev
)
1923 u32 gerror
, gerrorn
, active
;
1924 struct arm_smmu_device
*smmu
= dev
;
1926 gerror
= readl_relaxed(smmu
->base
+ ARM_SMMU_GERROR
);
1927 gerrorn
= readl_relaxed(smmu
->base
+ ARM_SMMU_GERRORN
);
1929 active
= gerror
^ gerrorn
;
1930 if (!(active
& GERROR_ERR_MASK
))
1931 return IRQ_NONE
; /* No errors pending */
1934 "unexpected global error reported (0x%08x), this could be serious\n",
1937 if (active
& GERROR_SFM_ERR
) {
1938 dev_err(smmu
->dev
, "device has entered Service Failure Mode!\n");
1939 arm_smmu_device_disable(smmu
);
1942 if (active
& GERROR_MSI_GERROR_ABT_ERR
)
1943 dev_warn(smmu
->dev
, "GERROR MSI write aborted\n");
1945 if (active
& GERROR_MSI_PRIQ_ABT_ERR
)
1946 dev_warn(smmu
->dev
, "PRIQ MSI write aborted\n");
1948 if (active
& GERROR_MSI_EVTQ_ABT_ERR
)
1949 dev_warn(smmu
->dev
, "EVTQ MSI write aborted\n");
1951 if (active
& GERROR_MSI_CMDQ_ABT_ERR
)
1952 dev_warn(smmu
->dev
, "CMDQ MSI write aborted\n");
1954 if (active
& GERROR_PRIQ_ABT_ERR
)
1955 dev_err(smmu
->dev
, "PRIQ write aborted -- events may have been lost\n");
1957 if (active
& GERROR_EVTQ_ABT_ERR
)
1958 dev_err(smmu
->dev
, "EVTQ write aborted -- events may have been lost\n");
1960 if (active
& GERROR_CMDQ_ERR
)
1961 arm_smmu_cmdq_skip_err(smmu
);
1963 writel(gerror
, smmu
->base
+ ARM_SMMU_GERRORN
);
1967 static irqreturn_t
arm_smmu_combined_irq_thread(int irq
, void *dev
)
1969 struct arm_smmu_device
*smmu
= dev
;
1971 arm_smmu_evtq_thread(irq
, dev
);
1972 if (smmu
->features
& ARM_SMMU_FEAT_PRI
)
1973 arm_smmu_priq_thread(irq
, dev
);
1978 static irqreturn_t
arm_smmu_combined_irq_handler(int irq
, void *dev
)
1980 arm_smmu_gerror_handler(irq
, dev
);
1981 return IRQ_WAKE_THREAD
;
1985 arm_smmu_atc_inv_to_cmd(int ssid
, unsigned long iova
, size_t size
,
1986 struct arm_smmu_cmdq_ent
*cmd
)
1990 /* ATC invalidates are always on 4096-bytes pages */
1991 size_t inval_grain_shift
= 12;
1992 unsigned long page_start
, page_end
;
1997 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1998 * prefix. In that case all ATC entries within the address range are
1999 * invalidated, including those that were requested with a PASID! There
2000 * is no way to invalidate only entries without PASID.
2002 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
2003 * traffic), translation requests without PASID create ATC entries
2004 * without PASID, which must be invalidated with substream_valid clear.
2005 * This has the unpleasant side-effect of invalidating all PASID-tagged
2006 * ATC entries within the address range.
2008 *cmd
= (struct arm_smmu_cmdq_ent
) {
2009 .opcode
= CMDQ_OP_ATC_INV
,
2010 .substream_valid
= (ssid
!= IOMMU_NO_PASID
),
2015 cmd
->atc
.size
= ATC_INV_SIZE_ALL
;
2019 page_start
= iova
>> inval_grain_shift
;
2020 page_end
= (iova
+ size
- 1) >> inval_grain_shift
;
2023 * In an ATS Invalidate Request, the address must be aligned on the
2024 * range size, which must be a power of two number of page sizes. We
2025 * thus have to choose between grossly over-invalidating the region, or
2026 * splitting the invalidation into multiple commands. For simplicity
2027 * we'll go with the first solution, but should refine it in the future
2028 * if multiple commands are shown to be more efficient.
2030 * Find the smallest power of two that covers the range. The most
2031 * significant differing bit between the start and end addresses,
2032 * fls(start ^ end), indicates the required span. For example:
2034 * We want to invalidate pages [8; 11]. This is already the ideal range:
2035 * x = 0b1000 ^ 0b1011 = 0b11
2036 * span = 1 << fls(x) = 4
2038 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
2039 * x = 0b0111 ^ 0b1010 = 0b1101
2040 * span = 1 << fls(x) = 16
2042 log2_span
= fls_long(page_start
^ page_end
);
2043 span_mask
= (1ULL << log2_span
) - 1;
2045 page_start
&= ~span_mask
;
2047 cmd
->atc
.addr
= page_start
<< inval_grain_shift
;
2048 cmd
->atc
.size
= log2_span
;
2051 static int arm_smmu_atc_inv_master(struct arm_smmu_master
*master
,
2055 struct arm_smmu_cmdq_ent cmd
;
2056 struct arm_smmu_cmdq_batch cmds
;
2058 arm_smmu_atc_inv_to_cmd(ssid
, 0, 0, &cmd
);
2060 arm_smmu_cmdq_batch_init(master
->smmu
, &cmds
, &cmd
);
2061 for (i
= 0; i
< master
->num_streams
; i
++) {
2062 cmd
.atc
.sid
= master
->streams
[i
].id
;
2063 arm_smmu_cmdq_batch_add(master
->smmu
, &cmds
, &cmd
);
2066 return arm_smmu_cmdq_batch_submit(master
->smmu
, &cmds
);
2069 int arm_smmu_atc_inv_domain(struct arm_smmu_domain
*smmu_domain
,
2070 unsigned long iova
, size_t size
)
2072 struct arm_smmu_master_domain
*master_domain
;
2074 unsigned long flags
;
2075 struct arm_smmu_cmdq_ent cmd
= {
2076 .opcode
= CMDQ_OP_ATC_INV
,
2078 struct arm_smmu_cmdq_batch cmds
;
2080 if (!(smmu_domain
->smmu
->features
& ARM_SMMU_FEAT_ATS
))
2084 * Ensure that we've completed prior invalidation of the main TLBs
2085 * before we read 'nr_ats_masters' in case of a concurrent call to
2086 * arm_smmu_enable_ats():
2088 * // unmap() // arm_smmu_enable_ats()
2089 * TLBI+SYNC atomic_inc(&nr_ats_masters);
2091 * atomic_read(&nr_ats_masters); pci_enable_ats() // writel()
2093 * Ensures that we always see the incremented 'nr_ats_masters' count if
2094 * ATS was enabled at the PCI device before completion of the TLBI.
2097 if (!atomic_read(&smmu_domain
->nr_ats_masters
))
2100 arm_smmu_cmdq_batch_init(smmu_domain
->smmu
, &cmds
, &cmd
);
2102 spin_lock_irqsave(&smmu_domain
->devices_lock
, flags
);
2103 list_for_each_entry(master_domain
, &smmu_domain
->devices
,
2105 struct arm_smmu_master
*master
= master_domain
->master
;
2107 if (!master
->ats_enabled
)
2110 if (master_domain
->nested_ats_flush
) {
2112 * If a S2 used as a nesting parent is changed we have
2113 * no option but to completely flush the ATC.
2115 arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID
, 0, 0, &cmd
);
2117 arm_smmu_atc_inv_to_cmd(master_domain
->ssid
, iova
, size
,
2121 for (i
= 0; i
< master
->num_streams
; i
++) {
2122 cmd
.atc
.sid
= master
->streams
[i
].id
;
2123 arm_smmu_cmdq_batch_add(smmu_domain
->smmu
, &cmds
, &cmd
);
2126 spin_unlock_irqrestore(&smmu_domain
->devices_lock
, flags
);
2128 return arm_smmu_cmdq_batch_submit(smmu_domain
->smmu
, &cmds
);
2131 /* IO_PGTABLE API */
2132 static void arm_smmu_tlb_inv_context(void *cookie
)
2134 struct arm_smmu_domain
*smmu_domain
= cookie
;
2135 struct arm_smmu_device
*smmu
= smmu_domain
->smmu
;
2136 struct arm_smmu_cmdq_ent cmd
;
2139 * NOTE: when io-pgtable is in non-strict mode, we may get here with
2140 * PTEs previously cleared by unmaps on the current CPU not yet visible
2141 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
2142 * insertion to guarantee those are observed before the TLBI. Do be
2145 if (smmu_domain
->stage
== ARM_SMMU_DOMAIN_S1
) {
2146 arm_smmu_tlb_inv_asid(smmu
, smmu_domain
->cd
.asid
);
2148 cmd
.opcode
= CMDQ_OP_TLBI_S12_VMALL
;
2149 cmd
.tlbi
.vmid
= smmu_domain
->s2_cfg
.vmid
;
2150 arm_smmu_cmdq_issue_cmd_with_sync(smmu
, &cmd
);
2152 arm_smmu_atc_inv_domain(smmu_domain
, 0, 0);
2155 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent
*cmd
,
2156 unsigned long iova
, size_t size
,
2158 struct arm_smmu_domain
*smmu_domain
)
2160 struct arm_smmu_device
*smmu
= smmu_domain
->smmu
;
2161 unsigned long end
= iova
+ size
, num_pages
= 0, tg
= 0;
2162 size_t inv_range
= granule
;
2163 struct arm_smmu_cmdq_batch cmds
;
2168 if (smmu
->features
& ARM_SMMU_FEAT_RANGE_INV
) {
2169 /* Get the leaf page size */
2170 tg
= __ffs(smmu_domain
->domain
.pgsize_bitmap
);
2172 num_pages
= size
>> tg
;
2174 /* Convert page size of 12,14,16 (log2) to 1,2,3 */
2175 cmd
->tlbi
.tg
= (tg
- 10) / 2;
2178 * Determine what level the granule is at. For non-leaf, both
2179 * io-pgtable and SVA pass a nominal last-level granule because
2180 * they don't know what level(s) actually apply, so ignore that
2181 * and leave TTL=0. However for various errata reasons we still
2182 * want to use a range command, so avoid the SVA corner case
2183 * where both scale and num could be 0 as well.
2186 cmd
->tlbi
.ttl
= 4 - ((ilog2(granule
) - 3) / (tg
- 3));
2187 else if ((num_pages
& CMDQ_TLBI_RANGE_NUM_MAX
) == 1)
2191 arm_smmu_cmdq_batch_init(smmu
, &cmds
, cmd
);
2193 while (iova
< end
) {
2194 if (smmu
->features
& ARM_SMMU_FEAT_RANGE_INV
) {
2196 * On each iteration of the loop, the range is 5 bits
2197 * worth of the aligned size remaining.
2198 * The range in pages is:
2200 * range = (num_pages & (0x1f << __ffs(num_pages)))
2202 unsigned long scale
, num
;
2204 /* Determine the power of 2 multiple number of pages */
2205 scale
= __ffs(num_pages
);
2206 cmd
->tlbi
.scale
= scale
;
2208 /* Determine how many chunks of 2^scale size we have */
2209 num
= (num_pages
>> scale
) & CMDQ_TLBI_RANGE_NUM_MAX
;
2210 cmd
->tlbi
.num
= num
- 1;
2212 /* range is num * 2^scale * pgsize */
2213 inv_range
= num
<< (scale
+ tg
);
2215 /* Clear out the lower order bits for the next iteration */
2216 num_pages
-= num
<< scale
;
2219 cmd
->tlbi
.addr
= iova
;
2220 arm_smmu_cmdq_batch_add(smmu
, &cmds
, cmd
);
2223 arm_smmu_cmdq_batch_submit(smmu
, &cmds
);
2226 static void arm_smmu_tlb_inv_range_domain(unsigned long iova
, size_t size
,
2227 size_t granule
, bool leaf
,
2228 struct arm_smmu_domain
*smmu_domain
)
2230 struct arm_smmu_cmdq_ent cmd
= {
2236 if (smmu_domain
->stage
== ARM_SMMU_DOMAIN_S1
) {
2237 cmd
.opcode
= smmu_domain
->smmu
->features
& ARM_SMMU_FEAT_E2H
?
2238 CMDQ_OP_TLBI_EL2_VA
: CMDQ_OP_TLBI_NH_VA
;
2239 cmd
.tlbi
.asid
= smmu_domain
->cd
.asid
;
2241 cmd
.opcode
= CMDQ_OP_TLBI_S2_IPA
;
2242 cmd
.tlbi
.vmid
= smmu_domain
->s2_cfg
.vmid
;
2244 __arm_smmu_tlb_inv_range(&cmd
, iova
, size
, granule
, smmu_domain
);
2246 if (smmu_domain
->nest_parent
) {
2248 * When the S2 domain changes all the nested S1 ASIDs have to be
2251 cmd
.opcode
= CMDQ_OP_TLBI_NH_ALL
;
2252 arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain
->smmu
, &cmd
);
2256 * Unfortunately, this can't be leaf-only since we may have
2257 * zapped an entire table.
2259 arm_smmu_atc_inv_domain(smmu_domain
, iova
, size
);
2262 void arm_smmu_tlb_inv_range_asid(unsigned long iova
, size_t size
, int asid
,
2263 size_t granule
, bool leaf
,
2264 struct arm_smmu_domain
*smmu_domain
)
2266 struct arm_smmu_cmdq_ent cmd
= {
2267 .opcode
= smmu_domain
->smmu
->features
& ARM_SMMU_FEAT_E2H
?
2268 CMDQ_OP_TLBI_EL2_VA
: CMDQ_OP_TLBI_NH_VA
,
2275 __arm_smmu_tlb_inv_range(&cmd
, iova
, size
, granule
, smmu_domain
);
2278 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather
*gather
,
2279 unsigned long iova
, size_t granule
,
2282 struct arm_smmu_domain
*smmu_domain
= cookie
;
2283 struct iommu_domain
*domain
= &smmu_domain
->domain
;
2285 iommu_iotlb_gather_add_page(domain
, gather
, iova
, granule
);
2288 static void arm_smmu_tlb_inv_walk(unsigned long iova
, size_t size
,
2289 size_t granule
, void *cookie
)
2291 arm_smmu_tlb_inv_range_domain(iova
, size
, granule
, false, cookie
);
2294 static const struct iommu_flush_ops arm_smmu_flush_ops
= {
2295 .tlb_flush_all
= arm_smmu_tlb_inv_context
,
2296 .tlb_flush_walk
= arm_smmu_tlb_inv_walk
,
2297 .tlb_add_page
= arm_smmu_tlb_inv_page_nosync
,
2300 static bool arm_smmu_dbm_capable(struct arm_smmu_device
*smmu
)
2302 u32 features
= (ARM_SMMU_FEAT_HD
| ARM_SMMU_FEAT_COHERENCY
);
2304 return (smmu
->features
& features
) == features
;
2308 static bool arm_smmu_capable(struct device
*dev
, enum iommu_cap cap
)
2310 struct arm_smmu_master
*master
= dev_iommu_priv_get(dev
);
2313 case IOMMU_CAP_CACHE_COHERENCY
:
2314 /* Assume that a coherent TCU implies coherent TBUs */
2315 return master
->smmu
->features
& ARM_SMMU_FEAT_COHERENCY
;
2316 case IOMMU_CAP_ENFORCE_CACHE_COHERENCY
:
2317 return arm_smmu_master_canwbs(master
);
2318 case IOMMU_CAP_NOEXEC
:
2319 case IOMMU_CAP_DEFERRED_FLUSH
:
2321 case IOMMU_CAP_DIRTY_TRACKING
:
2322 return arm_smmu_dbm_capable(master
->smmu
);
2328 static bool arm_smmu_enforce_cache_coherency(struct iommu_domain
*domain
)
2330 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain(domain
);
2331 struct arm_smmu_master_domain
*master_domain
;
2332 unsigned long flags
;
2335 spin_lock_irqsave(&smmu_domain
->devices_lock
, flags
);
2336 list_for_each_entry(master_domain
, &smmu_domain
->devices
,
2338 if (!arm_smmu_master_canwbs(master_domain
->master
)) {
2343 smmu_domain
->enforce_cache_coherency
= ret
;
2344 spin_unlock_irqrestore(&smmu_domain
->devices_lock
, flags
);
2348 struct arm_smmu_domain
*arm_smmu_domain_alloc(void)
2350 struct arm_smmu_domain
*smmu_domain
;
2352 smmu_domain
= kzalloc(sizeof(*smmu_domain
), GFP_KERNEL
);
2354 return ERR_PTR(-ENOMEM
);
2356 mutex_init(&smmu_domain
->init_mutex
);
2357 INIT_LIST_HEAD(&smmu_domain
->devices
);
2358 spin_lock_init(&smmu_domain
->devices_lock
);
2363 static struct iommu_domain
*arm_smmu_domain_alloc_paging(struct device
*dev
)
2365 struct arm_smmu_domain
*smmu_domain
;
2368 * Allocate the domain and initialise some of its data structures.
2369 * We can't really do anything meaningful until we've added a
2372 smmu_domain
= arm_smmu_domain_alloc();
2373 if (IS_ERR(smmu_domain
))
2374 return ERR_CAST(smmu_domain
);
2377 struct arm_smmu_master
*master
= dev_iommu_priv_get(dev
);
2380 ret
= arm_smmu_domain_finalise(smmu_domain
, master
->smmu
, 0);
2383 return ERR_PTR(ret
);
2386 return &smmu_domain
->domain
;
2389 static void arm_smmu_domain_free_paging(struct iommu_domain
*domain
)
2391 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain(domain
);
2392 struct arm_smmu_device
*smmu
= smmu_domain
->smmu
;
2394 free_io_pgtable_ops(smmu_domain
->pgtbl_ops
);
2396 /* Free the ASID or VMID */
2397 if (smmu_domain
->stage
== ARM_SMMU_DOMAIN_S1
) {
2398 /* Prevent SVA from touching the CD while we're freeing it */
2399 mutex_lock(&arm_smmu_asid_lock
);
2400 xa_erase(&arm_smmu_asid_xa
, smmu_domain
->cd
.asid
);
2401 mutex_unlock(&arm_smmu_asid_lock
);
2403 struct arm_smmu_s2_cfg
*cfg
= &smmu_domain
->s2_cfg
;
2405 ida_free(&smmu
->vmid_map
, cfg
->vmid
);
2411 static int arm_smmu_domain_finalise_s1(struct arm_smmu_device
*smmu
,
2412 struct arm_smmu_domain
*smmu_domain
)
2416 struct arm_smmu_ctx_desc
*cd
= &smmu_domain
->cd
;
2418 /* Prevent SVA from modifying the ASID until it is written to the CD */
2419 mutex_lock(&arm_smmu_asid_lock
);
2420 ret
= xa_alloc(&arm_smmu_asid_xa
, &asid
, smmu_domain
,
2421 XA_LIMIT(1, (1 << smmu
->asid_bits
) - 1), GFP_KERNEL
);
2422 cd
->asid
= (u16
)asid
;
2423 mutex_unlock(&arm_smmu_asid_lock
);
2427 static int arm_smmu_domain_finalise_s2(struct arm_smmu_device
*smmu
,
2428 struct arm_smmu_domain
*smmu_domain
)
2431 struct arm_smmu_s2_cfg
*cfg
= &smmu_domain
->s2_cfg
;
2433 /* Reserve VMID 0 for stage-2 bypass STEs */
2434 vmid
= ida_alloc_range(&smmu
->vmid_map
, 1, (1 << smmu
->vmid_bits
) - 1,
2439 cfg
->vmid
= (u16
)vmid
;
2443 static int arm_smmu_domain_finalise(struct arm_smmu_domain
*smmu_domain
,
2444 struct arm_smmu_device
*smmu
, u32 flags
)
2447 enum io_pgtable_fmt fmt
;
2448 struct io_pgtable_cfg pgtbl_cfg
;
2449 struct io_pgtable_ops
*pgtbl_ops
;
2450 int (*finalise_stage_fn
)(struct arm_smmu_device
*smmu
,
2451 struct arm_smmu_domain
*smmu_domain
);
2452 bool enable_dirty
= flags
& IOMMU_HWPT_ALLOC_DIRTY_TRACKING
;
2454 /* Restrict the stage to what we can actually support */
2455 if (!(smmu
->features
& ARM_SMMU_FEAT_TRANS_S1
))
2456 smmu_domain
->stage
= ARM_SMMU_DOMAIN_S2
;
2457 if (!(smmu
->features
& ARM_SMMU_FEAT_TRANS_S2
))
2458 smmu_domain
->stage
= ARM_SMMU_DOMAIN_S1
;
2460 pgtbl_cfg
= (struct io_pgtable_cfg
) {
2461 .pgsize_bitmap
= smmu
->pgsize_bitmap
,
2462 .coherent_walk
= smmu
->features
& ARM_SMMU_FEAT_COHERENCY
,
2463 .tlb
= &arm_smmu_flush_ops
,
2464 .iommu_dev
= smmu
->dev
,
2467 switch (smmu_domain
->stage
) {
2468 case ARM_SMMU_DOMAIN_S1
: {
2469 unsigned long ias
= (smmu
->features
&
2470 ARM_SMMU_FEAT_VAX
) ? 52 : 48;
2472 pgtbl_cfg
.ias
= min_t(unsigned long, ias
, VA_BITS
);
2473 pgtbl_cfg
.oas
= smmu
->ias
;
2475 pgtbl_cfg
.quirks
|= IO_PGTABLE_QUIRK_ARM_HD
;
2476 fmt
= ARM_64_LPAE_S1
;
2477 finalise_stage_fn
= arm_smmu_domain_finalise_s1
;
2480 case ARM_SMMU_DOMAIN_S2
:
2483 pgtbl_cfg
.ias
= smmu
->ias
;
2484 pgtbl_cfg
.oas
= smmu
->oas
;
2485 fmt
= ARM_64_LPAE_S2
;
2486 finalise_stage_fn
= arm_smmu_domain_finalise_s2
;
2487 if ((smmu
->features
& ARM_SMMU_FEAT_S2FWB
) &&
2488 (flags
& IOMMU_HWPT_ALLOC_NEST_PARENT
))
2489 pgtbl_cfg
.quirks
|= IO_PGTABLE_QUIRK_ARM_S2FWB
;
2495 pgtbl_ops
= alloc_io_pgtable_ops(fmt
, &pgtbl_cfg
, smmu_domain
);
2499 smmu_domain
->domain
.pgsize_bitmap
= pgtbl_cfg
.pgsize_bitmap
;
2500 smmu_domain
->domain
.geometry
.aperture_end
= (1UL << pgtbl_cfg
.ias
) - 1;
2501 smmu_domain
->domain
.geometry
.force_aperture
= true;
2502 if (enable_dirty
&& smmu_domain
->stage
== ARM_SMMU_DOMAIN_S1
)
2503 smmu_domain
->domain
.dirty_ops
= &arm_smmu_dirty_ops
;
2505 ret
= finalise_stage_fn(smmu
, smmu_domain
);
2507 free_io_pgtable_ops(pgtbl_ops
);
2511 smmu_domain
->pgtbl_ops
= pgtbl_ops
;
2512 smmu_domain
->smmu
= smmu
;
2516 static struct arm_smmu_ste
*
2517 arm_smmu_get_step_for_sid(struct arm_smmu_device
*smmu
, u32 sid
)
2519 struct arm_smmu_strtab_cfg
*cfg
= &smmu
->strtab_cfg
;
2521 if (smmu
->features
& ARM_SMMU_FEAT_2_LVL_STRTAB
) {
2522 /* Two-level walk */
2523 return &cfg
->l2
.l2ptrs
[arm_smmu_strtab_l1_idx(sid
)]
2524 ->stes
[arm_smmu_strtab_l2_idx(sid
)];
2526 /* Simple linear lookup */
2527 return &cfg
->linear
.table
[sid
];
2531 void arm_smmu_install_ste_for_dev(struct arm_smmu_master
*master
,
2532 const struct arm_smmu_ste
*target
)
2535 struct arm_smmu_device
*smmu
= master
->smmu
;
2537 master
->cd_table
.in_ste
=
2538 FIELD_GET(STRTAB_STE_0_CFG
, le64_to_cpu(target
->data
[0])) ==
2539 STRTAB_STE_0_CFG_S1_TRANS
;
2540 master
->ste_ats_enabled
=
2541 FIELD_GET(STRTAB_STE_1_EATS
, le64_to_cpu(target
->data
[1])) ==
2542 STRTAB_STE_1_EATS_TRANS
;
2544 for (i
= 0; i
< master
->num_streams
; ++i
) {
2545 u32 sid
= master
->streams
[i
].id
;
2546 struct arm_smmu_ste
*step
=
2547 arm_smmu_get_step_for_sid(smmu
, sid
);
2549 /* Bridged PCI devices may end up with duplicated IDs */
2550 for (j
= 0; j
< i
; j
++)
2551 if (master
->streams
[j
].id
== sid
)
2556 arm_smmu_write_ste(master
, sid
, step
, target
);
2560 static bool arm_smmu_ats_supported(struct arm_smmu_master
*master
)
2562 struct device
*dev
= master
->dev
;
2563 struct arm_smmu_device
*smmu
= master
->smmu
;
2564 struct iommu_fwspec
*fwspec
= dev_iommu_fwspec_get(dev
);
2566 if (!(smmu
->features
& ARM_SMMU_FEAT_ATS
))
2569 if (!(fwspec
->flags
& IOMMU_FWSPEC_PCI_RC_ATS
))
2572 return dev_is_pci(dev
) && pci_ats_supported(to_pci_dev(dev
));
2575 static void arm_smmu_enable_ats(struct arm_smmu_master
*master
)
2578 struct pci_dev
*pdev
;
2579 struct arm_smmu_device
*smmu
= master
->smmu
;
2581 /* Smallest Translation Unit: log2 of the smallest supported granule */
2582 stu
= __ffs(smmu
->pgsize_bitmap
);
2583 pdev
= to_pci_dev(master
->dev
);
2586 * ATC invalidation of PASID 0 causes the entire ATC to be flushed.
2588 arm_smmu_atc_inv_master(master
, IOMMU_NO_PASID
);
2589 if (pci_enable_ats(pdev
, stu
))
2590 dev_err(master
->dev
, "Failed to enable ATS (STU %zu)\n", stu
);
2593 static int arm_smmu_enable_pasid(struct arm_smmu_master
*master
)
2598 struct pci_dev
*pdev
;
2600 if (!dev_is_pci(master
->dev
))
2603 pdev
= to_pci_dev(master
->dev
);
2605 features
= pci_pasid_features(pdev
);
2609 num_pasids
= pci_max_pasids(pdev
);
2610 if (num_pasids
<= 0)
2613 ret
= pci_enable_pasid(pdev
, features
);
2615 dev_err(&pdev
->dev
, "Failed to enable PASID\n");
2619 master
->ssid_bits
= min_t(u8
, ilog2(num_pasids
),
2620 master
->smmu
->ssid_bits
);
2624 static void arm_smmu_disable_pasid(struct arm_smmu_master
*master
)
2626 struct pci_dev
*pdev
;
2628 if (!dev_is_pci(master
->dev
))
2631 pdev
= to_pci_dev(master
->dev
);
2633 if (!pdev
->pasid_enabled
)
2636 master
->ssid_bits
= 0;
2637 pci_disable_pasid(pdev
);
2640 static struct arm_smmu_master_domain
*
2641 arm_smmu_find_master_domain(struct arm_smmu_domain
*smmu_domain
,
2642 struct arm_smmu_master
*master
,
2643 ioasid_t ssid
, bool nested_ats_flush
)
2645 struct arm_smmu_master_domain
*master_domain
;
2647 lockdep_assert_held(&smmu_domain
->devices_lock
);
2649 list_for_each_entry(master_domain
, &smmu_domain
->devices
,
2651 if (master_domain
->master
== master
&&
2652 master_domain
->ssid
== ssid
&&
2653 master_domain
->nested_ats_flush
== nested_ats_flush
)
2654 return master_domain
;
2660 * If the domain uses the smmu_domain->devices list return the arm_smmu_domain
2661 * structure, otherwise NULL. These domains track attached devices so they can
2662 * issue invalidations.
2664 static struct arm_smmu_domain
*
2665 to_smmu_domain_devices(struct iommu_domain
*domain
)
2667 /* The domain can be NULL only when processing the first attach */
2670 if ((domain
->type
& __IOMMU_DOMAIN_PAGING
) ||
2671 domain
->type
== IOMMU_DOMAIN_SVA
)
2672 return to_smmu_domain(domain
);
2673 if (domain
->type
== IOMMU_DOMAIN_NESTED
)
2674 return to_smmu_nested_domain(domain
)->vsmmu
->s2_parent
;
2678 static void arm_smmu_remove_master_domain(struct arm_smmu_master
*master
,
2679 struct iommu_domain
*domain
,
2682 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain_devices(domain
);
2683 struct arm_smmu_master_domain
*master_domain
;
2684 bool nested_ats_flush
= false;
2685 unsigned long flags
;
2690 if (domain
->type
== IOMMU_DOMAIN_NESTED
)
2691 nested_ats_flush
= to_smmu_nested_domain(domain
)->enable_ats
;
2693 spin_lock_irqsave(&smmu_domain
->devices_lock
, flags
);
2694 master_domain
= arm_smmu_find_master_domain(smmu_domain
, master
, ssid
,
2696 if (master_domain
) {
2697 list_del(&master_domain
->devices_elm
);
2698 kfree(master_domain
);
2699 if (master
->ats_enabled
)
2700 atomic_dec(&smmu_domain
->nr_ats_masters
);
2702 spin_unlock_irqrestore(&smmu_domain
->devices_lock
, flags
);
2706 * Start the sequence to attach a domain to a master. The sequence contains three
2708 * arm_smmu_attach_prepare()
2709 * arm_smmu_install_ste_for_dev()
2710 * arm_smmu_attach_commit()
2712 * If prepare succeeds then the sequence must be completed. The STE installed
2713 * must set the STE.EATS field according to state.ats_enabled.
2715 * If the device supports ATS then this determines if EATS should be enabled
2716 * in the STE, and starts sequencing EATS disable if required.
2718 * The change of the EATS in the STE and the PCI ATS config space is managed by
2719 * this sequence to be in the right order so that if PCI ATS is enabled then
2720 * STE.ETAS is enabled.
2722 * new_domain can be a non-paging domain. In this case ATS will not be enabled,
2723 * and invalidations won't be tracked.
2725 int arm_smmu_attach_prepare(struct arm_smmu_attach_state
*state
,
2726 struct iommu_domain
*new_domain
)
2728 struct arm_smmu_master
*master
= state
->master
;
2729 struct arm_smmu_master_domain
*master_domain
;
2730 struct arm_smmu_domain
*smmu_domain
=
2731 to_smmu_domain_devices(new_domain
);
2732 unsigned long flags
;
2735 * arm_smmu_share_asid() must not see two domains pointing to the same
2736 * arm_smmu_master_domain contents otherwise it could randomly write one
2737 * or the other to the CD.
2739 lockdep_assert_held(&arm_smmu_asid_lock
);
2741 if (smmu_domain
|| state
->cd_needs_ats
) {
2743 * The SMMU does not support enabling ATS with bypass/abort.
2744 * When the STE is in bypass (STE.Config[2:0] == 0b100), ATS
2745 * Translation Requests and Translated transactions are denied
2746 * as though ATS is disabled for the stream (STE.EATS == 0b00),
2747 * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events
2748 * (IHI0070Ea 5.2 Stream Table Entry). Thus ATS can only be
2749 * enabled if we have arm_smmu_domain, those always have page
2752 state
->ats_enabled
= !state
->disable_ats
&&
2753 arm_smmu_ats_supported(master
);
2757 master_domain
= kzalloc(sizeof(*master_domain
), GFP_KERNEL
);
2760 master_domain
->master
= master
;
2761 master_domain
->ssid
= state
->ssid
;
2762 if (new_domain
->type
== IOMMU_DOMAIN_NESTED
)
2763 master_domain
->nested_ats_flush
=
2764 to_smmu_nested_domain(new_domain
)->enable_ats
;
2767 * During prepare we want the current smmu_domain and new
2768 * smmu_domain to be in the devices list before we change any
2769 * HW. This ensures that both domains will send ATS
2770 * invalidations to the master until we are done.
2772 * It is tempting to make this list only track masters that are
2773 * using ATS, but arm_smmu_share_asid() also uses this to change
2774 * the ASID of a domain, unrelated to ATS.
2776 * Notice if we are re-attaching the same domain then the list
2777 * will have two identical entries and commit will remove only
2780 spin_lock_irqsave(&smmu_domain
->devices_lock
, flags
);
2781 if (smmu_domain
->enforce_cache_coherency
&&
2782 !arm_smmu_master_canwbs(master
)) {
2783 spin_unlock_irqrestore(&smmu_domain
->devices_lock
,
2785 kfree(master_domain
);
2789 if (state
->ats_enabled
)
2790 atomic_inc(&smmu_domain
->nr_ats_masters
);
2791 list_add(&master_domain
->devices_elm
, &smmu_domain
->devices
);
2792 spin_unlock_irqrestore(&smmu_domain
->devices_lock
, flags
);
2795 if (!state
->ats_enabled
&& master
->ats_enabled
) {
2796 pci_disable_ats(to_pci_dev(master
->dev
));
2798 * This is probably overkill, but the config write for disabling
2799 * ATS should complete before the STE is configured to generate
2800 * UR to avoid AER noise.
2808 * Commit is done after the STE/CD are configured with the EATS setting. It
2809 * completes synchronizing the PCI device's ATC and finishes manipulating the
2810 * smmu_domain->devices list.
2812 void arm_smmu_attach_commit(struct arm_smmu_attach_state
*state
)
2814 struct arm_smmu_master
*master
= state
->master
;
2816 lockdep_assert_held(&arm_smmu_asid_lock
);
2818 if (state
->ats_enabled
&& !master
->ats_enabled
) {
2819 arm_smmu_enable_ats(master
);
2820 } else if (state
->ats_enabled
&& master
->ats_enabled
) {
2822 * The translation has changed, flush the ATC. At this point the
2823 * SMMU is translating for the new domain and both the old&new
2824 * domain will issue invalidations.
2826 arm_smmu_atc_inv_master(master
, state
->ssid
);
2827 } else if (!state
->ats_enabled
&& master
->ats_enabled
) {
2828 /* ATS is being switched off, invalidate the entire ATC */
2829 arm_smmu_atc_inv_master(master
, IOMMU_NO_PASID
);
2831 master
->ats_enabled
= state
->ats_enabled
;
2833 arm_smmu_remove_master_domain(master
, state
->old_domain
, state
->ssid
);
2836 static int arm_smmu_attach_dev(struct iommu_domain
*domain
, struct device
*dev
)
2839 struct arm_smmu_ste target
;
2840 struct iommu_fwspec
*fwspec
= dev_iommu_fwspec_get(dev
);
2841 struct arm_smmu_device
*smmu
;
2842 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain(domain
);
2843 struct arm_smmu_attach_state state
= {
2844 .old_domain
= iommu_get_domain_for_dev(dev
),
2845 .ssid
= IOMMU_NO_PASID
,
2847 struct arm_smmu_master
*master
;
2848 struct arm_smmu_cd
*cdptr
;
2853 state
.master
= master
= dev_iommu_priv_get(dev
);
2854 smmu
= master
->smmu
;
2856 mutex_lock(&smmu_domain
->init_mutex
);
2858 if (!smmu_domain
->smmu
) {
2859 ret
= arm_smmu_domain_finalise(smmu_domain
, smmu
, 0);
2860 } else if (smmu_domain
->smmu
!= smmu
)
2863 mutex_unlock(&smmu_domain
->init_mutex
);
2867 if (smmu_domain
->stage
== ARM_SMMU_DOMAIN_S1
) {
2868 cdptr
= arm_smmu_alloc_cd_ptr(master
, IOMMU_NO_PASID
);
2871 } else if (arm_smmu_ssids_in_use(&master
->cd_table
))
2875 * Prevent arm_smmu_share_asid() from trying to change the ASID
2876 * of either the old or new domain while we are working on it.
2877 * This allows the STE and the smmu_domain->devices list to
2878 * be inconsistent during this routine.
2880 mutex_lock(&arm_smmu_asid_lock
);
2882 ret
= arm_smmu_attach_prepare(&state
, domain
);
2884 mutex_unlock(&arm_smmu_asid_lock
);
2888 switch (smmu_domain
->stage
) {
2889 case ARM_SMMU_DOMAIN_S1
: {
2890 struct arm_smmu_cd target_cd
;
2892 arm_smmu_make_s1_cd(&target_cd
, master
, smmu_domain
);
2893 arm_smmu_write_cd_entry(master
, IOMMU_NO_PASID
, cdptr
,
2895 arm_smmu_make_cdtable_ste(&target
, master
, state
.ats_enabled
,
2896 STRTAB_STE_1_S1DSS_SSID0
);
2897 arm_smmu_install_ste_for_dev(master
, &target
);
2900 case ARM_SMMU_DOMAIN_S2
:
2901 arm_smmu_make_s2_domain_ste(&target
, master
, smmu_domain
,
2903 arm_smmu_install_ste_for_dev(master
, &target
);
2904 arm_smmu_clear_cd(master
, IOMMU_NO_PASID
);
2908 arm_smmu_attach_commit(&state
);
2909 mutex_unlock(&arm_smmu_asid_lock
);
2913 static int arm_smmu_s1_set_dev_pasid(struct iommu_domain
*domain
,
2914 struct device
*dev
, ioasid_t id
,
2915 struct iommu_domain
*old
)
2917 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain(domain
);
2918 struct arm_smmu_master
*master
= dev_iommu_priv_get(dev
);
2919 struct arm_smmu_device
*smmu
= master
->smmu
;
2920 struct arm_smmu_cd target_cd
;
2923 mutex_lock(&smmu_domain
->init_mutex
);
2924 if (!smmu_domain
->smmu
)
2925 ret
= arm_smmu_domain_finalise(smmu_domain
, smmu
, 0);
2926 else if (smmu_domain
->smmu
!= smmu
)
2928 mutex_unlock(&smmu_domain
->init_mutex
);
2932 if (smmu_domain
->stage
!= ARM_SMMU_DOMAIN_S1
)
2936 * We can read cd.asid outside the lock because arm_smmu_set_pasid()
2939 arm_smmu_make_s1_cd(&target_cd
, master
, smmu_domain
);
2940 return arm_smmu_set_pasid(master
, to_smmu_domain(domain
), id
,
2944 static void arm_smmu_update_ste(struct arm_smmu_master
*master
,
2945 struct iommu_domain
*sid_domain
,
2948 unsigned int s1dss
= STRTAB_STE_1_S1DSS_TERMINATE
;
2949 struct arm_smmu_ste ste
;
2951 if (master
->cd_table
.in_ste
&& master
->ste_ats_enabled
== ats_enabled
)
2954 if (sid_domain
->type
== IOMMU_DOMAIN_IDENTITY
)
2955 s1dss
= STRTAB_STE_1_S1DSS_BYPASS
;
2957 WARN_ON(sid_domain
->type
!= IOMMU_DOMAIN_BLOCKED
);
2960 * Change the STE into a cdtable one with SID IDENTITY/BLOCKED behavior
2961 * using s1dss if necessary. If the cd_table is already installed then
2962 * the S1DSS is correct and this will just update the EATS. Otherwise it
2963 * installs the entire thing. This will be hitless.
2965 arm_smmu_make_cdtable_ste(&ste
, master
, ats_enabled
, s1dss
);
2966 arm_smmu_install_ste_for_dev(master
, &ste
);
2969 int arm_smmu_set_pasid(struct arm_smmu_master
*master
,
2970 struct arm_smmu_domain
*smmu_domain
, ioasid_t pasid
,
2971 struct arm_smmu_cd
*cd
, struct iommu_domain
*old
)
2973 struct iommu_domain
*sid_domain
= iommu_get_domain_for_dev(master
->dev
);
2974 struct arm_smmu_attach_state state
= {
2979 struct arm_smmu_cd
*cdptr
;
2982 /* The core code validates pasid */
2984 if (smmu_domain
->smmu
!= master
->smmu
)
2987 if (!master
->cd_table
.in_ste
&&
2988 sid_domain
->type
!= IOMMU_DOMAIN_IDENTITY
&&
2989 sid_domain
->type
!= IOMMU_DOMAIN_BLOCKED
)
2992 cdptr
= arm_smmu_alloc_cd_ptr(master
, pasid
);
2996 mutex_lock(&arm_smmu_asid_lock
);
2997 ret
= arm_smmu_attach_prepare(&state
, &smmu_domain
->domain
);
3002 * We don't want to obtain to the asid_lock too early, so fix up the
3003 * caller set ASID under the lock in case it changed.
3005 cd
->data
[0] &= ~cpu_to_le64(CTXDESC_CD_0_ASID
);
3006 cd
->data
[0] |= cpu_to_le64(
3007 FIELD_PREP(CTXDESC_CD_0_ASID
, smmu_domain
->cd
.asid
));
3009 arm_smmu_write_cd_entry(master
, pasid
, cdptr
, cd
);
3010 arm_smmu_update_ste(master
, sid_domain
, state
.ats_enabled
);
3012 arm_smmu_attach_commit(&state
);
3015 mutex_unlock(&arm_smmu_asid_lock
);
3019 static void arm_smmu_remove_dev_pasid(struct device
*dev
, ioasid_t pasid
,
3020 struct iommu_domain
*domain
)
3022 struct arm_smmu_master
*master
= dev_iommu_priv_get(dev
);
3023 struct arm_smmu_domain
*smmu_domain
;
3025 smmu_domain
= to_smmu_domain(domain
);
3027 mutex_lock(&arm_smmu_asid_lock
);
3028 arm_smmu_clear_cd(master
, pasid
);
3029 if (master
->ats_enabled
)
3030 arm_smmu_atc_inv_master(master
, pasid
);
3031 arm_smmu_remove_master_domain(master
, &smmu_domain
->domain
, pasid
);
3032 mutex_unlock(&arm_smmu_asid_lock
);
3035 * When the last user of the CD table goes away downgrade the STE back
3036 * to a non-cd_table one.
3038 if (!arm_smmu_ssids_in_use(&master
->cd_table
)) {
3039 struct iommu_domain
*sid_domain
=
3040 iommu_get_domain_for_dev(master
->dev
);
3042 if (sid_domain
->type
== IOMMU_DOMAIN_IDENTITY
||
3043 sid_domain
->type
== IOMMU_DOMAIN_BLOCKED
)
3044 sid_domain
->ops
->attach_dev(sid_domain
, dev
);
3048 static void arm_smmu_attach_dev_ste(struct iommu_domain
*domain
,
3050 struct arm_smmu_ste
*ste
,
3053 struct arm_smmu_master
*master
= dev_iommu_priv_get(dev
);
3054 struct arm_smmu_attach_state state
= {
3056 .old_domain
= iommu_get_domain_for_dev(dev
),
3057 .ssid
= IOMMU_NO_PASID
,
3061 * Do not allow any ASID to be changed while are working on the STE,
3062 * otherwise we could miss invalidations.
3064 mutex_lock(&arm_smmu_asid_lock
);
3067 * If the CD table is not in use we can use the provided STE, otherwise
3068 * we use a cdtable STE with the provided S1DSS.
3070 if (arm_smmu_ssids_in_use(&master
->cd_table
)) {
3072 * If a CD table has to be present then we need to run with ATS
3073 * on even though the RID will fail ATS queries with UR. This is
3074 * because we have no idea what the PASID's need.
3076 state
.cd_needs_ats
= true;
3077 arm_smmu_attach_prepare(&state
, domain
);
3078 arm_smmu_make_cdtable_ste(ste
, master
, state
.ats_enabled
, s1dss
);
3080 arm_smmu_attach_prepare(&state
, domain
);
3082 arm_smmu_install_ste_for_dev(master
, ste
);
3083 arm_smmu_attach_commit(&state
);
3084 mutex_unlock(&arm_smmu_asid_lock
);
3087 * This has to be done after removing the master from the
3088 * arm_smmu_domain->devices to avoid races updating the same context
3089 * descriptor from arm_smmu_share_asid().
3091 arm_smmu_clear_cd(master
, IOMMU_NO_PASID
);
3094 static int arm_smmu_attach_dev_identity(struct iommu_domain
*domain
,
3097 struct arm_smmu_ste ste
;
3098 struct arm_smmu_master
*master
= dev_iommu_priv_get(dev
);
3100 arm_smmu_make_bypass_ste(master
->smmu
, &ste
);
3101 arm_smmu_attach_dev_ste(domain
, dev
, &ste
, STRTAB_STE_1_S1DSS_BYPASS
);
3105 static const struct iommu_domain_ops arm_smmu_identity_ops
= {
3106 .attach_dev
= arm_smmu_attach_dev_identity
,
3109 static struct iommu_domain arm_smmu_identity_domain
= {
3110 .type
= IOMMU_DOMAIN_IDENTITY
,
3111 .ops
= &arm_smmu_identity_ops
,
3114 static int arm_smmu_attach_dev_blocked(struct iommu_domain
*domain
,
3117 struct arm_smmu_ste ste
;
3119 arm_smmu_make_abort_ste(&ste
);
3120 arm_smmu_attach_dev_ste(domain
, dev
, &ste
,
3121 STRTAB_STE_1_S1DSS_TERMINATE
);
3125 static const struct iommu_domain_ops arm_smmu_blocked_ops
= {
3126 .attach_dev
= arm_smmu_attach_dev_blocked
,
3129 static struct iommu_domain arm_smmu_blocked_domain
= {
3130 .type
= IOMMU_DOMAIN_BLOCKED
,
3131 .ops
= &arm_smmu_blocked_ops
,
3134 static struct iommu_domain
*
3135 arm_smmu_domain_alloc_paging_flags(struct device
*dev
, u32 flags
,
3136 const struct iommu_user_data
*user_data
)
3138 struct arm_smmu_master
*master
= dev_iommu_priv_get(dev
);
3139 const u32 PAGING_FLAGS
= IOMMU_HWPT_ALLOC_DIRTY_TRACKING
|
3140 IOMMU_HWPT_ALLOC_PASID
|
3141 IOMMU_HWPT_ALLOC_NEST_PARENT
;
3142 struct arm_smmu_domain
*smmu_domain
;
3145 if (flags
& ~PAGING_FLAGS
)
3146 return ERR_PTR(-EOPNOTSUPP
);
3148 return ERR_PTR(-EOPNOTSUPP
);
3150 if (flags
& IOMMU_HWPT_ALLOC_PASID
)
3151 return arm_smmu_domain_alloc_paging(dev
);
3153 smmu_domain
= arm_smmu_domain_alloc();
3154 if (IS_ERR(smmu_domain
))
3155 return ERR_CAST(smmu_domain
);
3157 if (flags
& IOMMU_HWPT_ALLOC_NEST_PARENT
) {
3158 if (!(master
->smmu
->features
& ARM_SMMU_FEAT_NESTING
)) {
3162 smmu_domain
->stage
= ARM_SMMU_DOMAIN_S2
;
3163 smmu_domain
->nest_parent
= true;
3166 smmu_domain
->domain
.type
= IOMMU_DOMAIN_UNMANAGED
;
3167 smmu_domain
->domain
.ops
= arm_smmu_ops
.default_domain_ops
;
3168 ret
= arm_smmu_domain_finalise(smmu_domain
, master
->smmu
, flags
);
3171 return &smmu_domain
->domain
;
3175 return ERR_PTR(ret
);
3178 static int arm_smmu_map_pages(struct iommu_domain
*domain
, unsigned long iova
,
3179 phys_addr_t paddr
, size_t pgsize
, size_t pgcount
,
3180 int prot
, gfp_t gfp
, size_t *mapped
)
3182 struct io_pgtable_ops
*ops
= to_smmu_domain(domain
)->pgtbl_ops
;
3187 return ops
->map_pages(ops
, iova
, paddr
, pgsize
, pgcount
, prot
, gfp
, mapped
);
3190 static size_t arm_smmu_unmap_pages(struct iommu_domain
*domain
, unsigned long iova
,
3191 size_t pgsize
, size_t pgcount
,
3192 struct iommu_iotlb_gather
*gather
)
3194 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain(domain
);
3195 struct io_pgtable_ops
*ops
= smmu_domain
->pgtbl_ops
;
3200 return ops
->unmap_pages(ops
, iova
, pgsize
, pgcount
, gather
);
3203 static void arm_smmu_flush_iotlb_all(struct iommu_domain
*domain
)
3205 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain(domain
);
3207 if (smmu_domain
->smmu
)
3208 arm_smmu_tlb_inv_context(smmu_domain
);
3211 static void arm_smmu_iotlb_sync(struct iommu_domain
*domain
,
3212 struct iommu_iotlb_gather
*gather
)
3214 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain(domain
);
3216 if (!gather
->pgsize
)
3219 arm_smmu_tlb_inv_range_domain(gather
->start
,
3220 gather
->end
- gather
->start
+ 1,
3221 gather
->pgsize
, true, smmu_domain
);
3225 arm_smmu_iova_to_phys(struct iommu_domain
*domain
, dma_addr_t iova
)
3227 struct io_pgtable_ops
*ops
= to_smmu_domain(domain
)->pgtbl_ops
;
3232 return ops
->iova_to_phys(ops
, iova
);
3235 static struct platform_driver arm_smmu_driver
;
3238 struct arm_smmu_device
*arm_smmu_get_by_fwnode(struct fwnode_handle
*fwnode
)
3240 struct device
*dev
= driver_find_device_by_fwnode(&arm_smmu_driver
.driver
,
3243 return dev
? dev_get_drvdata(dev
) : NULL
;
3246 static bool arm_smmu_sid_in_range(struct arm_smmu_device
*smmu
, u32 sid
)
3248 if (smmu
->features
& ARM_SMMU_FEAT_2_LVL_STRTAB
)
3249 return arm_smmu_strtab_l1_idx(sid
) < smmu
->strtab_cfg
.l2
.num_l1_ents
;
3250 return sid
< smmu
->strtab_cfg
.linear
.num_ents
;
3253 static int arm_smmu_init_sid_strtab(struct arm_smmu_device
*smmu
, u32 sid
)
3255 /* Check the SIDs are in range of the SMMU and our stream table */
3256 if (!arm_smmu_sid_in_range(smmu
, sid
))
3259 /* Ensure l2 strtab is initialised */
3260 if (smmu
->features
& ARM_SMMU_FEAT_2_LVL_STRTAB
)
3261 return arm_smmu_init_l2_strtab(smmu
, sid
);
3266 static int arm_smmu_insert_master(struct arm_smmu_device
*smmu
,
3267 struct arm_smmu_master
*master
)
3271 struct iommu_fwspec
*fwspec
= dev_iommu_fwspec_get(master
->dev
);
3273 master
->streams
= kcalloc(fwspec
->num_ids
, sizeof(*master
->streams
),
3275 if (!master
->streams
)
3277 master
->num_streams
= fwspec
->num_ids
;
3279 mutex_lock(&smmu
->streams_mutex
);
3280 for (i
= 0; i
< fwspec
->num_ids
; i
++) {
3281 struct arm_smmu_stream
*new_stream
= &master
->streams
[i
];
3282 u32 sid
= fwspec
->ids
[i
];
3284 new_stream
->id
= sid
;
3285 new_stream
->master
= master
;
3287 ret
= arm_smmu_init_sid_strtab(smmu
, sid
);
3291 /* Insert into SID tree */
3292 if (rb_find_add(&new_stream
->node
, &smmu
->streams
,
3293 arm_smmu_streams_cmp_node
)) {
3294 dev_warn(master
->dev
, "stream %u already in tree\n",
3302 for (i
--; i
>= 0; i
--)
3303 rb_erase(&master
->streams
[i
].node
, &smmu
->streams
);
3304 kfree(master
->streams
);
3306 mutex_unlock(&smmu
->streams_mutex
);
3311 static void arm_smmu_remove_master(struct arm_smmu_master
*master
)
3314 struct arm_smmu_device
*smmu
= master
->smmu
;
3315 struct iommu_fwspec
*fwspec
= dev_iommu_fwspec_get(master
->dev
);
3317 if (!smmu
|| !master
->streams
)
3320 mutex_lock(&smmu
->streams_mutex
);
3321 for (i
= 0; i
< fwspec
->num_ids
; i
++)
3322 rb_erase(&master
->streams
[i
].node
, &smmu
->streams
);
3323 mutex_unlock(&smmu
->streams_mutex
);
3325 kfree(master
->streams
);
3328 static struct iommu_device
*arm_smmu_probe_device(struct device
*dev
)
3331 struct arm_smmu_device
*smmu
;
3332 struct arm_smmu_master
*master
;
3333 struct iommu_fwspec
*fwspec
= dev_iommu_fwspec_get(dev
);
3335 if (WARN_ON_ONCE(dev_iommu_priv_get(dev
)))
3336 return ERR_PTR(-EBUSY
);
3338 smmu
= arm_smmu_get_by_fwnode(fwspec
->iommu_fwnode
);
3340 return ERR_PTR(-ENODEV
);
3342 master
= kzalloc(sizeof(*master
), GFP_KERNEL
);
3344 return ERR_PTR(-ENOMEM
);
3347 master
->smmu
= smmu
;
3348 dev_iommu_priv_set(dev
, master
);
3350 ret
= arm_smmu_insert_master(smmu
, master
);
3352 goto err_free_master
;
3354 device_property_read_u32(dev
, "pasid-num-bits", &master
->ssid_bits
);
3355 master
->ssid_bits
= min(smmu
->ssid_bits
, master
->ssid_bits
);
3358 * Note that PASID must be enabled before, and disabled after ATS:
3359 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
3361 * Behavior is undefined if this bit is Set and the value of the PASID
3362 * Enable, Execute Requested Enable, or Privileged Mode Requested bits
3365 arm_smmu_enable_pasid(master
);
3367 if (!(smmu
->features
& ARM_SMMU_FEAT_2_LVL_CDTAB
))
3368 master
->ssid_bits
= min_t(u8
, master
->ssid_bits
,
3369 CTXDESC_LINEAR_CDMAX
);
3371 if ((smmu
->features
& ARM_SMMU_FEAT_STALLS
&&
3372 device_property_read_bool(dev
, "dma-can-stall")) ||
3373 smmu
->features
& ARM_SMMU_FEAT_STALL_FORCE
)
3374 master
->stall_enabled
= true;
3376 if (dev_is_pci(dev
)) {
3377 unsigned int stu
= __ffs(smmu
->pgsize_bitmap
);
3379 pci_prepare_ats(to_pci_dev(dev
), stu
);
3382 return &smmu
->iommu
;
3386 return ERR_PTR(ret
);
3389 static void arm_smmu_release_device(struct device
*dev
)
3391 struct arm_smmu_master
*master
= dev_iommu_priv_get(dev
);
3393 if (WARN_ON(arm_smmu_master_sva_enabled(master
)))
3394 iopf_queue_remove_device(master
->smmu
->evtq
.iopf
, dev
);
3396 /* Put the STE back to what arm_smmu_init_strtab() sets */
3397 if (dev
->iommu
->require_direct
)
3398 arm_smmu_attach_dev_identity(&arm_smmu_identity_domain
, dev
);
3400 arm_smmu_attach_dev_blocked(&arm_smmu_blocked_domain
, dev
);
3402 arm_smmu_disable_pasid(master
);
3403 arm_smmu_remove_master(master
);
3404 if (arm_smmu_cdtab_allocated(&master
->cd_table
))
3405 arm_smmu_free_cd_tables(master
);
3409 static int arm_smmu_read_and_clear_dirty(struct iommu_domain
*domain
,
3410 unsigned long iova
, size_t size
,
3411 unsigned long flags
,
3412 struct iommu_dirty_bitmap
*dirty
)
3414 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain(domain
);
3415 struct io_pgtable_ops
*ops
= smmu_domain
->pgtbl_ops
;
3417 return ops
->read_and_clear_dirty(ops
, iova
, size
, flags
, dirty
);
3420 static int arm_smmu_set_dirty_tracking(struct iommu_domain
*domain
,
3424 * Always enabled and the dirty bitmap is cleared prior to
3425 * set_dirty_tracking().
/* Pick the IOMMU group for @dev: the PCI helper for PCI devices, else generic. */
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	/*
	 * We don't support devices sharing stream IDs other than PCI RID
	 * aliases, since the necessary ID-to-device lookup becomes rather
	 * impractical given a potential sparse 32-bit stream ID space.
	 */
	if (dev_is_pci(dev))
		return pci_device_group(dev);

	return generic_device_group(dev);
}
3447 static int arm_smmu_of_xlate(struct device
*dev
,
3448 const struct of_phandle_args
*args
)
3450 return iommu_fwspec_add_ids(dev
, args
->args
, 1);
3453 static void arm_smmu_get_resv_regions(struct device
*dev
,
3454 struct list_head
*head
)
3456 struct iommu_resv_region
*region
;
3457 int prot
= IOMMU_WRITE
| IOMMU_NOEXEC
| IOMMU_MMIO
;
3459 region
= iommu_alloc_resv_region(MSI_IOVA_BASE
, MSI_IOVA_LENGTH
,
3460 prot
, IOMMU_RESV_SW_MSI
, GFP_KERNEL
);
3464 list_add_tail(®ion
->list
, head
);
3466 iommu_dma_get_resv_regions(dev
, head
);
3469 static int arm_smmu_dev_enable_feature(struct device
*dev
,
3470 enum iommu_dev_features feat
)
3472 struct arm_smmu_master
*master
= dev_iommu_priv_get(dev
);
3478 case IOMMU_DEV_FEAT_IOPF
:
3479 if (!arm_smmu_master_iopf_supported(master
))
3481 if (master
->iopf_enabled
)
3483 master
->iopf_enabled
= true;
3485 case IOMMU_DEV_FEAT_SVA
:
3486 if (!arm_smmu_master_sva_supported(master
))
3488 if (arm_smmu_master_sva_enabled(master
))
3490 return arm_smmu_master_enable_sva(master
);
3496 static int arm_smmu_dev_disable_feature(struct device
*dev
,
3497 enum iommu_dev_features feat
)
3499 struct arm_smmu_master
*master
= dev_iommu_priv_get(dev
);
3505 case IOMMU_DEV_FEAT_IOPF
:
3506 if (!master
->iopf_enabled
)
3508 if (master
->sva_enabled
)
3510 master
->iopf_enabled
= false;
3512 case IOMMU_DEV_FEAT_SVA
:
3513 if (!arm_smmu_master_sva_enabled(master
))
3515 return arm_smmu_master_disable_sva(master
);
3522 * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
3523 * PCIe link and save the data to memory by DMA. The hardware is restricted to
3524 * use identity mapping only.
3526 #define IS_HISI_PTT_DEVICE(pdev) ((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
3527 (pdev)->device == 0xa12e)
3529 static int arm_smmu_def_domain_type(struct device
*dev
)
3531 if (dev_is_pci(dev
)) {
3532 struct pci_dev
*pdev
= to_pci_dev(dev
);
3534 if (IS_HISI_PTT_DEVICE(pdev
))
3535 return IOMMU_DOMAIN_IDENTITY
;
3541 static struct iommu_ops arm_smmu_ops
= {
3542 .identity_domain
= &arm_smmu_identity_domain
,
3543 .blocked_domain
= &arm_smmu_blocked_domain
,
3544 .capable
= arm_smmu_capable
,
3545 .hw_info
= arm_smmu_hw_info
,
3546 .domain_alloc_paging
= arm_smmu_domain_alloc_paging
,
3547 .domain_alloc_sva
= arm_smmu_sva_domain_alloc
,
3548 .domain_alloc_paging_flags
= arm_smmu_domain_alloc_paging_flags
,
3549 .probe_device
= arm_smmu_probe_device
,
3550 .release_device
= arm_smmu_release_device
,
3551 .device_group
= arm_smmu_device_group
,
3552 .of_xlate
= arm_smmu_of_xlate
,
3553 .get_resv_regions
= arm_smmu_get_resv_regions
,
3554 .remove_dev_pasid
= arm_smmu_remove_dev_pasid
,
3555 .dev_enable_feat
= arm_smmu_dev_enable_feature
,
3556 .dev_disable_feat
= arm_smmu_dev_disable_feature
,
3557 .page_response
= arm_smmu_page_response
,
3558 .def_domain_type
= arm_smmu_def_domain_type
,
3559 .viommu_alloc
= arm_vsmmu_alloc
,
3560 .user_pasid_table
= 1,
3561 .pgsize_bitmap
= -1UL, /* Restricted during device attach */
3562 .owner
= THIS_MODULE
,
3563 .default_domain_ops
= &(const struct iommu_domain_ops
) {
3564 .attach_dev
= arm_smmu_attach_dev
,
3565 .enforce_cache_coherency
= arm_smmu_enforce_cache_coherency
,
3566 .set_dev_pasid
= arm_smmu_s1_set_dev_pasid
,
3567 .map_pages
= arm_smmu_map_pages
,
3568 .unmap_pages
= arm_smmu_unmap_pages
,
3569 .flush_iotlb_all
= arm_smmu_flush_iotlb_all
,
3570 .iotlb_sync
= arm_smmu_iotlb_sync
,
3571 .iova_to_phys
= arm_smmu_iova_to_phys
,
3572 .free
= arm_smmu_domain_free_paging
,
3576 static struct iommu_dirty_ops arm_smmu_dirty_ops
= {
3577 .read_and_clear_dirty
= arm_smmu_read_and_clear_dirty
,
3578 .set_dirty_tracking
= arm_smmu_set_dirty_tracking
,
3581 /* Probing and initialisation functions */
3582 int arm_smmu_init_one_queue(struct arm_smmu_device
*smmu
,
3583 struct arm_smmu_queue
*q
, void __iomem
*page
,
3584 unsigned long prod_off
, unsigned long cons_off
,
3585 size_t dwords
, const char *name
)
3590 qsz
= ((1 << q
->llq
.max_n_shift
) * dwords
) << 3;
3591 q
->base
= dmam_alloc_coherent(smmu
->dev
, qsz
, &q
->base_dma
,
3593 if (q
->base
|| qsz
< PAGE_SIZE
)
3596 q
->llq
.max_n_shift
--;
3601 "failed to allocate queue (0x%zx bytes) for %s\n",
3606 if (!WARN_ON(q
->base_dma
& (qsz
- 1))) {
3607 dev_info(smmu
->dev
, "allocated %u entries for %s\n",
3608 1 << q
->llq
.max_n_shift
, name
);
3611 q
->prod_reg
= page
+ prod_off
;
3612 q
->cons_reg
= page
+ cons_off
;
3613 q
->ent_dwords
= dwords
;
3615 q
->q_base
= Q_BASE_RWA
;
3616 q
->q_base
|= q
->base_dma
& Q_BASE_ADDR_MASK
;
3617 q
->q_base
|= FIELD_PREP(Q_BASE_LOG2SIZE
, q
->llq
.max_n_shift
);
3619 q
->llq
.prod
= q
->llq
.cons
= 0;
3623 int arm_smmu_cmdq_init(struct arm_smmu_device
*smmu
,
3624 struct arm_smmu_cmdq
*cmdq
)
3626 unsigned int nents
= 1 << cmdq
->q
.llq
.max_n_shift
;
3628 atomic_set(&cmdq
->owner_prod
, 0);
3629 atomic_set(&cmdq
->lock
, 0);
3631 cmdq
->valid_map
= (atomic_long_t
*)devm_bitmap_zalloc(smmu
->dev
, nents
,
3633 if (!cmdq
->valid_map
)
3639 static int arm_smmu_init_queues(struct arm_smmu_device
*smmu
)
3644 ret
= arm_smmu_init_one_queue(smmu
, &smmu
->cmdq
.q
, smmu
->base
,
3645 ARM_SMMU_CMDQ_PROD
, ARM_SMMU_CMDQ_CONS
,
3646 CMDQ_ENT_DWORDS
, "cmdq");
3650 ret
= arm_smmu_cmdq_init(smmu
, &smmu
->cmdq
);
3655 ret
= arm_smmu_init_one_queue(smmu
, &smmu
->evtq
.q
, smmu
->page1
,
3656 ARM_SMMU_EVTQ_PROD
, ARM_SMMU_EVTQ_CONS
,
3657 EVTQ_ENT_DWORDS
, "evtq");
3661 if ((smmu
->features
& ARM_SMMU_FEAT_SVA
) &&
3662 (smmu
->features
& ARM_SMMU_FEAT_STALLS
)) {
3663 smmu
->evtq
.iopf
= iopf_queue_alloc(dev_name(smmu
->dev
));
3664 if (!smmu
->evtq
.iopf
)
3669 if (!(smmu
->features
& ARM_SMMU_FEAT_PRI
))
3672 return arm_smmu_init_one_queue(smmu
, &smmu
->priq
.q
, smmu
->page1
,
3673 ARM_SMMU_PRIQ_PROD
, ARM_SMMU_PRIQ_CONS
,
3674 PRIQ_ENT_DWORDS
, "priq");
3677 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device
*smmu
)
3680 struct arm_smmu_strtab_cfg
*cfg
= &smmu
->strtab_cfg
;
3681 unsigned int last_sid_idx
=
3682 arm_smmu_strtab_l1_idx((1ULL << smmu
->sid_bits
) - 1);
3684 /* Calculate the L1 size, capped to the SIDSIZE. */
3685 cfg
->l2
.num_l1_ents
= min(last_sid_idx
+ 1, STRTAB_MAX_L1_ENTRIES
);
3686 if (cfg
->l2
.num_l1_ents
<= last_sid_idx
)
3688 "2-level strtab only covers %u/%u bits of SID\n",
3689 ilog2(cfg
->l2
.num_l1_ents
* STRTAB_NUM_L2_STES
),
3692 l1size
= cfg
->l2
.num_l1_ents
* sizeof(struct arm_smmu_strtab_l1
);
3693 cfg
->l2
.l1tab
= dmam_alloc_coherent(smmu
->dev
, l1size
, &cfg
->l2
.l1_dma
,
3695 if (!cfg
->l2
.l1tab
) {
3697 "failed to allocate l1 stream table (%u bytes)\n",
3702 cfg
->l2
.l2ptrs
= devm_kcalloc(smmu
->dev
, cfg
->l2
.num_l1_ents
,
3703 sizeof(*cfg
->l2
.l2ptrs
), GFP_KERNEL
);
3704 if (!cfg
->l2
.l2ptrs
)
3710 static int arm_smmu_init_strtab_linear(struct arm_smmu_device
*smmu
)
3713 struct arm_smmu_strtab_cfg
*cfg
= &smmu
->strtab_cfg
;
3715 size
= (1 << smmu
->sid_bits
) * sizeof(struct arm_smmu_ste
);
3716 cfg
->linear
.table
= dmam_alloc_coherent(smmu
->dev
, size
,
3717 &cfg
->linear
.ste_dma
,
3719 if (!cfg
->linear
.table
) {
3721 "failed to allocate linear stream table (%u bytes)\n",
3725 cfg
->linear
.num_ents
= 1 << smmu
->sid_bits
;
3727 arm_smmu_init_initial_stes(cfg
->linear
.table
, cfg
->linear
.num_ents
);
3731 static int arm_smmu_init_strtab(struct arm_smmu_device
*smmu
)
3735 if (smmu
->features
& ARM_SMMU_FEAT_2_LVL_STRTAB
)
3736 ret
= arm_smmu_init_strtab_2lvl(smmu
);
3738 ret
= arm_smmu_init_strtab_linear(smmu
);
3742 ida_init(&smmu
->vmid_map
);
3747 static int arm_smmu_init_structures(struct arm_smmu_device
*smmu
)
3751 mutex_init(&smmu
->streams_mutex
);
3752 smmu
->streams
= RB_ROOT
;
3754 ret
= arm_smmu_init_queues(smmu
);
3758 ret
= arm_smmu_init_strtab(smmu
);
3762 if (smmu
->impl_ops
&& smmu
->impl_ops
->init_structures
)
3763 return smmu
->impl_ops
->init_structures(smmu
);
3768 static int arm_smmu_write_reg_sync(struct arm_smmu_device
*smmu
, u32 val
,
3769 unsigned int reg_off
, unsigned int ack_off
)
3773 writel_relaxed(val
, smmu
->base
+ reg_off
);
3774 return readl_relaxed_poll_timeout(smmu
->base
+ ack_off
, reg
, reg
== val
,
3775 1, ARM_SMMU_POLL_TIMEOUT_US
);
3778 /* GBPA is "special" */
3779 static int arm_smmu_update_gbpa(struct arm_smmu_device
*smmu
, u32 set
, u32 clr
)
3782 u32 reg
, __iomem
*gbpa
= smmu
->base
+ ARM_SMMU_GBPA
;
3784 ret
= readl_relaxed_poll_timeout(gbpa
, reg
, !(reg
& GBPA_UPDATE
),
3785 1, ARM_SMMU_POLL_TIMEOUT_US
);
3791 writel_relaxed(reg
| GBPA_UPDATE
, gbpa
);
3792 ret
= readl_relaxed_poll_timeout(gbpa
, reg
, !(reg
& GBPA_UPDATE
),
3793 1, ARM_SMMU_POLL_TIMEOUT_US
);
3796 dev_err(smmu
->dev
, "GBPA not responding to update\n");
/* devm action: release every platform MSI allocated for the device in @data. */
static void arm_smmu_free_msis(void *data)
{
	platform_device_msi_free_irqs_all((struct device *)data);
}
3807 static void arm_smmu_write_msi_msg(struct msi_desc
*desc
, struct msi_msg
*msg
)
3809 phys_addr_t doorbell
;
3810 struct device
*dev
= msi_desc_to_dev(desc
);
3811 struct arm_smmu_device
*smmu
= dev_get_drvdata(dev
);
3812 phys_addr_t
*cfg
= arm_smmu_msi_cfg
[desc
->msi_index
];
3814 doorbell
= (((u64
)msg
->address_hi
) << 32) | msg
->address_lo
;
3815 doorbell
&= MSI_CFG0_ADDR_MASK
;
3817 writeq_relaxed(doorbell
, smmu
->base
+ cfg
[0]);
3818 writel_relaxed(msg
->data
, smmu
->base
+ cfg
[1]);
3819 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE
, smmu
->base
+ cfg
[2]);
3822 static void arm_smmu_setup_msis(struct arm_smmu_device
*smmu
)
3824 int ret
, nvec
= ARM_SMMU_MAX_MSIS
;
3825 struct device
*dev
= smmu
->dev
;
3827 /* Clear the MSI address regs */
3828 writeq_relaxed(0, smmu
->base
+ ARM_SMMU_GERROR_IRQ_CFG0
);
3829 writeq_relaxed(0, smmu
->base
+ ARM_SMMU_EVTQ_IRQ_CFG0
);
3831 if (smmu
->features
& ARM_SMMU_FEAT_PRI
)
3832 writeq_relaxed(0, smmu
->base
+ ARM_SMMU_PRIQ_IRQ_CFG0
);
3836 if (!(smmu
->features
& ARM_SMMU_FEAT_MSI
))
3839 if (!dev
->msi
.domain
) {
3840 dev_info(smmu
->dev
, "msi_domain absent - falling back to wired irqs\n");
3844 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3845 ret
= platform_device_msi_init_and_alloc_irqs(dev
, nvec
, arm_smmu_write_msi_msg
);
3847 dev_warn(dev
, "failed to allocate MSIs - falling back to wired irqs\n");
3851 smmu
->evtq
.q
.irq
= msi_get_virq(dev
, EVTQ_MSI_INDEX
);
3852 smmu
->gerr_irq
= msi_get_virq(dev
, GERROR_MSI_INDEX
);
3853 smmu
->priq
.q
.irq
= msi_get_virq(dev
, PRIQ_MSI_INDEX
);
3855 /* Add callback to free MSIs on teardown */
3856 devm_add_action_or_reset(dev
, arm_smmu_free_msis
, dev
);
3859 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device
*smmu
)
3863 arm_smmu_setup_msis(smmu
);
3865 /* Request interrupt lines */
3866 irq
= smmu
->evtq
.q
.irq
;
3868 ret
= devm_request_threaded_irq(smmu
->dev
, irq
, NULL
,
3869 arm_smmu_evtq_thread
,
3871 "arm-smmu-v3-evtq", smmu
);
3873 dev_warn(smmu
->dev
, "failed to enable evtq irq\n");
3875 dev_warn(smmu
->dev
, "no evtq irq - events will not be reported!\n");
3878 irq
= smmu
->gerr_irq
;
3880 ret
= devm_request_irq(smmu
->dev
, irq
, arm_smmu_gerror_handler
,
3881 0, "arm-smmu-v3-gerror", smmu
);
3883 dev_warn(smmu
->dev
, "failed to enable gerror irq\n");
3885 dev_warn(smmu
->dev
, "no gerr irq - errors will not be reported!\n");
3888 if (smmu
->features
& ARM_SMMU_FEAT_PRI
) {
3889 irq
= smmu
->priq
.q
.irq
;
3891 ret
= devm_request_threaded_irq(smmu
->dev
, irq
, NULL
,
3892 arm_smmu_priq_thread
,
3898 "failed to enable priq irq\n");
3900 dev_warn(smmu
->dev
, "no priq irq - PRI will be broken\n");
3905 static int arm_smmu_setup_irqs(struct arm_smmu_device
*smmu
)
3908 u32 irqen_flags
= IRQ_CTRL_EVTQ_IRQEN
| IRQ_CTRL_GERROR_IRQEN
;
3910 /* Disable IRQs first */
3911 ret
= arm_smmu_write_reg_sync(smmu
, 0, ARM_SMMU_IRQ_CTRL
,
3912 ARM_SMMU_IRQ_CTRLACK
);
3914 dev_err(smmu
->dev
, "failed to disable irqs\n");
3918 irq
= smmu
->combined_irq
;
3921 * Cavium ThunderX2 implementation doesn't support unique irq
3922 * lines. Use a single irq line for all the SMMUv3 interrupts.
3924 ret
= devm_request_threaded_irq(smmu
->dev
, irq
,
3925 arm_smmu_combined_irq_handler
,
3926 arm_smmu_combined_irq_thread
,
3928 "arm-smmu-v3-combined-irq", smmu
);
3930 dev_warn(smmu
->dev
, "failed to enable combined irq\n");
3932 arm_smmu_setup_unique_irqs(smmu
);
3934 if (smmu
->features
& ARM_SMMU_FEAT_PRI
)
3935 irqen_flags
|= IRQ_CTRL_PRIQ_IRQEN
;
3937 /* Enable interrupt generation on the SMMU */
3938 ret
= arm_smmu_write_reg_sync(smmu
, irqen_flags
,
3939 ARM_SMMU_IRQ_CTRL
, ARM_SMMU_IRQ_CTRLACK
);
3941 dev_warn(smmu
->dev
, "failed to enable irqs\n");
3946 static int arm_smmu_device_disable(struct arm_smmu_device
*smmu
)
3950 ret
= arm_smmu_write_reg_sync(smmu
, 0, ARM_SMMU_CR0
, ARM_SMMU_CR0ACK
);
3952 dev_err(smmu
->dev
, "failed to clear cr0\n");
3957 static void arm_smmu_write_strtab(struct arm_smmu_device
*smmu
)
3959 struct arm_smmu_strtab_cfg
*cfg
= &smmu
->strtab_cfg
;
3963 if (smmu
->features
& ARM_SMMU_FEAT_2_LVL_STRTAB
) {
3964 reg
= FIELD_PREP(STRTAB_BASE_CFG_FMT
,
3965 STRTAB_BASE_CFG_FMT_2LVL
) |
3966 FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE
,
3967 ilog2(cfg
->l2
.num_l1_ents
) + STRTAB_SPLIT
) |
3968 FIELD_PREP(STRTAB_BASE_CFG_SPLIT
, STRTAB_SPLIT
);
3969 dma
= cfg
->l2
.l1_dma
;
3971 reg
= FIELD_PREP(STRTAB_BASE_CFG_FMT
,
3972 STRTAB_BASE_CFG_FMT_LINEAR
) |
3973 FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE
, smmu
->sid_bits
);
3974 dma
= cfg
->linear
.ste_dma
;
3976 writeq_relaxed((dma
& STRTAB_BASE_ADDR_MASK
) | STRTAB_BASE_RA
,
3977 smmu
->base
+ ARM_SMMU_STRTAB_BASE
);
3978 writel_relaxed(reg
, smmu
->base
+ ARM_SMMU_STRTAB_BASE_CFG
);
3981 static int arm_smmu_device_reset(struct arm_smmu_device
*smmu
)
3985 struct arm_smmu_cmdq_ent cmd
;
3987 /* Clear CR0 and sync (disables SMMU and queue processing) */
3988 reg
= readl_relaxed(smmu
->base
+ ARM_SMMU_CR0
);
3989 if (reg
& CR0_SMMUEN
) {
3990 dev_warn(smmu
->dev
, "SMMU currently enabled! Resetting...\n");
3991 arm_smmu_update_gbpa(smmu
, GBPA_ABORT
, 0);
3994 ret
= arm_smmu_device_disable(smmu
);
3998 /* CR1 (table and queue memory attributes) */
3999 reg
= FIELD_PREP(CR1_TABLE_SH
, ARM_SMMU_SH_ISH
) |
4000 FIELD_PREP(CR1_TABLE_OC
, CR1_CACHE_WB
) |
4001 FIELD_PREP(CR1_TABLE_IC
, CR1_CACHE_WB
) |
4002 FIELD_PREP(CR1_QUEUE_SH
, ARM_SMMU_SH_ISH
) |
4003 FIELD_PREP(CR1_QUEUE_OC
, CR1_CACHE_WB
) |
4004 FIELD_PREP(CR1_QUEUE_IC
, CR1_CACHE_WB
);
4005 writel_relaxed(reg
, smmu
->base
+ ARM_SMMU_CR1
);
4007 /* CR2 (random crap) */
4008 reg
= CR2_PTM
| CR2_RECINVSID
;
4010 if (smmu
->features
& ARM_SMMU_FEAT_E2H
)
4013 writel_relaxed(reg
, smmu
->base
+ ARM_SMMU_CR2
);
4016 arm_smmu_write_strtab(smmu
);
4019 writeq_relaxed(smmu
->cmdq
.q
.q_base
, smmu
->base
+ ARM_SMMU_CMDQ_BASE
);
4020 writel_relaxed(smmu
->cmdq
.q
.llq
.prod
, smmu
->base
+ ARM_SMMU_CMDQ_PROD
);
4021 writel_relaxed(smmu
->cmdq
.q
.llq
.cons
, smmu
->base
+ ARM_SMMU_CMDQ_CONS
);
4023 enables
= CR0_CMDQEN
;
4024 ret
= arm_smmu_write_reg_sync(smmu
, enables
, ARM_SMMU_CR0
,
4027 dev_err(smmu
->dev
, "failed to enable command queue\n");
4031 /* Invalidate any cached configuration */
4032 cmd
.opcode
= CMDQ_OP_CFGI_ALL
;
4033 arm_smmu_cmdq_issue_cmd_with_sync(smmu
, &cmd
);
4035 /* Invalidate any stale TLB entries */
4036 if (smmu
->features
& ARM_SMMU_FEAT_HYP
) {
4037 cmd
.opcode
= CMDQ_OP_TLBI_EL2_ALL
;
4038 arm_smmu_cmdq_issue_cmd_with_sync(smmu
, &cmd
);
4041 cmd
.opcode
= CMDQ_OP_TLBI_NSNH_ALL
;
4042 arm_smmu_cmdq_issue_cmd_with_sync(smmu
, &cmd
);
4045 writeq_relaxed(smmu
->evtq
.q
.q_base
, smmu
->base
+ ARM_SMMU_EVTQ_BASE
);
4046 writel_relaxed(smmu
->evtq
.q
.llq
.prod
, smmu
->page1
+ ARM_SMMU_EVTQ_PROD
);
4047 writel_relaxed(smmu
->evtq
.q
.llq
.cons
, smmu
->page1
+ ARM_SMMU_EVTQ_CONS
);
4049 enables
|= CR0_EVTQEN
;
4050 ret
= arm_smmu_write_reg_sync(smmu
, enables
, ARM_SMMU_CR0
,
4053 dev_err(smmu
->dev
, "failed to enable event queue\n");
4058 if (smmu
->features
& ARM_SMMU_FEAT_PRI
) {
4059 writeq_relaxed(smmu
->priq
.q
.q_base
,
4060 smmu
->base
+ ARM_SMMU_PRIQ_BASE
);
4061 writel_relaxed(smmu
->priq
.q
.llq
.prod
,
4062 smmu
->page1
+ ARM_SMMU_PRIQ_PROD
);
4063 writel_relaxed(smmu
->priq
.q
.llq
.cons
,
4064 smmu
->page1
+ ARM_SMMU_PRIQ_CONS
);
4066 enables
|= CR0_PRIQEN
;
4067 ret
= arm_smmu_write_reg_sync(smmu
, enables
, ARM_SMMU_CR0
,
4070 dev_err(smmu
->dev
, "failed to enable PRI queue\n");
4075 if (smmu
->features
& ARM_SMMU_FEAT_ATS
) {
4076 enables
|= CR0_ATSCHK
;
4077 ret
= arm_smmu_write_reg_sync(smmu
, enables
, ARM_SMMU_CR0
,
4080 dev_err(smmu
->dev
, "failed to enable ATS check\n");
4085 ret
= arm_smmu_setup_irqs(smmu
);
4087 dev_err(smmu
->dev
, "failed to setup irqs\n");
4091 if (is_kdump_kernel())
4092 enables
&= ~(CR0_EVTQEN
| CR0_PRIQEN
);
4094 /* Enable the SMMU interface */
4095 enables
|= CR0_SMMUEN
;
4096 ret
= arm_smmu_write_reg_sync(smmu
, enables
, ARM_SMMU_CR0
,
4099 dev_err(smmu
->dev
, "failed to enable SMMU interface\n");
4103 if (smmu
->impl_ops
&& smmu
->impl_ops
->device_reset
) {
4104 ret
= smmu
->impl_ops
->device_reset(smmu
);
4106 dev_err(smmu
->dev
, "failed to reset impl\n");
4114 #define IIDR_IMPLEMENTER_ARM 0x43b
4115 #define IIDR_PRODUCTID_ARM_MMU_600 0x483
4116 #define IIDR_PRODUCTID_ARM_MMU_700 0x487
4118 static void arm_smmu_device_iidr_probe(struct arm_smmu_device
*smmu
)
4121 unsigned int implementer
, productid
, variant
, revision
;
4123 reg
= readl_relaxed(smmu
->base
+ ARM_SMMU_IIDR
);
4124 implementer
= FIELD_GET(IIDR_IMPLEMENTER
, reg
);
4125 productid
= FIELD_GET(IIDR_PRODUCTID
, reg
);
4126 variant
= FIELD_GET(IIDR_VARIANT
, reg
);
4127 revision
= FIELD_GET(IIDR_REVISION
, reg
);
4129 switch (implementer
) {
4130 case IIDR_IMPLEMENTER_ARM
:
4131 switch (productid
) {
4132 case IIDR_PRODUCTID_ARM_MMU_600
:
4133 /* Arm erratum 1076982 */
4134 if (variant
== 0 && revision
<= 2)
4135 smmu
->features
&= ~ARM_SMMU_FEAT_SEV
;
4136 /* Arm erratum 1209401 */
4138 smmu
->features
&= ~ARM_SMMU_FEAT_NESTING
;
4140 case IIDR_PRODUCTID_ARM_MMU_700
:
4141 /* Arm erratum 2812531 */
4142 smmu
->features
&= ~ARM_SMMU_FEAT_BTM
;
4143 smmu
->options
|= ARM_SMMU_OPT_CMDQ_FORCE_SYNC
;
4144 /* Arm errata 2268618, 2812531 */
4145 smmu
->features
&= ~ARM_SMMU_FEAT_NESTING
;
4152 static void arm_smmu_get_httu(struct arm_smmu_device
*smmu
, u32 reg
)
4154 u32 fw_features
= smmu
->features
& (ARM_SMMU_FEAT_HA
| ARM_SMMU_FEAT_HD
);
4155 u32 hw_features
= 0;
4157 switch (FIELD_GET(IDR0_HTTU
, reg
)) {
4158 case IDR0_HTTU_ACCESS_DIRTY
:
4159 hw_features
|= ARM_SMMU_FEAT_HD
;
4161 case IDR0_HTTU_ACCESS
:
4162 hw_features
|= ARM_SMMU_FEAT_HA
;
4165 if (smmu
->dev
->of_node
)
4166 smmu
->features
|= hw_features
;
4167 else if (hw_features
!= fw_features
)
4168 /* ACPI IORT sets the HTTU bits */
4170 "IDR0.HTTU features(0x%x) overridden by FW configuration (0x%x)\n",
4171 hw_features
, fw_features
);
4174 static int arm_smmu_device_hw_probe(struct arm_smmu_device
*smmu
)
4177 bool coherent
= smmu
->features
& ARM_SMMU_FEAT_COHERENCY
;
4180 reg
= readl_relaxed(smmu
->base
+ ARM_SMMU_IDR0
);
4182 /* 2-level structures */
4183 if (FIELD_GET(IDR0_ST_LVL
, reg
) == IDR0_ST_LVL_2LVL
)
4184 smmu
->features
|= ARM_SMMU_FEAT_2_LVL_STRTAB
;
4186 if (reg
& IDR0_CD2L
)
4187 smmu
->features
|= ARM_SMMU_FEAT_2_LVL_CDTAB
;
4190 * Translation table endianness.
4191 * We currently require the same endianness as the CPU, but this
4192 * could be changed later by adding a new IO_PGTABLE_QUIRK.
4194 switch (FIELD_GET(IDR0_TTENDIAN
, reg
)) {
4195 case IDR0_TTENDIAN_MIXED
:
4196 smmu
->features
|= ARM_SMMU_FEAT_TT_LE
| ARM_SMMU_FEAT_TT_BE
;
4199 case IDR0_TTENDIAN_BE
:
4200 smmu
->features
|= ARM_SMMU_FEAT_TT_BE
;
4203 case IDR0_TTENDIAN_LE
:
4204 smmu
->features
|= ARM_SMMU_FEAT_TT_LE
;
4208 dev_err(smmu
->dev
, "unknown/unsupported TT endianness!\n");
4212 /* Boolean feature flags */
4213 if (IS_ENABLED(CONFIG_PCI_PRI
) && reg
& IDR0_PRI
)
4214 smmu
->features
|= ARM_SMMU_FEAT_PRI
;
4216 if (IS_ENABLED(CONFIG_PCI_ATS
) && reg
& IDR0_ATS
)
4217 smmu
->features
|= ARM_SMMU_FEAT_ATS
;
4220 smmu
->features
|= ARM_SMMU_FEAT_SEV
;
4222 if (reg
& IDR0_MSI
) {
4223 smmu
->features
|= ARM_SMMU_FEAT_MSI
;
4224 if (coherent
&& !disable_msipolling
)
4225 smmu
->options
|= ARM_SMMU_OPT_MSIPOLL
;
4228 if (reg
& IDR0_HYP
) {
4229 smmu
->features
|= ARM_SMMU_FEAT_HYP
;
4230 if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN
))
4231 smmu
->features
|= ARM_SMMU_FEAT_E2H
;
4234 arm_smmu_get_httu(smmu
, reg
);
4237 * The coherency feature as set by FW is used in preference to the ID
4238 * register, but warn on mismatch.
4240 if (!!(reg
& IDR0_COHACC
) != coherent
)
4241 dev_warn(smmu
->dev
, "IDR0.COHACC overridden by FW configuration (%s)\n",
4242 coherent
? "true" : "false");
4244 switch (FIELD_GET(IDR0_STALL_MODEL
, reg
)) {
4245 case IDR0_STALL_MODEL_FORCE
:
4246 smmu
->features
|= ARM_SMMU_FEAT_STALL_FORCE
;
4248 case IDR0_STALL_MODEL_STALL
:
4249 smmu
->features
|= ARM_SMMU_FEAT_STALLS
;
4253 smmu
->features
|= ARM_SMMU_FEAT_TRANS_S1
;
4256 smmu
->features
|= ARM_SMMU_FEAT_TRANS_S2
;
4258 if (!(reg
& (IDR0_S1P
| IDR0_S2P
))) {
4259 dev_err(smmu
->dev
, "no translation support!\n");
4263 /* We only support the AArch64 table format at present */
4264 switch (FIELD_GET(IDR0_TTF
, reg
)) {
4265 case IDR0_TTF_AARCH32_64
:
4268 case IDR0_TTF_AARCH64
:
4271 dev_err(smmu
->dev
, "AArch64 table format not supported!\n");
4275 /* ASID/VMID sizes */
4276 smmu
->asid_bits
= reg
& IDR0_ASID16
? 16 : 8;
4277 smmu
->vmid_bits
= reg
& IDR0_VMID16
? 16 : 8;
4280 reg
= readl_relaxed(smmu
->base
+ ARM_SMMU_IDR1
);
4281 if (reg
& (IDR1_TABLES_PRESET
| IDR1_QUEUES_PRESET
| IDR1_REL
)) {
4282 dev_err(smmu
->dev
, "embedded implementation not supported\n");
4286 if (reg
& IDR1_ATTR_TYPES_OVR
)
4287 smmu
->features
|= ARM_SMMU_FEAT_ATTR_TYPES_OVR
;
4289 /* Queue sizes, capped to ensure natural alignment */
4290 smmu
->cmdq
.q
.llq
.max_n_shift
= min_t(u32
, CMDQ_MAX_SZ_SHIFT
,
4291 FIELD_GET(IDR1_CMDQS
, reg
));
4292 if (smmu
->cmdq
.q
.llq
.max_n_shift
<= ilog2(CMDQ_BATCH_ENTRIES
)) {
4294 * We don't support splitting up batches, so one batch of
4295 * commands plus an extra sync needs to fit inside the command
4296 * queue. There's also no way we can handle the weird alignment
4297 * restrictions on the base pointer for a unit-length queue.
4299 dev_err(smmu
->dev
, "command queue size <= %d entries not supported\n",
4300 CMDQ_BATCH_ENTRIES
);
4304 smmu
->evtq
.q
.llq
.max_n_shift
= min_t(u32
, EVTQ_MAX_SZ_SHIFT
,
4305 FIELD_GET(IDR1_EVTQS
, reg
));
4306 smmu
->priq
.q
.llq
.max_n_shift
= min_t(u32
, PRIQ_MAX_SZ_SHIFT
,
4307 FIELD_GET(IDR1_PRIQS
, reg
));
4309 /* SID/SSID sizes */
4310 smmu
->ssid_bits
= FIELD_GET(IDR1_SSIDSIZE
, reg
);
4311 smmu
->sid_bits
= FIELD_GET(IDR1_SIDSIZE
, reg
);
4312 smmu
->iommu
.max_pasids
= 1UL << smmu
->ssid_bits
;
4315 * If the SMMU supports fewer bits than would fill a single L2 stream
4316 * table, use a linear table instead.
4318 if (smmu
->sid_bits
<= STRTAB_SPLIT
)
4319 smmu
->features
&= ~ARM_SMMU_FEAT_2_LVL_STRTAB
;
4322 reg
= readl_relaxed(smmu
->base
+ ARM_SMMU_IDR3
);
4323 if (FIELD_GET(IDR3_RIL
, reg
))
4324 smmu
->features
|= ARM_SMMU_FEAT_RANGE_INV
;
4327 reg
= readl_relaxed(smmu
->base
+ ARM_SMMU_IDR5
);
4329 /* Maximum number of outstanding stalls */
4330 smmu
->evtq
.max_stalls
= FIELD_GET(IDR5_STALL_MAX
, reg
);
4333 if (reg
& IDR5_GRAN64K
)
4334 smmu
->pgsize_bitmap
|= SZ_64K
| SZ_512M
;
4335 if (reg
& IDR5_GRAN16K
)
4336 smmu
->pgsize_bitmap
|= SZ_16K
| SZ_32M
;
4337 if (reg
& IDR5_GRAN4K
)
4338 smmu
->pgsize_bitmap
|= SZ_4K
| SZ_2M
| SZ_1G
;
4340 /* Input address size */
4341 if (FIELD_GET(IDR5_VAX
, reg
) == IDR5_VAX_52_BIT
)
4342 smmu
->features
|= ARM_SMMU_FEAT_VAX
;
4344 /* Output address size */
4345 switch (FIELD_GET(IDR5_OAS
, reg
)) {
4346 case IDR5_OAS_32_BIT
:
4349 case IDR5_OAS_36_BIT
:
4352 case IDR5_OAS_40_BIT
:
4355 case IDR5_OAS_42_BIT
:
4358 case IDR5_OAS_44_BIT
:
4361 case IDR5_OAS_52_BIT
:
4363 smmu
->pgsize_bitmap
|= 1ULL << 42; /* 4TB */
4367 "unknown output address size. Truncating to 48-bit\n");
4369 case IDR5_OAS_48_BIT
:
4373 if (arm_smmu_ops
.pgsize_bitmap
== -1UL)
4374 arm_smmu_ops
.pgsize_bitmap
= smmu
->pgsize_bitmap
;
4376 arm_smmu_ops
.pgsize_bitmap
|= smmu
->pgsize_bitmap
;
4378 /* Set the DMA mask for our table walker */
4379 if (dma_set_mask_and_coherent(smmu
->dev
, DMA_BIT_MASK(smmu
->oas
)))
4381 "failed to set DMA mask for table walker\n");
4383 smmu
->ias
= max(smmu
->ias
, smmu
->oas
);
4385 if ((smmu
->features
& ARM_SMMU_FEAT_TRANS_S1
) &&
4386 (smmu
->features
& ARM_SMMU_FEAT_TRANS_S2
))
4387 smmu
->features
|= ARM_SMMU_FEAT_NESTING
;
4389 arm_smmu_device_iidr_probe(smmu
);
4391 if (arm_smmu_sva_supported(smmu
))
4392 smmu
->features
|= ARM_SMMU_FEAT_SVA
;
4394 dev_info(smmu
->dev
, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
4395 smmu
->ias
, smmu
->oas
, smmu
->features
);
4400 #ifdef CONFIG_TEGRA241_CMDQV
4401 static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node
*node
,
4402 struct arm_smmu_device
*smmu
)
4404 const char *uid
= kasprintf(GFP_KERNEL
, "%u", node
->identifier
);
4405 struct acpi_device
*adev
;
4407 /* Look for an NVDA200C node whose _UID matches the SMMU node ID */
4408 adev
= acpi_dev_get_first_match_dev("NVDA200C", uid
, -1);
4410 /* Tegra241 CMDQV driver is responsible for put_device() */
4411 smmu
->impl_dev
= &adev
->dev
;
4412 smmu
->options
|= ARM_SMMU_OPT_TEGRA241_CMDQV
;
4413 dev_info(smmu
->dev
, "found companion CMDQV device: %s\n",
4414 dev_name(smmu
->impl_dev
));
4419 static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node
*node
,
4420 struct arm_smmu_device
*smmu
)
4425 static int acpi_smmu_iort_probe_model(struct acpi_iort_node
*node
,
4426 struct arm_smmu_device
*smmu
)
4428 struct acpi_iort_smmu_v3
*iort_smmu
=
4429 (struct acpi_iort_smmu_v3
*)node
->node_data
;
4431 switch (iort_smmu
->model
) {
4432 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX
:
4433 smmu
->options
|= ARM_SMMU_OPT_PAGE0_REGS_ONLY
;
4435 case ACPI_IORT_SMMU_V3_HISILICON_HI161X
:
4436 smmu
->options
|= ARM_SMMU_OPT_SKIP_PREFETCH
;
4438 case ACPI_IORT_SMMU_V3_GENERIC
:
4440 * Tegra241 implementation stores its SMMU options and impl_dev
4441 * in DSDT. Thus, go through the ACPI tables unconditionally.
4443 acpi_smmu_dsdt_probe_tegra241_cmdqv(node
, smmu
);
4447 dev_notice(smmu
->dev
, "option mask 0x%x\n", smmu
->options
);
4451 static int arm_smmu_device_acpi_probe(struct platform_device
*pdev
,
4452 struct arm_smmu_device
*smmu
)
4454 struct acpi_iort_smmu_v3
*iort_smmu
;
4455 struct device
*dev
= smmu
->dev
;
4456 struct acpi_iort_node
*node
;
4458 node
= *(struct acpi_iort_node
**)dev_get_platdata(dev
);
4460 /* Retrieve SMMUv3 specific data */
4461 iort_smmu
= (struct acpi_iort_smmu_v3
*)node
->node_data
;
4463 if (iort_smmu
->flags
& ACPI_IORT_SMMU_V3_COHACC_OVERRIDE
)
4464 smmu
->features
|= ARM_SMMU_FEAT_COHERENCY
;
4466 switch (FIELD_GET(ACPI_IORT_SMMU_V3_HTTU_OVERRIDE
, iort_smmu
->flags
)) {
4467 case IDR0_HTTU_ACCESS_DIRTY
:
4468 smmu
->features
|= ARM_SMMU_FEAT_HD
;
4470 case IDR0_HTTU_ACCESS
:
4471 smmu
->features
|= ARM_SMMU_FEAT_HA
;
4474 return acpi_smmu_iort_probe_model(node
, smmu
);
4477 static inline int arm_smmu_device_acpi_probe(struct platform_device
*pdev
,
4478 struct arm_smmu_device
*smmu
)
4484 static int arm_smmu_device_dt_probe(struct platform_device
*pdev
,
4485 struct arm_smmu_device
*smmu
)
4487 struct device
*dev
= &pdev
->dev
;
4491 if (of_property_read_u32(dev
->of_node
, "#iommu-cells", &cells
))
4492 dev_err(dev
, "missing #iommu-cells property\n");
4493 else if (cells
!= 1)
4494 dev_err(dev
, "invalid #iommu-cells value (%d)\n", cells
);
4498 parse_driver_options(smmu
);
4500 if (of_dma_is_coherent(dev
->of_node
))
4501 smmu
->features
|= ARM_SMMU_FEAT_COHERENCY
;
4506 static unsigned long arm_smmu_resource_size(struct arm_smmu_device
*smmu
)
4508 if (smmu
->options
& ARM_SMMU_OPT_PAGE0_REGS_ONLY
)
4514 static void __iomem
*arm_smmu_ioremap(struct device
*dev
, resource_size_t start
,
4515 resource_size_t size
)
4517 struct resource res
= DEFINE_RES_MEM(start
, size
);
4519 return devm_ioremap_resource(dev
, &res
);
4522 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device
*smmu
)
4524 struct list_head rmr_list
;
4525 struct iommu_resv_region
*e
;
4527 INIT_LIST_HEAD(&rmr_list
);
4528 iort_get_rmr_sids(dev_fwnode(smmu
->dev
), &rmr_list
);
4530 list_for_each_entry(e
, &rmr_list
, list
) {
4531 struct iommu_iort_rmr_data
*rmr
;
4534 rmr
= container_of(e
, struct iommu_iort_rmr_data
, rr
);
4535 for (i
= 0; i
< rmr
->num_sids
; i
++) {
4536 ret
= arm_smmu_init_sid_strtab(smmu
, rmr
->sids
[i
]);
4538 dev_err(smmu
->dev
, "RMR SID(0x%x) bypass failed\n",
4544 * STE table is not programmed to HW, see
4545 * arm_smmu_initial_bypass_stes()
4547 arm_smmu_make_bypass_ste(smmu
,
4548 arm_smmu_get_step_for_sid(smmu
, rmr
->sids
[i
]));
4552 iort_put_rmr_sids(dev_fwnode(smmu
->dev
), &rmr_list
);
4555 static void arm_smmu_impl_remove(void *data
)
4557 struct arm_smmu_device
*smmu
= data
;
4559 if (smmu
->impl_ops
&& smmu
->impl_ops
->device_remove
)
4560 smmu
->impl_ops
->device_remove(smmu
);
4564 * Probe all the compiled in implementations. Each one checks to see if it
4565 * matches this HW and if so returns a devm_krealloc'd arm_smmu_device which
4566 * replaces the callers. Otherwise the original is returned or ERR_PTR.
4568 static struct arm_smmu_device
*arm_smmu_impl_probe(struct arm_smmu_device
*smmu
)
4570 struct arm_smmu_device
*new_smmu
= ERR_PTR(-ENODEV
);
4573 if (smmu
->impl_dev
&& (smmu
->options
& ARM_SMMU_OPT_TEGRA241_CMDQV
))
4574 new_smmu
= tegra241_cmdqv_probe(smmu
);
4576 if (new_smmu
== ERR_PTR(-ENODEV
))
4578 if (IS_ERR(new_smmu
))
4581 ret
= devm_add_action_or_reset(new_smmu
->dev
, arm_smmu_impl_remove
,
4584 return ERR_PTR(ret
);
/*
 * arm_smmu_device_probe() - platform driver probe callback for an SMMUv3.
 *
 * @pdev: platform device being bound.
 *
 * Sequence visible in this function:
 *   1. allocate a zeroed struct arm_smmu_device with devm_kzalloc();
 *   2. firmware description via arm_smmu_device_dt_probe() or
 *      arm_smmu_device_acpi_probe();
 *   3. arm_smmu_impl_probe(), whose return value replaces the smmu pointer;
 *   4. look up memory resource 0, check it against
 *      arm_smmu_resource_size(), then map the register page(s);
 *   5. look up the optional "combined", "eventq", "priq" and "gerror" IRQs;
 *   6. arm_smmu_device_hw_probe(), arm_smmu_init_structures(), store the
 *      drvdata, arm_smmu_rmr_install_bypass_ste(), arm_smmu_device_reset();
 *   7. iommu_device_sysfs_add() followed by iommu_device_register().
 *
 * Return: the visible error paths return PTR_ERR() of the failed
 * arm_smmu_impl_probe()/arm_smmu_ioremap() result.
 *
 * NOTE(review): the declarations of irq/ret, the allocation check, the
 * condition choosing DT vs ACPI, the conditions guarding each IRQ
 * assignment and most "if (ret)" / return statements are not visible in
 * this extract. Confirm them against the full file.
 */
4588 static int arm_smmu_device_probe(struct platform_device
*pdev
)
4591 struct resource
*res
;
4592 resource_size_t ioaddr
;
4593 struct arm_smmu_device
*smmu
;
4594 struct device
*dev
= &pdev
->dev
;
/* Device-managed allocation of the driver's private structure. */
4596 smmu
= devm_kzalloc(dev
, sizeof(*smmu
), GFP_KERNEL
);
/* Firmware probe: DT variant first, ACPI variant as the alternative. */
4602 ret
= arm_smmu_device_dt_probe(pdev
, smmu
);
4604 ret
= arm_smmu_device_acpi_probe(pdev
, smmu
);
/*
 * From here on only the pointer handed back by arm_smmu_impl_probe() is
 * used; it may differ from the one allocated above.
 */
4609 smmu
= arm_smmu_impl_probe(smmu
);
4611 return PTR_ERR(smmu
);
/* Memory resource 0 supplies the register space; its start becomes ioaddr. */
4614 res
= platform_get_resource(pdev
, IORESOURCE_MEM
, 0);
4617 if (resource_size(res
) < arm_smmu_resource_size(smmu
)) {
4618 dev_err(dev
, "MMIO region too small (%pr)\n", res
);
4621 ioaddr
= res
->start
;
4624 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
4625 * the PMCG registers which are reserved by the PMU driver.
4627 smmu
->base
= arm_smmu_ioremap(dev
, ioaddr
, ARM_SMMU_REG_SZ
);
4628 if (IS_ERR(smmu
->base
))
4629 return PTR_ERR(smmu
->base
);
/*
 * When the register space is larger than 64K, page 1 gets its own mapping
 * at ioaddr + SZ_64K; the other visible assignment makes page1 alias base.
 */
4631 if (arm_smmu_resource_size(smmu
) > SZ_64K
) {
4632 smmu
->page1
= arm_smmu_ioremap(dev
, ioaddr
+ SZ_64K
,
4634 if (IS_ERR(smmu
->page1
))
4635 return PTR_ERR(smmu
->page1
);
4637 smmu
->page1
= smmu
->base
;
4640 /* Interrupt lines */
/* Every line is looked up by name with the _optional variant. */
4642 irq
= platform_get_irq_byname_optional(pdev
, "combined");
4644 smmu
->combined_irq
= irq
;
4646 irq
= platform_get_irq_byname_optional(pdev
, "eventq");
4648 smmu
->evtq
.q
.irq
= irq
;
4650 irq
= platform_get_irq_byname_optional(pdev
, "priq");
4652 smmu
->priq
.q
.irq
= irq
;
4654 irq
= platform_get_irq_byname_optional(pdev
, "gerror");
4656 smmu
->gerr_irq
= irq
;
/* Hardware probe runs after the registers are mapped and IRQs recorded. */
4659 ret
= arm_smmu_device_hw_probe(smmu
);
4663 /* Initialise in-memory data structures */
4664 ret
= arm_smmu_init_structures(smmu
);
4668 /* Record our private device structure */
4669 platform_set_drvdata(pdev
, smmu
);
4671 /* Check for RMRs and install bypass STEs if any */
4672 arm_smmu_rmr_install_bypass_ste(smmu
);
4674 /* Reset the device */
4675 ret
= arm_smmu_device_reset(smmu
);
4679 /* And we're up. Go go go! */
/* The sysfs name embeds the register base address via %pa. */
4680 ret
= iommu_device_sysfs_add(&smmu
->iommu
, dev
, NULL
,
4681 "smmu3.%pa", &ioaddr
);
4685 ret
= iommu_device_register(&smmu
->iommu
, &arm_smmu_ops
, dev
);
/* Registration failure path: log, then undo the sysfs entry added above. */
4687 dev_err(dev
, "Failed to register iommu\n");
4688 iommu_device_sysfs_remove(&smmu
->iommu
);
/*
 * arm_smmu_device_remove() - platform driver remove callback.
 *
 * @pdev: platform device being unbound; its drvdata is the
 *        struct arm_smmu_device stored by the probe callback.
 *
 * Undoes the IOMMU registration and the sysfs entry, disables the SMMU,
 * then frees the event queue's iopf queue and destroys the vmid_map IDA.
 * The calls are order-dependent: the device is unregistered before it is
 * disabled.
 */
4695 static void arm_smmu_device_remove(struct platform_device
*pdev
)
4697 struct arm_smmu_device
*smmu
= platform_get_drvdata(pdev
);
4699 iommu_device_unregister(&smmu
->iommu
);
4700 iommu_device_sysfs_remove(&smmu
->iommu
);
4701 arm_smmu_device_disable(smmu
);
4702 iopf_queue_free(smmu
->evtq
.iopf
);
4703 ida_destroy(&smmu
->vmid_map
);
/*
 * arm_smmu_device_shutdown() - platform driver shutdown callback.
 *
 * @pdev: platform device whose drvdata is the struct arm_smmu_device.
 *
 * Only disables the SMMU; unlike the remove callback, nothing is
 * unregistered or freed here.
 */
4706 static void arm_smmu_device_shutdown(struct platform_device
*pdev
)
4708 struct arm_smmu_device
*smmu
= platform_get_drvdata(pdev
);
4710 arm_smmu_device_disable(smmu
);
/*
 * Device-tree match table: binds this driver to nodes whose compatible
 * string is "arm,smmu-v3". MODULE_DEVICE_TABLE() exports the table for
 * the "of" bus type.
 * NOTE(review): the table's terminating entry is not visible in this
 * extract.
 */
4713 static const struct of_device_id arm_smmu_of_match
[] = {
4714 { .compatible
= "arm,smmu-v3", },
4717 MODULE_DEVICE_TABLE(of
, arm_smmu_of_match
);
/*
 * arm_smmu_driver_unregister() - unregister hook passed to module_driver().
 *
 * @drv: the platform driver to unregister.
 *
 * Calls arm_smmu_sva_notifier_synchronize() before handing the driver to
 * platform_driver_unregister().
 */
4719 static void arm_smmu_driver_unregister(struct platform_driver
*drv
)
4721 arm_smmu_sva_notifier_synchronize();
4722 platform_driver_unregister(drv
);
/*
 * Platform driver description: named "arm-smmu-v3", matched through
 * arm_smmu_of_match, with the probe/remove/shutdown callbacks defined
 * earlier in this file.
 * NOTE(review): suppress_bind_attrs = true presumably hides the sysfs
 * bind/unbind attributes so the driver cannot be manually unbound; confirm
 * against the driver-core documentation. The lines opening and closing the
 * nested driver initialiser are not visible in this extract.
 */
4725 static struct platform_driver arm_smmu_driver
= {
4727 .name
= "arm-smmu-v3",
4728 .of_match_table
= arm_smmu_of_match
,
4729 .suppress_bind_attrs
= true,
4731 .probe
= arm_smmu_device_probe
,
4732 .remove_new
= arm_smmu_device_remove
,
4733 .shutdown
= arm_smmu_device_shutdown
,
/*
 * Module entry points: registration goes straight to
 * platform_driver_register(), while unregistration goes through the
 * local arm_smmu_driver_unregister() wrapper.
 */
4735 module_driver(arm_smmu_driver
, platform_driver_register
,
4736 arm_smmu_driver_unregister
);
/* Module metadata. */
4738 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
4739 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
4740 MODULE_ALIAS("platform:arm-smmu-v3");
4741 MODULE_LICENSE("GPL v2");