Get rid of 'remove_new' relic from platform driver struct
[linux.git] / drivers / iommu / arm / arm-smmu-v3 / arm-smmu-v3.c
bloba5c7002ff75bb0ce377e60faa29ae1c6d01fb18e
// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for ARM architected SMMUv3 implementations.
 *
 * Copyright (C) 2015 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver is powered by bad coffee and bombay mix.
 */
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/err.h>
18 #include <linux/interrupt.h>
19 #include <linux/io-pgtable.h>
20 #include <linux/iopoll.h>
21 #include <linux/module.h>
22 #include <linux/msi.h>
23 #include <linux/of.h>
24 #include <linux/of_address.h>
25 #include <linux/of_platform.h>
26 #include <linux/pci.h>
27 #include <linux/pci-ats.h>
28 #include <linux/platform_device.h>
29 #include <kunit/visibility.h>
30 #include <uapi/linux/iommufd.h>
32 #include "arm-smmu-v3.h"
33 #include "../../dma-iommu.h"
35 static bool disable_msipolling;
36 module_param(disable_msipolling, bool, 0444);
37 MODULE_PARM_DESC(disable_msipolling,
38 "Disable MSI-based polling for CMD_SYNC completion.");
40 static struct iommu_ops arm_smmu_ops;
41 static struct iommu_dirty_ops arm_smmu_dirty_ops;
/*
 * Index of each MSI source into arm_smmu_msi_cfg[]. ARM_SMMU_MAX_MSIS is
 * the number of entries and must stay last.
 */
enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX,
	GERROR_MSI_INDEX,
	PRIQ_MSI_INDEX,
	ARM_SMMU_MAX_MSIS,
};
50 #define NUM_ENTRY_QWORDS 8
51 static_assert(sizeof(struct arm_smmu_ste) == NUM_ENTRY_QWORDS * sizeof(u64));
52 static_assert(sizeof(struct arm_smmu_cd) == NUM_ENTRY_QWORDS * sizeof(u64));
54 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
55 [EVTQ_MSI_INDEX] = {
56 ARM_SMMU_EVTQ_IRQ_CFG0,
57 ARM_SMMU_EVTQ_IRQ_CFG1,
58 ARM_SMMU_EVTQ_IRQ_CFG2,
60 [GERROR_MSI_INDEX] = {
61 ARM_SMMU_GERROR_IRQ_CFG0,
62 ARM_SMMU_GERROR_IRQ_CFG1,
63 ARM_SMMU_GERROR_IRQ_CFG2,
65 [PRIQ_MSI_INDEX] = {
66 ARM_SMMU_PRIQ_IRQ_CFG0,
67 ARM_SMMU_PRIQ_IRQ_CFG1,
68 ARM_SMMU_PRIQ_IRQ_CFG2,
72 struct arm_smmu_option_prop {
73 u32 opt;
74 const char *prop;
/* ASID allocation xarray and the mutex defined alongside it. */
DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
DEFINE_MUTEX(arm_smmu_asid_lock);
80 static struct arm_smmu_option_prop arm_smmu_options[] = {
81 { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
82 { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
83 { 0, NULL},
86 static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
87 struct arm_smmu_device *smmu, u32 flags);
88 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
90 static void parse_driver_options(struct arm_smmu_device *smmu)
92 int i = 0;
94 do {
95 if (of_property_read_bool(smmu->dev->of_node,
96 arm_smmu_options[i].prop)) {
97 smmu->options |= arm_smmu_options[i].opt;
98 dev_notice(smmu->dev, "option %s\n",
99 arm_smmu_options[i].prop);
101 } while (arm_smmu_options[++i].opt);
104 /* Low-level queue manipulation functions */
105 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
107 u32 space, prod, cons;
109 prod = Q_IDX(q, q->prod);
110 cons = Q_IDX(q, q->cons);
112 if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
113 space = (1 << q->max_n_shift) - (prod - cons);
114 else
115 space = cons - prod;
117 return space >= n;
120 static bool queue_full(struct arm_smmu_ll_queue *q)
122 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
123 Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
126 static bool queue_empty(struct arm_smmu_ll_queue *q)
128 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
129 Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
132 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
134 return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
135 (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
136 ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
137 (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
140 static void queue_sync_cons_out(struct arm_smmu_queue *q)
143 * Ensure that all CPU accesses (reads and writes) to the queue
144 * are complete before we update the cons pointer.
146 __iomb();
147 writel_relaxed(q->llq.cons, q->cons_reg);
150 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
152 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
153 q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
156 static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
158 struct arm_smmu_ll_queue *llq = &q->llq;
160 if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
161 return;
163 llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
164 Q_IDX(llq, llq->cons);
165 queue_sync_cons_out(q);
168 static int queue_sync_prod_in(struct arm_smmu_queue *q)
170 u32 prod;
171 int ret = 0;
174 * We can't use the _relaxed() variant here, as we must prevent
175 * speculative reads of the queue before we have determined that
176 * prod has indeed moved.
178 prod = readl(q->prod_reg);
180 if (Q_OVF(prod) != Q_OVF(q->llq.prod))
181 ret = -EOVERFLOW;
183 q->llq.prod = prod;
184 return ret;
187 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
189 u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
190 return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
193 static void queue_poll_init(struct arm_smmu_device *smmu,
194 struct arm_smmu_queue_poll *qp)
196 qp->delay = 1;
197 qp->spin_cnt = 0;
198 qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
199 qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
202 static int queue_poll(struct arm_smmu_queue_poll *qp)
204 if (ktime_compare(ktime_get(), qp->timeout) > 0)
205 return -ETIMEDOUT;
207 if (qp->wfe) {
208 wfe();
209 } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
210 cpu_relax();
211 } else {
212 udelay(qp->delay);
213 qp->delay *= 2;
214 qp->spin_cnt = 0;
217 return 0;
220 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
222 int i;
224 for (i = 0; i < n_dwords; ++i)
225 *dst++ = cpu_to_le64(*src++);
228 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
230 int i;
232 for (i = 0; i < n_dwords; ++i)
233 *dst++ = le64_to_cpu(*src++);
236 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
238 if (queue_empty(&q->llq))
239 return -EAGAIN;
241 queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
242 queue_inc_cons(&q->llq);
243 queue_sync_cons_out(q);
244 return 0;
247 /* High-level queue accessors */
248 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
250 memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
251 cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
253 switch (ent->opcode) {
254 case CMDQ_OP_TLBI_EL2_ALL:
255 case CMDQ_OP_TLBI_NSNH_ALL:
256 break;
257 case CMDQ_OP_PREFETCH_CFG:
258 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
259 break;
260 case CMDQ_OP_CFGI_CD:
261 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
262 fallthrough;
263 case CMDQ_OP_CFGI_STE:
264 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
265 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
266 break;
267 case CMDQ_OP_CFGI_CD_ALL:
268 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
269 break;
270 case CMDQ_OP_CFGI_ALL:
271 /* Cover the entire SID range */
272 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
273 break;
274 case CMDQ_OP_TLBI_NH_VA:
275 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
276 fallthrough;
277 case CMDQ_OP_TLBI_EL2_VA:
278 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
279 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
280 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
281 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
282 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
283 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
284 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
285 break;
286 case CMDQ_OP_TLBI_S2_IPA:
287 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
288 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
289 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
290 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
291 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
292 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
293 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
294 break;
295 case CMDQ_OP_TLBI_NH_ASID:
296 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
297 fallthrough;
298 case CMDQ_OP_TLBI_NH_ALL:
299 case CMDQ_OP_TLBI_S12_VMALL:
300 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
301 break;
302 case CMDQ_OP_TLBI_EL2_ASID:
303 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
304 break;
305 case CMDQ_OP_ATC_INV:
306 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
307 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
308 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
309 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
310 cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
311 cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
312 break;
313 case CMDQ_OP_PRI_RESP:
314 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
315 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
316 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
317 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
318 switch (ent->pri.resp) {
319 case PRI_RESP_DENY:
320 case PRI_RESP_FAIL:
321 case PRI_RESP_SUCC:
322 break;
323 default:
324 return -EINVAL;
326 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
327 break;
328 case CMDQ_OP_RESUME:
329 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
330 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
331 cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
332 break;
333 case CMDQ_OP_CMD_SYNC:
334 if (ent->sync.msiaddr) {
335 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
336 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
337 } else {
338 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
340 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
341 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
342 break;
343 default:
344 return -ENOENT;
347 return 0;
350 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
351 struct arm_smmu_cmdq_ent *ent)
353 struct arm_smmu_cmdq *cmdq = NULL;
355 if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq)
356 cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, ent);
358 return cmdq ?: &smmu->cmdq;
361 static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu,
362 struct arm_smmu_cmdq *cmdq)
364 if (cmdq == &smmu->cmdq)
365 return false;
367 return smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV;
370 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
371 struct arm_smmu_cmdq *cmdq, u32 prod)
373 struct arm_smmu_queue *q = &cmdq->q;
374 struct arm_smmu_cmdq_ent ent = {
375 .opcode = CMDQ_OP_CMD_SYNC,
379 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
380 * payload, so the write will zero the entire command on that platform.
382 if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
383 ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
384 q->ent_dwords * 8;
387 arm_smmu_cmdq_build_cmd(cmd, &ent);
388 if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
389 u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
392 void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
393 struct arm_smmu_cmdq *cmdq)
395 static const char * const cerror_str[] = {
396 [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
397 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
398 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
399 [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
401 struct arm_smmu_queue *q = &cmdq->q;
403 int i;
404 u64 cmd[CMDQ_ENT_DWORDS];
405 u32 cons = readl_relaxed(q->cons_reg);
406 u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
407 struct arm_smmu_cmdq_ent cmd_sync = {
408 .opcode = CMDQ_OP_CMD_SYNC,
411 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
412 idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
414 switch (idx) {
415 case CMDQ_ERR_CERROR_ABT_IDX:
416 dev_err(smmu->dev, "retrying command fetch\n");
417 return;
418 case CMDQ_ERR_CERROR_NONE_IDX:
419 return;
420 case CMDQ_ERR_CERROR_ATC_INV_IDX:
422 * ATC Invalidation Completion timeout. CONS is still pointing
423 * at the CMD_SYNC. Attempt to complete other pending commands
424 * by repeating the CMD_SYNC, though we might well end up back
425 * here since the ATC invalidation may still be pending.
427 return;
428 case CMDQ_ERR_CERROR_ILL_IDX:
429 default:
430 break;
434 * We may have concurrent producers, so we need to be careful
435 * not to touch any of the shadow cmdq state.
437 queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
438 dev_err(smmu->dev, "skipping command in error state:\n");
439 for (i = 0; i < ARRAY_SIZE(cmd); ++i)
440 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
442 /* Convert the erroneous command into a CMD_SYNC */
443 arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
444 if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
445 u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
447 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
450 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
452 __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq);
456 * Command queue locking.
457 * This is a form of bastardised rwlock with the following major changes:
459 * - The only LOCK routines are exclusive_trylock() and shared_lock().
460 * Neither have barrier semantics, and instead provide only a control
461 * dependency.
463 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
464 * fails if the caller appears to be the last lock holder (yes, this is
465 * racy). All successful UNLOCK routines have RELEASE semantics.
467 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
469 int val;
472 * We can try to avoid the cmpxchg() loop by simply incrementing the
473 * lock counter. When held in exclusive state, the lock counter is set
474 * to INT_MIN so these increments won't hurt as the value will remain
475 * negative.
477 if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
478 return;
480 do {
481 val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
482 } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
485 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
487 (void)atomic_dec_return_release(&cmdq->lock);
490 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
492 if (atomic_read(&cmdq->lock) == 1)
493 return false;
495 arm_smmu_cmdq_shared_unlock(cmdq);
496 return true;
/*
 * Try to take the cmdq lock exclusively (0 -> INT_MIN) with interrupts
 * disabled. Evaluates to true on success, leaving interrupts off; on
 * failure interrupts are restored from @flags and the result is false.
 */
#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
({									\
	bool __ret;							\
	local_irq_save(flags);						\
	__ret = !atomic_cmpxchg_relaxed(&(cmdq)->lock, 0, INT_MIN);	\
	if (!__ret)							\
		local_irq_restore(flags);				\
	__ret;								\
})
/* Release the exclusive cmdq lock (set to 0, release) and restore IRQs. */
#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
({									\
	atomic_set_release(&(cmdq)->lock, 0);				\
	local_irq_restore(flags);					\
})
517 * Command queue insertion.
518 * This is made fiddly by our attempts to achieve some sort of scalability
519 * since there is one queue shared amongst all of the CPUs in the system. If
520 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
521 * then you'll *love* this monstrosity.
523 * The basic idea is to split the queue up into ranges of commands that are
524 * owned by a given CPU; the owner may not have written all of the commands
525 * itself, but is responsible for advancing the hardware prod pointer when
526 * the time comes. The algorithm is roughly:
528 * 1. Allocate some space in the queue. At this point we also discover
529 * whether the head of the queue is currently owned by another CPU,
530 * or whether we are the owner.
532 * 2. Write our commands into our allocated slots in the queue.
534 * 3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
536 * 4. If we are an owner:
537 * a. Wait for the previous owner to finish.
538 * b. Mark the queue head as unowned, which tells us the range
539 * that we are responsible for publishing.
540 * c. Wait for all commands in our owned range to become valid.
541 * d. Advance the hardware prod pointer.
542 * e. Tell the next owner we've finished.
544 * 5. If we are inserting a CMD_SYNC (we may or may not have been an
545 * owner), then we need to stick around until it has completed:
546 * a. If we have MSIs, the SMMU can write back into the CMD_SYNC
547 * to clear the first 4 bytes.
548 * b. Otherwise, we spin waiting for the hardware cons pointer to
549 * advance past our command.
551 * The devil is in the details, particularly the use of locking for handling
552 * SYNC completion and freeing up space in the queue before we think that it is
553 * full.
555 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
556 u32 sprod, u32 eprod, bool set)
558 u32 swidx, sbidx, ewidx, ebidx;
559 struct arm_smmu_ll_queue llq = {
560 .max_n_shift = cmdq->q.llq.max_n_shift,
561 .prod = sprod,
564 ewidx = BIT_WORD(Q_IDX(&llq, eprod));
565 ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
567 while (llq.prod != eprod) {
568 unsigned long mask;
569 atomic_long_t *ptr;
570 u32 limit = BITS_PER_LONG;
572 swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
573 sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
575 ptr = &cmdq->valid_map[swidx];
577 if ((swidx == ewidx) && (sbidx < ebidx))
578 limit = ebidx;
580 mask = GENMASK(limit - 1, sbidx);
583 * The valid bit is the inverse of the wrap bit. This means
584 * that a zero-initialised queue is invalid and, after marking
585 * all entries as valid, they become invalid again when we
586 * wrap.
588 if (set) {
589 atomic_long_xor(mask, ptr);
590 } else { /* Poll */
591 unsigned long valid;
593 valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
594 atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
597 llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
601 /* Mark all entries in the range [sprod, eprod) as valid */
602 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
603 u32 sprod, u32 eprod)
605 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
608 /* Wait for all entries in the range [sprod, eprod) to become valid */
609 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
610 u32 sprod, u32 eprod)
612 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
615 /* Wait for the command queue to become non-full */
616 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
617 struct arm_smmu_cmdq *cmdq,
618 struct arm_smmu_ll_queue *llq)
620 unsigned long flags;
621 struct arm_smmu_queue_poll qp;
622 int ret = 0;
625 * Try to update our copy of cons by grabbing exclusive cmdq access. If
626 * that fails, spin until somebody else updates it for us.
628 if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
629 WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
630 arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
631 llq->val = READ_ONCE(cmdq->q.llq.val);
632 return 0;
635 queue_poll_init(smmu, &qp);
636 do {
637 llq->val = READ_ONCE(cmdq->q.llq.val);
638 if (!queue_full(llq))
639 break;
641 ret = queue_poll(&qp);
642 } while (!ret);
644 return ret;
648 * Wait until the SMMU signals a CMD_SYNC completion MSI.
649 * Must be called with the cmdq lock held in some capacity.
651 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
652 struct arm_smmu_cmdq *cmdq,
653 struct arm_smmu_ll_queue *llq)
655 int ret = 0;
656 struct arm_smmu_queue_poll qp;
657 u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
659 queue_poll_init(smmu, &qp);
662 * The MSI won't generate an event, since it's being written back
663 * into the command queue.
665 qp.wfe = false;
666 smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
667 llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
668 return ret;
672 * Wait until the SMMU cons index passes llq->prod.
673 * Must be called with the cmdq lock held in some capacity.
675 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
676 struct arm_smmu_cmdq *cmdq,
677 struct arm_smmu_ll_queue *llq)
679 struct arm_smmu_queue_poll qp;
680 u32 prod = llq->prod;
681 int ret = 0;
683 queue_poll_init(smmu, &qp);
684 llq->val = READ_ONCE(cmdq->q.llq.val);
685 do {
686 if (queue_consumed(llq, prod))
687 break;
689 ret = queue_poll(&qp);
692 * This needs to be a readl() so that our subsequent call
693 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
695 * Specifically, we need to ensure that we observe all
696 * shared_lock()s by other CMD_SYNCs that share our owner,
697 * so that a failing call to tryunlock() means that we're
698 * the last one out and therefore we can safely advance
699 * cmdq->q.llq.cons. Roughly speaking:
701 * CPU 0 CPU1 CPU2 (us)
703 * if (sync)
704 * shared_lock();
706 * dma_wmb();
707 * set_valid_map();
709 * if (owner) {
710 * poll_valid_map();
711 * <control dependency>
712 * writel(prod_reg);
714 * readl(cons_reg);
715 * tryunlock();
717 * Requires us to see CPU 0's shared_lock() acquisition.
719 llq->cons = readl(cmdq->q.cons_reg);
720 } while (!ret);
722 return ret;
725 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
726 struct arm_smmu_cmdq *cmdq,
727 struct arm_smmu_ll_queue *llq)
729 if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
730 !arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
731 return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
733 return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
736 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
737 u32 prod, int n)
739 int i;
740 struct arm_smmu_ll_queue llq = {
741 .max_n_shift = cmdq->q.llq.max_n_shift,
742 .prod = prod,
745 for (i = 0; i < n; ++i) {
746 u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
748 prod = queue_inc_prod_n(&llq, i);
749 queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
754 * This is the actual insertion function, and provides the following
755 * ordering guarantees to callers:
757 * - There is a dma_wmb() before publishing any commands to the queue.
758 * This can be relied upon to order prior writes to data structures
759 * in memory (such as a CD or an STE) before the command.
761 * - On completion of a CMD_SYNC, there is a control dependency.
762 * This can be relied upon to order subsequent writes to memory (e.g.
763 * freeing an IOVA) after completion of the CMD_SYNC.
765 * - Command insertion is totally ordered, so if two CPUs each race to
766 * insert their own list of commands then all of the commands from one
767 * CPU will appear before any of the commands from the other CPU.
769 int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
770 struct arm_smmu_cmdq *cmdq, u64 *cmds, int n,
771 bool sync)
773 u64 cmd_sync[CMDQ_ENT_DWORDS];
774 u32 prod;
775 unsigned long flags;
776 bool owner;
777 struct arm_smmu_ll_queue llq, head;
778 int ret = 0;
780 llq.max_n_shift = cmdq->q.llq.max_n_shift;
782 /* 1. Allocate some space in the queue */
783 local_irq_save(flags);
784 llq.val = READ_ONCE(cmdq->q.llq.val);
785 do {
786 u64 old;
788 while (!queue_has_space(&llq, n + sync)) {
789 local_irq_restore(flags);
790 if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq))
791 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
792 local_irq_save(flags);
795 head.cons = llq.cons;
796 head.prod = queue_inc_prod_n(&llq, n + sync) |
797 CMDQ_PROD_OWNED_FLAG;
799 old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
800 if (old == llq.val)
801 break;
803 llq.val = old;
804 } while (1);
805 owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
806 head.prod &= ~CMDQ_PROD_OWNED_FLAG;
807 llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
810 * 2. Write our commands into the queue
811 * Dependency ordering from the cmpxchg() loop above.
813 arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
814 if (sync) {
815 prod = queue_inc_prod_n(&llq, n);
816 arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod);
817 queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
820 * In order to determine completion of our CMD_SYNC, we must
821 * ensure that the queue can't wrap twice without us noticing.
822 * We achieve that by taking the cmdq lock as shared before
823 * marking our slot as valid.
825 arm_smmu_cmdq_shared_lock(cmdq);
828 /* 3. Mark our slots as valid, ensuring commands are visible first */
829 dma_wmb();
830 arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
832 /* 4. If we are the owner, take control of the SMMU hardware */
833 if (owner) {
834 /* a. Wait for previous owner to finish */
835 atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
837 /* b. Stop gathering work by clearing the owned flag */
838 prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
839 &cmdq->q.llq.atomic.prod);
840 prod &= ~CMDQ_PROD_OWNED_FLAG;
843 * c. Wait for any gathered work to be written to the queue.
844 * Note that we read our own entries so that we have the control
845 * dependency required by (d).
847 arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
850 * d. Advance the hardware prod pointer
851 * Control dependency ordering from the entries becoming valid.
853 writel_relaxed(prod, cmdq->q.prod_reg);
856 * e. Tell the next owner we're done
857 * Make sure we've updated the hardware first, so that we don't
858 * race to update prod and potentially move it backwards.
860 atomic_set_release(&cmdq->owner_prod, prod);
863 /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
864 if (sync) {
865 llq.prod = queue_inc_prod_n(&llq, n);
866 ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq);
867 if (ret) {
868 dev_err_ratelimited(smmu->dev,
869 "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
870 llq.prod,
871 readl_relaxed(cmdq->q.prod_reg),
872 readl_relaxed(cmdq->q.cons_reg));
876 * Try to unlock the cmdq lock. This will fail if we're the last
877 * reader, in which case we can safely update cmdq->q.llq.cons
879 if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
880 WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
881 arm_smmu_cmdq_shared_unlock(cmdq);
885 local_irq_restore(flags);
886 return ret;
889 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
890 struct arm_smmu_cmdq_ent *ent,
891 bool sync)
893 u64 cmd[CMDQ_ENT_DWORDS];
895 if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
896 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
897 ent->opcode);
898 return -EINVAL;
901 return arm_smmu_cmdq_issue_cmdlist(
902 smmu, arm_smmu_get_cmdq(smmu, ent), cmd, 1, sync);
905 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
906 struct arm_smmu_cmdq_ent *ent)
908 return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
911 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
912 struct arm_smmu_cmdq_ent *ent)
914 return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
917 static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
918 struct arm_smmu_cmdq_batch *cmds,
919 struct arm_smmu_cmdq_ent *ent)
921 cmds->num = 0;
922 cmds->cmdq = arm_smmu_get_cmdq(smmu, ent);
925 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
926 struct arm_smmu_cmdq_batch *cmds,
927 struct arm_smmu_cmdq_ent *cmd)
929 bool unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd);
930 bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) &&
931 (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC);
932 int index;
934 if (force_sync || unsupported_cmd) {
935 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
936 cmds->num, true);
937 arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
940 if (cmds->num == CMDQ_BATCH_ENTRIES) {
941 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
942 cmds->num, false);
943 arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
946 index = cmds->num * CMDQ_ENT_DWORDS;
947 if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
948 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
949 cmd->opcode);
950 return;
953 cmds->num++;
956 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
957 struct arm_smmu_cmdq_batch *cmds)
959 return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
960 cmds->num, true);
963 static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
964 struct iommu_page_response *resp)
966 struct arm_smmu_cmdq_ent cmd = {0};
967 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
968 int sid = master->streams[0].id;
970 if (WARN_ON(!master->stall_enabled))
971 return;
973 cmd.opcode = CMDQ_OP_RESUME;
974 cmd.resume.sid = sid;
975 cmd.resume.stag = resp->grpid;
976 switch (resp->code) {
977 case IOMMU_PAGE_RESP_INVALID:
978 case IOMMU_PAGE_RESP_FAILURE:
979 cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
980 break;
981 case IOMMU_PAGE_RESP_SUCCESS:
982 cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
983 break;
984 default:
985 break;
988 arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
990 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
991 * RESUME consumption guarantees that the stalled transaction will be
992 * terminated... at some point in the future. PRI_RESP is fire and
993 * forget.
997 /* Context descriptor manipulation functions */
998 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
1000 struct arm_smmu_cmdq_ent cmd = {
1001 .opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
1002 CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
1003 .tlbi.asid = asid,
1006 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1010 * Based on the value of ent report which bits of the STE the HW will access. It
1011 * would be nice if this was complete according to the spec, but minimally it
1012 * has to capture the bits this driver uses.
1014 VISIBLE_IF_KUNIT
1015 void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
1017 unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0]));
1019 used_bits[0] = cpu_to_le64(STRTAB_STE_0_V);
1020 if (!(ent[0] & cpu_to_le64(STRTAB_STE_0_V)))
1021 return;
1023 used_bits[0] |= cpu_to_le64(STRTAB_STE_0_CFG);
1025 /* S1 translates */
1026 if (cfg & BIT(0)) {
1027 used_bits[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT |
1028 STRTAB_STE_0_S1CTXPTR_MASK |
1029 STRTAB_STE_0_S1CDMAX);
1030 used_bits[1] |=
1031 cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
1032 STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
1033 STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW |
1034 STRTAB_STE_1_EATS);
1035 used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);
1038 * See 13.5 Summary of attribute/permission configuration fields
1039 * for the SHCFG behavior.
1041 if (FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) ==
1042 STRTAB_STE_1_S1DSS_BYPASS)
1043 used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
1046 /* S2 translates */
1047 if (cfg & BIT(1)) {
1048 used_bits[1] |=
1049 cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS |
1050 STRTAB_STE_1_SHCFG);
1051 used_bits[2] |=
1052 cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
1053 STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
1054 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2S |
1055 STRTAB_STE_2_S2R);
1056 used_bits[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK);
1059 if (cfg == STRTAB_STE_0_CFG_BYPASS)
1060 used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
1062 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_used);
1065 * Figure out if we can do a hitless update of entry to become target. Returns a
1066 * bit mask where 1 indicates that qword needs to be set disruptively.
1067 * unused_update is an intermediate value of entry that has unused bits set to
1068 * their new values.
1070 static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
1071 const __le64 *entry, const __le64 *target,
1072 __le64 *unused_update)
1074 __le64 target_used[NUM_ENTRY_QWORDS] = {};
1075 __le64 cur_used[NUM_ENTRY_QWORDS] = {};
1076 u8 used_qword_diff = 0;
1077 unsigned int i;
1079 writer->ops->get_used(entry, cur_used);
1080 writer->ops->get_used(target, target_used);
1082 for (i = 0; i != NUM_ENTRY_QWORDS; i++) {
1084 * Check that masks are up to date, the make functions are not
1085 * allowed to set a bit to 1 if the used function doesn't say it
1086 * is used.
1088 WARN_ON_ONCE(target[i] & ~target_used[i]);
1090 /* Bits can change because they are not currently being used */
1091 unused_update[i] = (entry[i] & cur_used[i]) |
1092 (target[i] & ~cur_used[i]);
1094 * Each bit indicates that a used bit in a qword needs to be
1095 * changed after unused_update is applied.
1097 if ((unused_update[i] & target_used[i]) != target[i])
1098 used_qword_diff |= 1 << i;
1100 return used_qword_diff;
1103 static bool entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry,
1104 const __le64 *target, unsigned int start,
1105 unsigned int len)
1107 bool changed = false;
1108 unsigned int i;
1110 for (i = start; len != 0; len--, i++) {
1111 if (entry[i] != target[i]) {
1112 WRITE_ONCE(entry[i], target[i]);
1113 changed = true;
1117 if (changed)
1118 writer->ops->sync(writer);
1119 return changed;
1123 * Update the STE/CD to the target configuration. The transition from the
1124 * current entry to the target entry takes place over multiple steps that
1125 * attempts to make the transition hitless if possible. This function takes care
1126 * not to create a situation where the HW can perceive a corrupted entry. HW is
1127 * only required to have a 64 bit atomicity with stores from the CPU, while
1128 * entries are many 64 bit values big.
1130 * The difference between the current value and the target value is analyzed to
1131 * determine which of three updates are required - disruptive, hitless or no
1132 * change.
1134 * In the most general disruptive case we can make any update in three steps:
1135 * - Disrupting the entry (V=0)
1136 * - Fill now unused qwords, execpt qword 0 which contains V
1137 * - Make qword 0 have the final value and valid (V=1) with a single 64
1138 * bit store
1140 * However this disrupts the HW while it is happening. There are several
1141 * interesting cases where a STE/CD can be updated without disturbing the HW
1142 * because only a small number of bits are changing (S1DSS, CONFIG, etc) or
1143 * because the used bits don't intersect. We can detect this by calculating how
1144 * many 64 bit values need update after adjusting the unused bits and skip the
1145 * V=0 process. This relies on the IGNORED behavior described in the
1146 * specification.
1148 VISIBLE_IF_KUNIT
1149 void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry,
1150 const __le64 *target)
1152 __le64 unused_update[NUM_ENTRY_QWORDS];
1153 u8 used_qword_diff;
1155 used_qword_diff =
1156 arm_smmu_entry_qword_diff(writer, entry, target, unused_update);
1157 if (hweight8(used_qword_diff) == 1) {
1159 * Only one qword needs its used bits to be changed. This is a
1160 * hitless update, update all bits the current STE/CD is
1161 * ignoring to their new values, then update a single "critical
1162 * qword" to change the STE/CD and finally 0 out any bits that
1163 * are now unused in the target configuration.
1165 unsigned int critical_qword_index = ffs(used_qword_diff) - 1;
1168 * Skip writing unused bits in the critical qword since we'll be
1169 * writing it in the next step anyways. This can save a sync
1170 * when the only change is in that qword.
1172 unused_update[critical_qword_index] =
1173 entry[critical_qword_index];
1174 entry_set(writer, entry, unused_update, 0, NUM_ENTRY_QWORDS);
1175 entry_set(writer, entry, target, critical_qword_index, 1);
1176 entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS);
1177 } else if (used_qword_diff) {
1179 * At least two qwords need their inuse bits to be changed. This
1180 * requires a breaking update, zero the V bit, write all qwords
1181 * but 0, then set qword 0
1183 unused_update[0] = 0;
1184 entry_set(writer, entry, unused_update, 0, 1);
1185 entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1);
1186 entry_set(writer, entry, target, 0, 1);
1187 } else {
1189 * No inuse bit changed. Sanity check that all unused bits are 0
1190 * in the entry. The target was already sanity checked by
1191 * compute_qword_diff().
1193 WARN_ON_ONCE(
1194 entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS));
1197 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_write_entry);
1199 static void arm_smmu_sync_cd(struct arm_smmu_master *master,
1200 int ssid, bool leaf)
1202 size_t i;
1203 struct arm_smmu_cmdq_batch cmds;
1204 struct arm_smmu_device *smmu = master->smmu;
1205 struct arm_smmu_cmdq_ent cmd = {
1206 .opcode = CMDQ_OP_CFGI_CD,
1207 .cfgi = {
1208 .ssid = ssid,
1209 .leaf = leaf,
1213 arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
1214 for (i = 0; i < master->num_streams; i++) {
1215 cmd.cfgi.sid = master->streams[i].id;
1216 arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1219 arm_smmu_cmdq_batch_submit(smmu, &cmds);
1222 static void arm_smmu_write_cd_l1_desc(struct arm_smmu_cdtab_l1 *dst,
1223 dma_addr_t l2ptr_dma)
1225 u64 val = (l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | CTXDESC_L1_DESC_V;
1227 /* The HW has 64 bit atomicity with stores to the L2 CD table */
1228 WRITE_ONCE(dst->l2ptr, cpu_to_le64(val));
1231 static dma_addr_t arm_smmu_cd_l1_get_desc(const struct arm_smmu_cdtab_l1 *src)
1233 return le64_to_cpu(src->l2ptr) & CTXDESC_L1_DESC_L2PTR_MASK;
1236 struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
1237 u32 ssid)
1239 struct arm_smmu_cdtab_l2 *l2;
1240 struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1242 if (!arm_smmu_cdtab_allocated(cd_table))
1243 return NULL;
1245 if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1246 return &cd_table->linear.table[ssid];
1248 l2 = cd_table->l2.l2ptrs[arm_smmu_cdtab_l1_idx(ssid)];
1249 if (!l2)
1250 return NULL;
1251 return &l2->cds[arm_smmu_cdtab_l2_idx(ssid)];
1254 static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
1255 u32 ssid)
1257 struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1258 struct arm_smmu_device *smmu = master->smmu;
1260 might_sleep();
1261 iommu_group_mutex_assert(master->dev);
1263 if (!arm_smmu_cdtab_allocated(cd_table)) {
1264 if (arm_smmu_alloc_cd_tables(master))
1265 return NULL;
1268 if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) {
1269 unsigned int idx = arm_smmu_cdtab_l1_idx(ssid);
1270 struct arm_smmu_cdtab_l2 **l2ptr = &cd_table->l2.l2ptrs[idx];
1272 if (!*l2ptr) {
1273 dma_addr_t l2ptr_dma;
1275 *l2ptr = dma_alloc_coherent(smmu->dev, sizeof(**l2ptr),
1276 &l2ptr_dma, GFP_KERNEL);
1277 if (!*l2ptr)
1278 return NULL;
1280 arm_smmu_write_cd_l1_desc(&cd_table->l2.l1tab[idx],
1281 l2ptr_dma);
1282 /* An invalid L1CD can be cached */
1283 arm_smmu_sync_cd(master, ssid, false);
1286 return arm_smmu_get_cd_ptr(master, ssid);
1289 struct arm_smmu_cd_writer {
1290 struct arm_smmu_entry_writer writer;
1291 unsigned int ssid;
1294 VISIBLE_IF_KUNIT
1295 void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits)
1297 used_bits[0] = cpu_to_le64(CTXDESC_CD_0_V);
1298 if (!(ent[0] & cpu_to_le64(CTXDESC_CD_0_V)))
1299 return;
1300 memset(used_bits, 0xFF, sizeof(struct arm_smmu_cd));
1303 * If EPD0 is set by the make function it means
1304 * T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED
1306 if (ent[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0)) {
1307 used_bits[0] &= ~cpu_to_le64(
1308 CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 |
1309 CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 |
1310 CTXDESC_CD_0_TCR_SH0);
1311 used_bits[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK);
1314 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_cd_used);
1316 static void arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer *writer)
1318 struct arm_smmu_cd_writer *cd_writer =
1319 container_of(writer, struct arm_smmu_cd_writer, writer);
1321 arm_smmu_sync_cd(writer->master, cd_writer->ssid, true);
1324 static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = {
1325 .sync = arm_smmu_cd_writer_sync_entry,
1326 .get_used = arm_smmu_get_cd_used,
1329 void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
1330 struct arm_smmu_cd *cdptr,
1331 const struct arm_smmu_cd *target)
1333 bool target_valid = target->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
1334 bool cur_valid = cdptr->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
1335 struct arm_smmu_cd_writer cd_writer = {
1336 .writer = {
1337 .ops = &arm_smmu_cd_writer_ops,
1338 .master = master,
1340 .ssid = ssid,
1343 if (ssid != IOMMU_NO_PASID && cur_valid != target_valid) {
1344 if (cur_valid)
1345 master->cd_table.used_ssids--;
1346 else
1347 master->cd_table.used_ssids++;
1350 arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data);
1353 void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
1354 struct arm_smmu_master *master,
1355 struct arm_smmu_domain *smmu_domain)
1357 struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
1358 const struct io_pgtable_cfg *pgtbl_cfg =
1359 &io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
1360 typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr =
1361 &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1363 memset(target, 0, sizeof(*target));
1365 target->data[0] = cpu_to_le64(
1366 FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1367 FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1368 FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1369 FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1370 FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1371 #ifdef __BIG_ENDIAN
1372 CTXDESC_CD_0_ENDI |
1373 #endif
1374 CTXDESC_CD_0_TCR_EPD1 |
1375 CTXDESC_CD_0_V |
1376 FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1377 CTXDESC_CD_0_AA64 |
1378 (master->stall_enabled ? CTXDESC_CD_0_S : 0) |
1379 CTXDESC_CD_0_R |
1380 CTXDESC_CD_0_A |
1381 CTXDESC_CD_0_ASET |
1382 FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid)
1385 /* To enable dirty flag update, set both Access flag and dirty state update */
1386 if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD)
1387 target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA |
1388 CTXDESC_CD_0_TCR_HD);
1390 target->data[1] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.ttbr &
1391 CTXDESC_CD_1_TTB0_MASK);
1392 target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.mair);
1394 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s1_cd);
1396 void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
1398 struct arm_smmu_cd target = {};
1399 struct arm_smmu_cd *cdptr;
1401 if (!arm_smmu_cdtab_allocated(&master->cd_table))
1402 return;
1403 cdptr = arm_smmu_get_cd_ptr(master, ssid);
1404 if (WARN_ON(!cdptr))
1405 return;
1406 arm_smmu_write_cd_entry(master, ssid, cdptr, &target);
1409 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
1411 int ret;
1412 size_t l1size;
1413 size_t max_contexts;
1414 struct arm_smmu_device *smmu = master->smmu;
1415 struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1417 cd_table->s1cdmax = master->ssid_bits;
1418 max_contexts = 1 << cd_table->s1cdmax;
1420 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1421 max_contexts <= CTXDESC_L2_ENTRIES) {
1422 cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1423 cd_table->linear.num_ents = max_contexts;
1425 l1size = max_contexts * sizeof(struct arm_smmu_cd);
1426 cd_table->linear.table = dma_alloc_coherent(smmu->dev, l1size,
1427 &cd_table->cdtab_dma,
1428 GFP_KERNEL);
1429 if (!cd_table->linear.table)
1430 return -ENOMEM;
1431 } else {
1432 cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1433 cd_table->l2.num_l1_ents =
1434 DIV_ROUND_UP(max_contexts, CTXDESC_L2_ENTRIES);
1436 cd_table->l2.l2ptrs = kcalloc(cd_table->l2.num_l1_ents,
1437 sizeof(*cd_table->l2.l2ptrs),
1438 GFP_KERNEL);
1439 if (!cd_table->l2.l2ptrs)
1440 return -ENOMEM;
1442 l1size = cd_table->l2.num_l1_ents * sizeof(struct arm_smmu_cdtab_l1);
1443 cd_table->l2.l1tab = dma_alloc_coherent(smmu->dev, l1size,
1444 &cd_table->cdtab_dma,
1445 GFP_KERNEL);
1446 if (!cd_table->l2.l2ptrs) {
1447 ret = -ENOMEM;
1448 goto err_free_l2ptrs;
1451 return 0;
1453 err_free_l2ptrs:
1454 kfree(cd_table->l2.l2ptrs);
1455 cd_table->l2.l2ptrs = NULL;
1456 return ret;
1459 static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
1461 int i;
1462 struct arm_smmu_device *smmu = master->smmu;
1463 struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1465 if (cd_table->s1fmt != STRTAB_STE_0_S1FMT_LINEAR) {
1466 for (i = 0; i < cd_table->l2.num_l1_ents; i++) {
1467 if (!cd_table->l2.l2ptrs[i])
1468 continue;
1470 dma_free_coherent(smmu->dev,
1471 sizeof(*cd_table->l2.l2ptrs[i]),
1472 cd_table->l2.l2ptrs[i],
1473 arm_smmu_cd_l1_get_desc(&cd_table->l2.l1tab[i]));
1475 kfree(cd_table->l2.l2ptrs);
1477 dma_free_coherent(smmu->dev,
1478 cd_table->l2.num_l1_ents *
1479 sizeof(struct arm_smmu_cdtab_l1),
1480 cd_table->l2.l1tab, cd_table->cdtab_dma);
1481 } else {
1482 dma_free_coherent(smmu->dev,
1483 cd_table->linear.num_ents *
1484 sizeof(struct arm_smmu_cd),
1485 cd_table->linear.table, cd_table->cdtab_dma);
1489 /* Stream table manipulation functions */
1490 static void arm_smmu_write_strtab_l1_desc(struct arm_smmu_strtab_l1 *dst,
1491 dma_addr_t l2ptr_dma)
1493 u64 val = 0;
1495 val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, STRTAB_SPLIT + 1);
1496 val |= l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1498 /* The HW has 64 bit atomicity with stores to the L2 STE table */
1499 WRITE_ONCE(dst->l2ptr, cpu_to_le64(val));
1502 struct arm_smmu_ste_writer {
1503 struct arm_smmu_entry_writer writer;
1504 u32 sid;
1507 static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer)
1509 struct arm_smmu_ste_writer *ste_writer =
1510 container_of(writer, struct arm_smmu_ste_writer, writer);
1511 struct arm_smmu_cmdq_ent cmd = {
1512 .opcode = CMDQ_OP_CFGI_STE,
1513 .cfgi = {
1514 .sid = ste_writer->sid,
1515 .leaf = true,
1519 arm_smmu_cmdq_issue_cmd_with_sync(writer->master->smmu, &cmd);
1522 static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = {
1523 .sync = arm_smmu_ste_writer_sync_entry,
1524 .get_used = arm_smmu_get_ste_used,
1527 static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
1528 struct arm_smmu_ste *ste,
1529 const struct arm_smmu_ste *target)
1531 struct arm_smmu_device *smmu = master->smmu;
1532 struct arm_smmu_ste_writer ste_writer = {
1533 .writer = {
1534 .ops = &arm_smmu_ste_writer_ops,
1535 .master = master,
1537 .sid = sid,
1540 arm_smmu_write_entry(&ste_writer.writer, ste->data, target->data);
1542 /* It's likely that we'll want to use the new STE soon */
1543 if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
1544 struct arm_smmu_cmdq_ent
1545 prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG,
1546 .prefetch = {
1547 .sid = sid,
1548 } };
1550 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1554 void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
1556 memset(target, 0, sizeof(*target));
1557 target->data[0] = cpu_to_le64(
1558 STRTAB_STE_0_V |
1559 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
1561 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_abort_ste);
1563 VISIBLE_IF_KUNIT
1564 void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
1565 struct arm_smmu_ste *target)
1567 memset(target, 0, sizeof(*target));
1568 target->data[0] = cpu_to_le64(
1569 STRTAB_STE_0_V |
1570 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS));
1572 if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
1573 target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1574 STRTAB_STE_1_SHCFG_INCOMING));
1576 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste);
1578 VISIBLE_IF_KUNIT
1579 void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
1580 struct arm_smmu_master *master, bool ats_enabled,
1581 unsigned int s1dss)
1583 struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1584 struct arm_smmu_device *smmu = master->smmu;
1586 memset(target, 0, sizeof(*target));
1587 target->data[0] = cpu_to_le64(
1588 STRTAB_STE_0_V |
1589 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1590 FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt) |
1591 (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1592 FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
1594 target->data[1] = cpu_to_le64(
1595 FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) |
1596 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1597 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1598 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1599 ((smmu->features & ARM_SMMU_FEAT_STALLS &&
1600 !master->stall_enabled) ?
1601 STRTAB_STE_1_S1STALLD :
1602 0) |
1603 FIELD_PREP(STRTAB_STE_1_EATS,
1604 ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
1606 if ((smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) &&
1607 s1dss == STRTAB_STE_1_S1DSS_BYPASS)
1608 target->data[1] |= cpu_to_le64(FIELD_PREP(
1609 STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));
1611 if (smmu->features & ARM_SMMU_FEAT_E2H) {
1613 * To support BTM the streamworld needs to match the
1614 * configuration of the CPU so that the ASID broadcasts are
1615 * properly matched. This means either S/NS-EL2-E2H (hypervisor)
1616 * or NS-EL1 (guest). Since an SVA domain can be installed in a
1617 * PASID this should always use a BTM compatible configuration
1618 * if the HW supports it.
1620 target->data[1] |= cpu_to_le64(
1621 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2));
1622 } else {
1623 target->data[1] |= cpu_to_le64(
1624 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1627 * VMID 0 is reserved for stage-2 bypass EL1 STEs, see
1628 * arm_smmu_domain_alloc_id()
1630 target->data[2] =
1631 cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0));
1634 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste);
1636 void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
1637 struct arm_smmu_master *master,
1638 struct arm_smmu_domain *smmu_domain,
1639 bool ats_enabled)
1641 struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
1642 const struct io_pgtable_cfg *pgtbl_cfg =
1643 &io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
1644 typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr =
1645 &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1646 u64 vtcr_val;
1647 struct arm_smmu_device *smmu = master->smmu;
1649 memset(target, 0, sizeof(*target));
1650 target->data[0] = cpu_to_le64(
1651 STRTAB_STE_0_V |
1652 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS));
1654 target->data[1] = cpu_to_le64(
1655 FIELD_PREP(STRTAB_STE_1_EATS,
1656 ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
1658 if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_S2FWB)
1659 target->data[1] |= cpu_to_le64(STRTAB_STE_1_S2FWB);
1660 if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
1661 target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1662 STRTAB_STE_1_SHCFG_INCOMING));
1664 vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1665 FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1666 FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1667 FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1668 FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1669 FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1670 FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1671 target->data[2] = cpu_to_le64(
1672 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1673 FIELD_PREP(STRTAB_STE_2_VTCR, vtcr_val) |
1674 STRTAB_STE_2_S2AA64 |
1675 #ifdef __BIG_ENDIAN
1676 STRTAB_STE_2_S2ENDI |
1677 #endif
1678 STRTAB_STE_2_S2PTW |
1679 (master->stall_enabled ? STRTAB_STE_2_S2S : 0) |
1680 STRTAB_STE_2_S2R);
1682 target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr &
1683 STRTAB_STE_3_S2TTB_MASK);
1685 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s2_domain_ste);
1688 * This can safely directly manipulate the STE memory without a sync sequence
1689 * because the STE table has not been installed in the SMMU yet.
1691 static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
1692 unsigned int nent)
1694 unsigned int i;
1696 for (i = 0; i < nent; ++i) {
1697 arm_smmu_make_abort_ste(strtab);
1698 strtab++;
1702 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1704 dma_addr_t l2ptr_dma;
1705 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1706 struct arm_smmu_strtab_l2 **l2table;
1708 l2table = &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)];
1709 if (*l2table)
1710 return 0;
1712 *l2table = dmam_alloc_coherent(smmu->dev, sizeof(**l2table),
1713 &l2ptr_dma, GFP_KERNEL);
1714 if (!*l2table) {
1715 dev_err(smmu->dev,
1716 "failed to allocate l2 stream table for SID %u\n",
1717 sid);
1718 return -ENOMEM;
1721 arm_smmu_init_initial_stes((*l2table)->stes,
1722 ARRAY_SIZE((*l2table)->stes));
1723 arm_smmu_write_strtab_l1_desc(&cfg->l2.l1tab[arm_smmu_strtab_l1_idx(sid)],
1724 l2ptr_dma);
1725 return 0;
1728 static int arm_smmu_streams_cmp_key(const void *lhs, const struct rb_node *rhs)
1730 struct arm_smmu_stream *stream_rhs =
1731 rb_entry(rhs, struct arm_smmu_stream, node);
1732 const u32 *sid_lhs = lhs;
1734 if (*sid_lhs < stream_rhs->id)
1735 return -1;
1736 if (*sid_lhs > stream_rhs->id)
1737 return 1;
1738 return 0;
1741 static int arm_smmu_streams_cmp_node(struct rb_node *lhs,
1742 const struct rb_node *rhs)
1744 return arm_smmu_streams_cmp_key(
1745 &rb_entry(lhs, struct arm_smmu_stream, node)->id, rhs);
1748 static struct arm_smmu_master *
1749 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1751 struct rb_node *node;
1753 lockdep_assert_held(&smmu->streams_mutex);
1755 node = rb_find(&sid, &smmu->streams, arm_smmu_streams_cmp_key);
1756 if (!node)
1757 return NULL;
1758 return rb_entry(node, struct arm_smmu_stream, node)->master;
1761 /* IRQ and event handlers */
1762 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1764 int ret = 0;
1765 u32 perm = 0;
1766 struct arm_smmu_master *master;
1767 bool ssid_valid = evt[0] & EVTQ_0_SSV;
1768 u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1769 struct iopf_fault fault_evt = { };
1770 struct iommu_fault *flt = &fault_evt.fault;
1772 switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1773 case EVT_ID_TRANSLATION_FAULT:
1774 case EVT_ID_ADDR_SIZE_FAULT:
1775 case EVT_ID_ACCESS_FAULT:
1776 case EVT_ID_PERMISSION_FAULT:
1777 break;
1778 default:
1779 return -EOPNOTSUPP;
1782 if (!(evt[1] & EVTQ_1_STALL))
1783 return -EOPNOTSUPP;
1785 if (evt[1] & EVTQ_1_RnW)
1786 perm |= IOMMU_FAULT_PERM_READ;
1787 else
1788 perm |= IOMMU_FAULT_PERM_WRITE;
1790 if (evt[1] & EVTQ_1_InD)
1791 perm |= IOMMU_FAULT_PERM_EXEC;
1793 if (evt[1] & EVTQ_1_PnU)
1794 perm |= IOMMU_FAULT_PERM_PRIV;
1796 flt->type = IOMMU_FAULT_PAGE_REQ;
1797 flt->prm = (struct iommu_fault_page_request) {
1798 .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1799 .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1800 .perm = perm,
1801 .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1804 if (ssid_valid) {
1805 flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1806 flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1809 mutex_lock(&smmu->streams_mutex);
1810 master = arm_smmu_find_master(smmu, sid);
1811 if (!master) {
1812 ret = -EINVAL;
1813 goto out_unlock;
1816 ret = iommu_report_device_fault(master->dev, &fault_evt);
1817 out_unlock:
1818 mutex_unlock(&smmu->streams_mutex);
1819 return ret;
1822 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1824 int i, ret;
1825 struct arm_smmu_device *smmu = dev;
1826 struct arm_smmu_queue *q = &smmu->evtq.q;
1827 struct arm_smmu_ll_queue *llq = &q->llq;
1828 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1829 DEFAULT_RATELIMIT_BURST);
1830 u64 evt[EVTQ_ENT_DWORDS];
1832 do {
1833 while (!queue_remove_raw(q, evt)) {
1834 u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1836 ret = arm_smmu_handle_evt(smmu, evt);
1837 if (!ret || !__ratelimit(&rs))
1838 continue;
1840 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1841 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1842 dev_info(smmu->dev, "\t0x%016llx\n",
1843 (unsigned long long)evt[i]);
1845 cond_resched();
1849 * Not much we can do on overflow, so scream and pretend we're
1850 * trying harder.
1852 if (queue_sync_prod_in(q) == -EOVERFLOW)
1853 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1854 } while (!queue_empty(llq));
1856 /* Sync our overflow flag, as we believe we're up to speed */
1857 queue_sync_cons_ovf(q);
1858 return IRQ_HANDLED;
1861 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1863 u32 sid, ssid;
1864 u16 grpid;
1865 bool ssv, last;
1867 sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1868 ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1869 ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
1870 last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1871 grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1873 dev_info(smmu->dev, "unexpected PRI request received:\n");
1874 dev_info(smmu->dev,
1875 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1876 sid, ssid, grpid, last ? "L" : "",
1877 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1878 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1879 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1880 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1881 evt[1] & PRIQ_1_ADDR_MASK);
1883 if (last) {
1884 struct arm_smmu_cmdq_ent cmd = {
1885 .opcode = CMDQ_OP_PRI_RESP,
1886 .substream_valid = ssv,
1887 .pri = {
1888 .sid = sid,
1889 .ssid = ssid,
1890 .grpid = grpid,
1891 .resp = PRI_RESP_DENY,
1895 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1899 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1901 struct arm_smmu_device *smmu = dev;
1902 struct arm_smmu_queue *q = &smmu->priq.q;
1903 struct arm_smmu_ll_queue *llq = &q->llq;
1904 u64 evt[PRIQ_ENT_DWORDS];
1906 do {
1907 while (!queue_remove_raw(q, evt))
1908 arm_smmu_handle_ppr(smmu, evt);
1910 if (queue_sync_prod_in(q) == -EOVERFLOW)
1911 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1912 } while (!queue_empty(llq));
1914 /* Sync our overflow flag, as we believe we're up to speed */
1915 queue_sync_cons_ovf(q);
1916 return IRQ_HANDLED;
1919 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1921 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1923 u32 gerror, gerrorn, active;
1924 struct arm_smmu_device *smmu = dev;
1926 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1927 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1929 active = gerror ^ gerrorn;
1930 if (!(active & GERROR_ERR_MASK))
1931 return IRQ_NONE; /* No errors pending */
1933 dev_warn(smmu->dev,
1934 "unexpected global error reported (0x%08x), this could be serious\n",
1935 active);
1937 if (active & GERROR_SFM_ERR) {
1938 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1939 arm_smmu_device_disable(smmu);
1942 if (active & GERROR_MSI_GERROR_ABT_ERR)
1943 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1945 if (active & GERROR_MSI_PRIQ_ABT_ERR)
1946 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1948 if (active & GERROR_MSI_EVTQ_ABT_ERR)
1949 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1951 if (active & GERROR_MSI_CMDQ_ABT_ERR)
1952 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1954 if (active & GERROR_PRIQ_ABT_ERR)
1955 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1957 if (active & GERROR_EVTQ_ABT_ERR)
1958 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1960 if (active & GERROR_CMDQ_ERR)
1961 arm_smmu_cmdq_skip_err(smmu);
1963 writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1964 return IRQ_HANDLED;
1967 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1969 struct arm_smmu_device *smmu = dev;
1971 arm_smmu_evtq_thread(irq, dev);
1972 if (smmu->features & ARM_SMMU_FEAT_PRI)
1973 arm_smmu_priq_thread(irq, dev);
1975 return IRQ_HANDLED;
1978 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1980 arm_smmu_gerror_handler(irq, dev);
1981 return IRQ_WAKE_THREAD;
1984 static void
1985 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1986 struct arm_smmu_cmdq_ent *cmd)
1988 size_t log2_span;
1989 size_t span_mask;
1990 /* ATC invalidates are always on 4096-bytes pages */
1991 size_t inval_grain_shift = 12;
1992 unsigned long page_start, page_end;
1995 * ATS and PASID:
1997 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1998 * prefix. In that case all ATC entries within the address range are
1999 * invalidated, including those that were requested with a PASID! There
2000 * is no way to invalidate only entries without PASID.
2002 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
2003 * traffic), translation requests without PASID create ATC entries
2004 * without PASID, which must be invalidated with substream_valid clear.
2005 * This has the unpleasant side-effect of invalidating all PASID-tagged
2006 * ATC entries within the address range.
2008 *cmd = (struct arm_smmu_cmdq_ent) {
2009 .opcode = CMDQ_OP_ATC_INV,
2010 .substream_valid = (ssid != IOMMU_NO_PASID),
2011 .atc.ssid = ssid,
2014 if (!size) {
2015 cmd->atc.size = ATC_INV_SIZE_ALL;
2016 return;
2019 page_start = iova >> inval_grain_shift;
2020 page_end = (iova + size - 1) >> inval_grain_shift;
2023 * In an ATS Invalidate Request, the address must be aligned on the
2024 * range size, which must be a power of two number of page sizes. We
2025 * thus have to choose between grossly over-invalidating the region, or
2026 * splitting the invalidation into multiple commands. For simplicity
2027 * we'll go with the first solution, but should refine it in the future
2028 * if multiple commands are shown to be more efficient.
2030 * Find the smallest power of two that covers the range. The most
2031 * significant differing bit between the start and end addresses,
2032 * fls(start ^ end), indicates the required span. For example:
2034 * We want to invalidate pages [8; 11]. This is already the ideal range:
2035 * x = 0b1000 ^ 0b1011 = 0b11
2036 * span = 1 << fls(x) = 4
2038 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
2039 * x = 0b0111 ^ 0b1010 = 0b1101
2040 * span = 1 << fls(x) = 16
2042 log2_span = fls_long(page_start ^ page_end);
2043 span_mask = (1ULL << log2_span) - 1;
2045 page_start &= ~span_mask;
2047 cmd->atc.addr = page_start << inval_grain_shift;
2048 cmd->atc.size = log2_span;
2051 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
2052 ioasid_t ssid)
2054 int i;
2055 struct arm_smmu_cmdq_ent cmd;
2056 struct arm_smmu_cmdq_batch cmds;
2058 arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd);
2060 arm_smmu_cmdq_batch_init(master->smmu, &cmds, &cmd);
2061 for (i = 0; i < master->num_streams; i++) {
2062 cmd.atc.sid = master->streams[i].id;
2063 arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
2066 return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
2069 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
2070 unsigned long iova, size_t size)
2072 struct arm_smmu_master_domain *master_domain;
2073 int i;
2074 unsigned long flags;
2075 struct arm_smmu_cmdq_ent cmd = {
2076 .opcode = CMDQ_OP_ATC_INV,
2078 struct arm_smmu_cmdq_batch cmds;
2080 if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
2081 return 0;
2084 * Ensure that we've completed prior invalidation of the main TLBs
2085 * before we read 'nr_ats_masters' in case of a concurrent call to
2086 * arm_smmu_enable_ats():
2088 * // unmap() // arm_smmu_enable_ats()
2089 * TLBI+SYNC atomic_inc(&nr_ats_masters);
2090 * smp_mb(); [...]
2091 * atomic_read(&nr_ats_masters); pci_enable_ats() // writel()
2093 * Ensures that we always see the incremented 'nr_ats_masters' count if
2094 * ATS was enabled at the PCI device before completion of the TLBI.
2096 smp_mb();
2097 if (!atomic_read(&smmu_domain->nr_ats_masters))
2098 return 0;
2100 arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd);
2102 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2103 list_for_each_entry(master_domain, &smmu_domain->devices,
2104 devices_elm) {
2105 struct arm_smmu_master *master = master_domain->master;
2107 if (!master->ats_enabled)
2108 continue;
2110 if (master_domain->nested_ats_flush) {
2112 * If a S2 used as a nesting parent is changed we have
2113 * no option but to completely flush the ATC.
2115 arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
2116 } else {
2117 arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size,
2118 &cmd);
2121 for (i = 0; i < master->num_streams; i++) {
2122 cmd.atc.sid = master->streams[i].id;
2123 arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
2126 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2128 return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
2131 /* IO_PGTABLE API */
2132 static void arm_smmu_tlb_inv_context(void *cookie)
2134 struct arm_smmu_domain *smmu_domain = cookie;
2135 struct arm_smmu_device *smmu = smmu_domain->smmu;
2136 struct arm_smmu_cmdq_ent cmd;
2139 * NOTE: when io-pgtable is in non-strict mode, we may get here with
2140 * PTEs previously cleared by unmaps on the current CPU not yet visible
2141 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
2142 * insertion to guarantee those are observed before the TLBI. Do be
2143 * careful, 007.
2145 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2146 arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
2147 } else {
2148 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
2149 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
2150 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
2152 arm_smmu_atc_inv_domain(smmu_domain, 0, 0);
2155 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
2156 unsigned long iova, size_t size,
2157 size_t granule,
2158 struct arm_smmu_domain *smmu_domain)
2160 struct arm_smmu_device *smmu = smmu_domain->smmu;
2161 unsigned long end = iova + size, num_pages = 0, tg = 0;
2162 size_t inv_range = granule;
2163 struct arm_smmu_cmdq_batch cmds;
2165 if (!size)
2166 return;
2168 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2169 /* Get the leaf page size */
2170 tg = __ffs(smmu_domain->domain.pgsize_bitmap);
2172 num_pages = size >> tg;
2174 /* Convert page size of 12,14,16 (log2) to 1,2,3 */
2175 cmd->tlbi.tg = (tg - 10) / 2;
2178 * Determine what level the granule is at. For non-leaf, both
2179 * io-pgtable and SVA pass a nominal last-level granule because
2180 * they don't know what level(s) actually apply, so ignore that
2181 * and leave TTL=0. However for various errata reasons we still
2182 * want to use a range command, so avoid the SVA corner case
2183 * where both scale and num could be 0 as well.
2185 if (cmd->tlbi.leaf)
2186 cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
2187 else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
2188 num_pages++;
2191 arm_smmu_cmdq_batch_init(smmu, &cmds, cmd);
2193 while (iova < end) {
2194 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2196 * On each iteration of the loop, the range is 5 bits
2197 * worth of the aligned size remaining.
2198 * The range in pages is:
2200 * range = (num_pages & (0x1f << __ffs(num_pages)))
2202 unsigned long scale, num;
2204 /* Determine the power of 2 multiple number of pages */
2205 scale = __ffs(num_pages);
2206 cmd->tlbi.scale = scale;
2208 /* Determine how many chunks of 2^scale size we have */
2209 num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
2210 cmd->tlbi.num = num - 1;
2212 /* range is num * 2^scale * pgsize */
2213 inv_range = num << (scale + tg);
2215 /* Clear out the lower order bits for the next iteration */
2216 num_pages -= num << scale;
2219 cmd->tlbi.addr = iova;
2220 arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
2221 iova += inv_range;
2223 arm_smmu_cmdq_batch_submit(smmu, &cmds);
2226 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
2227 size_t granule, bool leaf,
2228 struct arm_smmu_domain *smmu_domain)
2230 struct arm_smmu_cmdq_ent cmd = {
2231 .tlbi = {
2232 .leaf = leaf,
2236 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2237 cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2238 CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
2239 cmd.tlbi.asid = smmu_domain->cd.asid;
2240 } else {
2241 cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
2242 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
2244 __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2246 if (smmu_domain->nest_parent) {
2248 * When the S2 domain changes all the nested S1 ASIDs have to be
2249 * flushed too.
2251 cmd.opcode = CMDQ_OP_TLBI_NH_ALL;
2252 arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain->smmu, &cmd);
2256 * Unfortunately, this can't be leaf-only since we may have
2257 * zapped an entire table.
2259 arm_smmu_atc_inv_domain(smmu_domain, iova, size);
2262 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
2263 size_t granule, bool leaf,
2264 struct arm_smmu_domain *smmu_domain)
2266 struct arm_smmu_cmdq_ent cmd = {
2267 .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2268 CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
2269 .tlbi = {
2270 .asid = asid,
2271 .leaf = leaf,
2275 __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2278 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
2279 unsigned long iova, size_t granule,
2280 void *cookie)
2282 struct arm_smmu_domain *smmu_domain = cookie;
2283 struct iommu_domain *domain = &smmu_domain->domain;
2285 iommu_iotlb_gather_add_page(domain, gather, iova, granule);
2288 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2289 size_t granule, void *cookie)
2291 arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
2294 static const struct iommu_flush_ops arm_smmu_flush_ops = {
2295 .tlb_flush_all = arm_smmu_tlb_inv_context,
2296 .tlb_flush_walk = arm_smmu_tlb_inv_walk,
2297 .tlb_add_page = arm_smmu_tlb_inv_page_nosync,
2300 static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu)
2302 u32 features = (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY);
2304 return (smmu->features & features) == features;
2307 /* IOMMU API */
2308 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
2310 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2312 switch (cap) {
2313 case IOMMU_CAP_CACHE_COHERENCY:
2314 /* Assume that a coherent TCU implies coherent TBUs */
2315 return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
2316 case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
2317 return arm_smmu_master_canwbs(master);
2318 case IOMMU_CAP_NOEXEC:
2319 case IOMMU_CAP_DEFERRED_FLUSH:
2320 return true;
2321 case IOMMU_CAP_DIRTY_TRACKING:
2322 return arm_smmu_dbm_capable(master->smmu);
2323 default:
2324 return false;
2328 static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain)
2330 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2331 struct arm_smmu_master_domain *master_domain;
2332 unsigned long flags;
2333 bool ret = true;
2335 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2336 list_for_each_entry(master_domain, &smmu_domain->devices,
2337 devices_elm) {
2338 if (!arm_smmu_master_canwbs(master_domain->master)) {
2339 ret = false;
2340 break;
2343 smmu_domain->enforce_cache_coherency = ret;
2344 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2345 return ret;
2348 struct arm_smmu_domain *arm_smmu_domain_alloc(void)
2350 struct arm_smmu_domain *smmu_domain;
2352 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2353 if (!smmu_domain)
2354 return ERR_PTR(-ENOMEM);
2356 mutex_init(&smmu_domain->init_mutex);
2357 INIT_LIST_HEAD(&smmu_domain->devices);
2358 spin_lock_init(&smmu_domain->devices_lock);
2360 return smmu_domain;
2363 static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
2365 struct arm_smmu_domain *smmu_domain;
2368 * Allocate the domain and initialise some of its data structures.
2369 * We can't really do anything meaningful until we've added a
2370 * master.
2372 smmu_domain = arm_smmu_domain_alloc();
2373 if (IS_ERR(smmu_domain))
2374 return ERR_CAST(smmu_domain);
2376 if (dev) {
2377 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2378 int ret;
2380 ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, 0);
2381 if (ret) {
2382 kfree(smmu_domain);
2383 return ERR_PTR(ret);
2386 return &smmu_domain->domain;
2389 static void arm_smmu_domain_free_paging(struct iommu_domain *domain)
2391 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2392 struct arm_smmu_device *smmu = smmu_domain->smmu;
2394 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2396 /* Free the ASID or VMID */
2397 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2398 /* Prevent SVA from touching the CD while we're freeing it */
2399 mutex_lock(&arm_smmu_asid_lock);
2400 xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid);
2401 mutex_unlock(&arm_smmu_asid_lock);
2402 } else {
2403 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2404 if (cfg->vmid)
2405 ida_free(&smmu->vmid_map, cfg->vmid);
2408 kfree(smmu_domain);
2411 static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu,
2412 struct arm_smmu_domain *smmu_domain)
2414 int ret;
2415 u32 asid = 0;
2416 struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
2418 /* Prevent SVA from modifying the ASID until it is written to the CD */
2419 mutex_lock(&arm_smmu_asid_lock);
2420 ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain,
2421 XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2422 cd->asid = (u16)asid;
2423 mutex_unlock(&arm_smmu_asid_lock);
2424 return ret;
2427 static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu,
2428 struct arm_smmu_domain *smmu_domain)
2430 int vmid;
2431 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2433 /* Reserve VMID 0 for stage-2 bypass STEs */
2434 vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
2435 GFP_KERNEL);
2436 if (vmid < 0)
2437 return vmid;
2439 cfg->vmid = (u16)vmid;
2440 return 0;
2443 static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
2444 struct arm_smmu_device *smmu, u32 flags)
2446 int ret;
2447 enum io_pgtable_fmt fmt;
2448 struct io_pgtable_cfg pgtbl_cfg;
2449 struct io_pgtable_ops *pgtbl_ops;
2450 int (*finalise_stage_fn)(struct arm_smmu_device *smmu,
2451 struct arm_smmu_domain *smmu_domain);
2452 bool enable_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
2454 /* Restrict the stage to what we can actually support */
2455 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2456 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2457 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2458 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2460 pgtbl_cfg = (struct io_pgtable_cfg) {
2461 .pgsize_bitmap = smmu->pgsize_bitmap,
2462 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
2463 .tlb = &arm_smmu_flush_ops,
2464 .iommu_dev = smmu->dev,
2467 switch (smmu_domain->stage) {
2468 case ARM_SMMU_DOMAIN_S1: {
2469 unsigned long ias = (smmu->features &
2470 ARM_SMMU_FEAT_VAX) ? 52 : 48;
2472 pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);
2473 pgtbl_cfg.oas = smmu->ias;
2474 if (enable_dirty)
2475 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
2476 fmt = ARM_64_LPAE_S1;
2477 finalise_stage_fn = arm_smmu_domain_finalise_s1;
2478 break;
2480 case ARM_SMMU_DOMAIN_S2:
2481 if (enable_dirty)
2482 return -EOPNOTSUPP;
2483 pgtbl_cfg.ias = smmu->ias;
2484 pgtbl_cfg.oas = smmu->oas;
2485 fmt = ARM_64_LPAE_S2;
2486 finalise_stage_fn = arm_smmu_domain_finalise_s2;
2487 if ((smmu->features & ARM_SMMU_FEAT_S2FWB) &&
2488 (flags & IOMMU_HWPT_ALLOC_NEST_PARENT))
2489 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_S2FWB;
2490 break;
2491 default:
2492 return -EINVAL;
2495 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2496 if (!pgtbl_ops)
2497 return -ENOMEM;
2499 smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2500 smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2501 smmu_domain->domain.geometry.force_aperture = true;
2502 if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
2503 smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops;
2505 ret = finalise_stage_fn(smmu, smmu_domain);
2506 if (ret < 0) {
2507 free_io_pgtable_ops(pgtbl_ops);
2508 return ret;
2511 smmu_domain->pgtbl_ops = pgtbl_ops;
2512 smmu_domain->smmu = smmu;
2513 return 0;
2516 static struct arm_smmu_ste *
2517 arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2519 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2521 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2522 /* Two-level walk */
2523 return &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)]
2524 ->stes[arm_smmu_strtab_l2_idx(sid)];
2525 } else {
2526 /* Simple linear lookup */
2527 return &cfg->linear.table[sid];
2531 void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
2532 const struct arm_smmu_ste *target)
2534 int i, j;
2535 struct arm_smmu_device *smmu = master->smmu;
2537 master->cd_table.in_ste =
2538 FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(target->data[0])) ==
2539 STRTAB_STE_0_CFG_S1_TRANS;
2540 master->ste_ats_enabled =
2541 FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(target->data[1])) ==
2542 STRTAB_STE_1_EATS_TRANS;
2544 for (i = 0; i < master->num_streams; ++i) {
2545 u32 sid = master->streams[i].id;
2546 struct arm_smmu_ste *step =
2547 arm_smmu_get_step_for_sid(smmu, sid);
2549 /* Bridged PCI devices may end up with duplicated IDs */
2550 for (j = 0; j < i; j++)
2551 if (master->streams[j].id == sid)
2552 break;
2553 if (j < i)
2554 continue;
2556 arm_smmu_write_ste(master, sid, step, target);
2560 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2562 struct device *dev = master->dev;
2563 struct arm_smmu_device *smmu = master->smmu;
2564 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2566 if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2567 return false;
2569 if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2570 return false;
2572 return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2575 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2577 size_t stu;
2578 struct pci_dev *pdev;
2579 struct arm_smmu_device *smmu = master->smmu;
2581 /* Smallest Translation Unit: log2 of the smallest supported granule */
2582 stu = __ffs(smmu->pgsize_bitmap);
2583 pdev = to_pci_dev(master->dev);
2586 * ATC invalidation of PASID 0 causes the entire ATC to be flushed.
2588 arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
2589 if (pci_enable_ats(pdev, stu))
2590 dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2593 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2595 int ret;
2596 int features;
2597 int num_pasids;
2598 struct pci_dev *pdev;
2600 if (!dev_is_pci(master->dev))
2601 return -ENODEV;
2603 pdev = to_pci_dev(master->dev);
2605 features = pci_pasid_features(pdev);
2606 if (features < 0)
2607 return features;
2609 num_pasids = pci_max_pasids(pdev);
2610 if (num_pasids <= 0)
2611 return num_pasids;
2613 ret = pci_enable_pasid(pdev, features);
2614 if (ret) {
2615 dev_err(&pdev->dev, "Failed to enable PASID\n");
2616 return ret;
2619 master->ssid_bits = min_t(u8, ilog2(num_pasids),
2620 master->smmu->ssid_bits);
2621 return 0;
2624 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2626 struct pci_dev *pdev;
2628 if (!dev_is_pci(master->dev))
2629 return;
2631 pdev = to_pci_dev(master->dev);
2633 if (!pdev->pasid_enabled)
2634 return;
2636 master->ssid_bits = 0;
2637 pci_disable_pasid(pdev);
2640 static struct arm_smmu_master_domain *
2641 arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain,
2642 struct arm_smmu_master *master,
2643 ioasid_t ssid, bool nested_ats_flush)
2645 struct arm_smmu_master_domain *master_domain;
2647 lockdep_assert_held(&smmu_domain->devices_lock);
2649 list_for_each_entry(master_domain, &smmu_domain->devices,
2650 devices_elm) {
2651 if (master_domain->master == master &&
2652 master_domain->ssid == ssid &&
2653 master_domain->nested_ats_flush == nested_ats_flush)
2654 return master_domain;
2656 return NULL;
2660 * If the domain uses the smmu_domain->devices list return the arm_smmu_domain
2661 * structure, otherwise NULL. These domains track attached devices so they can
2662 * issue invalidations.
2664 static struct arm_smmu_domain *
2665 to_smmu_domain_devices(struct iommu_domain *domain)
2667 /* The domain can be NULL only when processing the first attach */
2668 if (!domain)
2669 return NULL;
2670 if ((domain->type & __IOMMU_DOMAIN_PAGING) ||
2671 domain->type == IOMMU_DOMAIN_SVA)
2672 return to_smmu_domain(domain);
2673 if (domain->type == IOMMU_DOMAIN_NESTED)
2674 return to_smmu_nested_domain(domain)->vsmmu->s2_parent;
2675 return NULL;
2678 static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
2679 struct iommu_domain *domain,
2680 ioasid_t ssid)
2682 struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain);
2683 struct arm_smmu_master_domain *master_domain;
2684 bool nested_ats_flush = false;
2685 unsigned long flags;
2687 if (!smmu_domain)
2688 return;
2690 if (domain->type == IOMMU_DOMAIN_NESTED)
2691 nested_ats_flush = to_smmu_nested_domain(domain)->enable_ats;
2693 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2694 master_domain = arm_smmu_find_master_domain(smmu_domain, master, ssid,
2695 nested_ats_flush);
2696 if (master_domain) {
2697 list_del(&master_domain->devices_elm);
2698 kfree(master_domain);
2699 if (master->ats_enabled)
2700 atomic_dec(&smmu_domain->nr_ats_masters);
2702 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2706 * Start the sequence to attach a domain to a master. The sequence contains three
2707 * steps:
2708 * arm_smmu_attach_prepare()
2709 * arm_smmu_install_ste_for_dev()
2710 * arm_smmu_attach_commit()
2712 * If prepare succeeds then the sequence must be completed. The STE installed
2713 * must set the STE.EATS field according to state.ats_enabled.
2715 * If the device supports ATS then this determines if EATS should be enabled
2716 * in the STE, and starts sequencing EATS disable if required.
2718 * The change of the EATS in the STE and the PCI ATS config space is managed by
2719 * this sequence to be in the right order so that if PCI ATS is enabled then
2720 * STE.ETAS is enabled.
2722 * new_domain can be a non-paging domain. In this case ATS will not be enabled,
2723 * and invalidations won't be tracked.
2725 int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
2726 struct iommu_domain *new_domain)
2728 struct arm_smmu_master *master = state->master;
2729 struct arm_smmu_master_domain *master_domain;
2730 struct arm_smmu_domain *smmu_domain =
2731 to_smmu_domain_devices(new_domain);
2732 unsigned long flags;
2735 * arm_smmu_share_asid() must not see two domains pointing to the same
2736 * arm_smmu_master_domain contents otherwise it could randomly write one
2737 * or the other to the CD.
2739 lockdep_assert_held(&arm_smmu_asid_lock);
2741 if (smmu_domain || state->cd_needs_ats) {
2743 * The SMMU does not support enabling ATS with bypass/abort.
2744 * When the STE is in bypass (STE.Config[2:0] == 0b100), ATS
2745 * Translation Requests and Translated transactions are denied
2746 * as though ATS is disabled for the stream (STE.EATS == 0b00),
2747 * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events
2748 * (IHI0070Ea 5.2 Stream Table Entry). Thus ATS can only be
2749 * enabled if we have arm_smmu_domain, those always have page
2750 * tables.
2752 state->ats_enabled = !state->disable_ats &&
2753 arm_smmu_ats_supported(master);
2756 if (smmu_domain) {
2757 master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL);
2758 if (!master_domain)
2759 return -ENOMEM;
2760 master_domain->master = master;
2761 master_domain->ssid = state->ssid;
2762 if (new_domain->type == IOMMU_DOMAIN_NESTED)
2763 master_domain->nested_ats_flush =
2764 to_smmu_nested_domain(new_domain)->enable_ats;
2767 * During prepare we want the current smmu_domain and new
2768 * smmu_domain to be in the devices list before we change any
2769 * HW. This ensures that both domains will send ATS
2770 * invalidations to the master until we are done.
2772 * It is tempting to make this list only track masters that are
2773 * using ATS, but arm_smmu_share_asid() also uses this to change
2774 * the ASID of a domain, unrelated to ATS.
2776 * Notice if we are re-attaching the same domain then the list
2777 * will have two identical entries and commit will remove only
2778 * one of them.
2780 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2781 if (smmu_domain->enforce_cache_coherency &&
2782 !arm_smmu_master_canwbs(master)) {
2783 spin_unlock_irqrestore(&smmu_domain->devices_lock,
2784 flags);
2785 kfree(master_domain);
2786 return -EINVAL;
2789 if (state->ats_enabled)
2790 atomic_inc(&smmu_domain->nr_ats_masters);
2791 list_add(&master_domain->devices_elm, &smmu_domain->devices);
2792 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2795 if (!state->ats_enabled && master->ats_enabled) {
2796 pci_disable_ats(to_pci_dev(master->dev));
2798 * This is probably overkill, but the config write for disabling
2799 * ATS should complete before the STE is configured to generate
2800 * UR to avoid AER noise.
2802 wmb();
2804 return 0;
2808 * Commit is done after the STE/CD are configured with the EATS setting. It
2809 * completes synchronizing the PCI device's ATC and finishes manipulating the
2810 * smmu_domain->devices list.
2812 void arm_smmu_attach_commit(struct arm_smmu_attach_state *state)
2814 struct arm_smmu_master *master = state->master;
2816 lockdep_assert_held(&arm_smmu_asid_lock);
2818 if (state->ats_enabled && !master->ats_enabled) {
2819 arm_smmu_enable_ats(master);
2820 } else if (state->ats_enabled && master->ats_enabled) {
2822 * The translation has changed, flush the ATC. At this point the
2823 * SMMU is translating for the new domain and both the old&new
2824 * domain will issue invalidations.
2826 arm_smmu_atc_inv_master(master, state->ssid);
2827 } else if (!state->ats_enabled && master->ats_enabled) {
2828 /* ATS is being switched off, invalidate the entire ATC */
2829 arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
2831 master->ats_enabled = state->ats_enabled;
2833 arm_smmu_remove_master_domain(master, state->old_domain, state->ssid);
2836 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2838 int ret = 0;
2839 struct arm_smmu_ste target;
2840 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2841 struct arm_smmu_device *smmu;
2842 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2843 struct arm_smmu_attach_state state = {
2844 .old_domain = iommu_get_domain_for_dev(dev),
2845 .ssid = IOMMU_NO_PASID,
2847 struct arm_smmu_master *master;
2848 struct arm_smmu_cd *cdptr;
2850 if (!fwspec)
2851 return -ENOENT;
2853 state.master = master = dev_iommu_priv_get(dev);
2854 smmu = master->smmu;
2856 mutex_lock(&smmu_domain->init_mutex);
2858 if (!smmu_domain->smmu) {
2859 ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0);
2860 } else if (smmu_domain->smmu != smmu)
2861 ret = -EINVAL;
2863 mutex_unlock(&smmu_domain->init_mutex);
2864 if (ret)
2865 return ret;
2867 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2868 cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID);
2869 if (!cdptr)
2870 return -ENOMEM;
2871 } else if (arm_smmu_ssids_in_use(&master->cd_table))
2872 return -EBUSY;
2875 * Prevent arm_smmu_share_asid() from trying to change the ASID
2876 * of either the old or new domain while we are working on it.
2877 * This allows the STE and the smmu_domain->devices list to
2878 * be inconsistent during this routine.
2880 mutex_lock(&arm_smmu_asid_lock);
2882 ret = arm_smmu_attach_prepare(&state, domain);
2883 if (ret) {
2884 mutex_unlock(&arm_smmu_asid_lock);
2885 return ret;
2888 switch (smmu_domain->stage) {
2889 case ARM_SMMU_DOMAIN_S1: {
2890 struct arm_smmu_cd target_cd;
2892 arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
2893 arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr,
2894 &target_cd);
2895 arm_smmu_make_cdtable_ste(&target, master, state.ats_enabled,
2896 STRTAB_STE_1_S1DSS_SSID0);
2897 arm_smmu_install_ste_for_dev(master, &target);
2898 break;
2900 case ARM_SMMU_DOMAIN_S2:
2901 arm_smmu_make_s2_domain_ste(&target, master, smmu_domain,
2902 state.ats_enabled);
2903 arm_smmu_install_ste_for_dev(master, &target);
2904 arm_smmu_clear_cd(master, IOMMU_NO_PASID);
2905 break;
2908 arm_smmu_attach_commit(&state);
2909 mutex_unlock(&arm_smmu_asid_lock);
2910 return 0;
2913 static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
2914 struct device *dev, ioasid_t id,
2915 struct iommu_domain *old)
2917 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2918 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2919 struct arm_smmu_device *smmu = master->smmu;
2920 struct arm_smmu_cd target_cd;
2921 int ret = 0;
2923 mutex_lock(&smmu_domain->init_mutex);
2924 if (!smmu_domain->smmu)
2925 ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0);
2926 else if (smmu_domain->smmu != smmu)
2927 ret = -EINVAL;
2928 mutex_unlock(&smmu_domain->init_mutex);
2929 if (ret)
2930 return ret;
2932 if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
2933 return -EINVAL;
2936 * We can read cd.asid outside the lock because arm_smmu_set_pasid()
2937 * will fix it
2939 arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
2940 return arm_smmu_set_pasid(master, to_smmu_domain(domain), id,
2941 &target_cd, old);
2944 static void arm_smmu_update_ste(struct arm_smmu_master *master,
2945 struct iommu_domain *sid_domain,
2946 bool ats_enabled)
2948 unsigned int s1dss = STRTAB_STE_1_S1DSS_TERMINATE;
2949 struct arm_smmu_ste ste;
2951 if (master->cd_table.in_ste && master->ste_ats_enabled == ats_enabled)
2952 return;
2954 if (sid_domain->type == IOMMU_DOMAIN_IDENTITY)
2955 s1dss = STRTAB_STE_1_S1DSS_BYPASS;
2956 else
2957 WARN_ON(sid_domain->type != IOMMU_DOMAIN_BLOCKED);
2960 * Change the STE into a cdtable one with SID IDENTITY/BLOCKED behavior
2961 * using s1dss if necessary. If the cd_table is already installed then
2962 * the S1DSS is correct and this will just update the EATS. Otherwise it
2963 * installs the entire thing. This will be hitless.
2965 arm_smmu_make_cdtable_ste(&ste, master, ats_enabled, s1dss);
2966 arm_smmu_install_ste_for_dev(master, &ste);
2969 int arm_smmu_set_pasid(struct arm_smmu_master *master,
2970 struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
2971 struct arm_smmu_cd *cd, struct iommu_domain *old)
2973 struct iommu_domain *sid_domain = iommu_get_domain_for_dev(master->dev);
2974 struct arm_smmu_attach_state state = {
2975 .master = master,
2976 .ssid = pasid,
2977 .old_domain = old,
2979 struct arm_smmu_cd *cdptr;
2980 int ret;
2982 /* The core code validates pasid */
2984 if (smmu_domain->smmu != master->smmu)
2985 return -EINVAL;
2987 if (!master->cd_table.in_ste &&
2988 sid_domain->type != IOMMU_DOMAIN_IDENTITY &&
2989 sid_domain->type != IOMMU_DOMAIN_BLOCKED)
2990 return -EINVAL;
2992 cdptr = arm_smmu_alloc_cd_ptr(master, pasid);
2993 if (!cdptr)
2994 return -ENOMEM;
2996 mutex_lock(&arm_smmu_asid_lock);
2997 ret = arm_smmu_attach_prepare(&state, &smmu_domain->domain);
2998 if (ret)
2999 goto out_unlock;
3002 * We don't want to obtain to the asid_lock too early, so fix up the
3003 * caller set ASID under the lock in case it changed.
3005 cd->data[0] &= ~cpu_to_le64(CTXDESC_CD_0_ASID);
3006 cd->data[0] |= cpu_to_le64(
3007 FIELD_PREP(CTXDESC_CD_0_ASID, smmu_domain->cd.asid));
3009 arm_smmu_write_cd_entry(master, pasid, cdptr, cd);
3010 arm_smmu_update_ste(master, sid_domain, state.ats_enabled);
3012 arm_smmu_attach_commit(&state);
3014 out_unlock:
3015 mutex_unlock(&arm_smmu_asid_lock);
3016 return ret;
3019 static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
3020 struct iommu_domain *domain)
3022 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3023 struct arm_smmu_domain *smmu_domain;
3025 smmu_domain = to_smmu_domain(domain);
3027 mutex_lock(&arm_smmu_asid_lock);
3028 arm_smmu_clear_cd(master, pasid);
3029 if (master->ats_enabled)
3030 arm_smmu_atc_inv_master(master, pasid);
3031 arm_smmu_remove_master_domain(master, &smmu_domain->domain, pasid);
3032 mutex_unlock(&arm_smmu_asid_lock);
3035 * When the last user of the CD table goes away downgrade the STE back
3036 * to a non-cd_table one.
3038 if (!arm_smmu_ssids_in_use(&master->cd_table)) {
3039 struct iommu_domain *sid_domain =
3040 iommu_get_domain_for_dev(master->dev);
3042 if (sid_domain->type == IOMMU_DOMAIN_IDENTITY ||
3043 sid_domain->type == IOMMU_DOMAIN_BLOCKED)
3044 sid_domain->ops->attach_dev(sid_domain, dev);
3048 static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
3049 struct device *dev,
3050 struct arm_smmu_ste *ste,
3051 unsigned int s1dss)
3053 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3054 struct arm_smmu_attach_state state = {
3055 .master = master,
3056 .old_domain = iommu_get_domain_for_dev(dev),
3057 .ssid = IOMMU_NO_PASID,
3061 * Do not allow any ASID to be changed while are working on the STE,
3062 * otherwise we could miss invalidations.
3064 mutex_lock(&arm_smmu_asid_lock);
3067 * If the CD table is not in use we can use the provided STE, otherwise
3068 * we use a cdtable STE with the provided S1DSS.
3070 if (arm_smmu_ssids_in_use(&master->cd_table)) {
3072 * If a CD table has to be present then we need to run with ATS
3073 * on even though the RID will fail ATS queries with UR. This is
3074 * because we have no idea what the PASID's need.
3076 state.cd_needs_ats = true;
3077 arm_smmu_attach_prepare(&state, domain);
3078 arm_smmu_make_cdtable_ste(ste, master, state.ats_enabled, s1dss);
3079 } else {
3080 arm_smmu_attach_prepare(&state, domain);
3082 arm_smmu_install_ste_for_dev(master, ste);
3083 arm_smmu_attach_commit(&state);
3084 mutex_unlock(&arm_smmu_asid_lock);
3087 * This has to be done after removing the master from the
3088 * arm_smmu_domain->devices to avoid races updating the same context
3089 * descriptor from arm_smmu_share_asid().
3091 arm_smmu_clear_cd(master, IOMMU_NO_PASID);
3094 static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
3095 struct device *dev)
3097 struct arm_smmu_ste ste;
3098 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3100 arm_smmu_make_bypass_ste(master->smmu, &ste);
3101 arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_BYPASS);
3102 return 0;
3105 static const struct iommu_domain_ops arm_smmu_identity_ops = {
3106 .attach_dev = arm_smmu_attach_dev_identity,
3109 static struct iommu_domain arm_smmu_identity_domain = {
3110 .type = IOMMU_DOMAIN_IDENTITY,
3111 .ops = &arm_smmu_identity_ops,
3114 static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
3115 struct device *dev)
3117 struct arm_smmu_ste ste;
3119 arm_smmu_make_abort_ste(&ste);
3120 arm_smmu_attach_dev_ste(domain, dev, &ste,
3121 STRTAB_STE_1_S1DSS_TERMINATE);
3122 return 0;
3125 static const struct iommu_domain_ops arm_smmu_blocked_ops = {
3126 .attach_dev = arm_smmu_attach_dev_blocked,
3129 static struct iommu_domain arm_smmu_blocked_domain = {
3130 .type = IOMMU_DOMAIN_BLOCKED,
3131 .ops = &arm_smmu_blocked_ops,
3134 static struct iommu_domain *
3135 arm_smmu_domain_alloc_paging_flags(struct device *dev, u32 flags,
3136 const struct iommu_user_data *user_data)
3138 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3139 const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
3140 IOMMU_HWPT_ALLOC_PASID |
3141 IOMMU_HWPT_ALLOC_NEST_PARENT;
3142 struct arm_smmu_domain *smmu_domain;
3143 int ret;
3145 if (flags & ~PAGING_FLAGS)
3146 return ERR_PTR(-EOPNOTSUPP);
3147 if (user_data)
3148 return ERR_PTR(-EOPNOTSUPP);
3150 if (flags & IOMMU_HWPT_ALLOC_PASID)
3151 return arm_smmu_domain_alloc_paging(dev);
3153 smmu_domain = arm_smmu_domain_alloc();
3154 if (IS_ERR(smmu_domain))
3155 return ERR_CAST(smmu_domain);
3157 if (flags & IOMMU_HWPT_ALLOC_NEST_PARENT) {
3158 if (!(master->smmu->features & ARM_SMMU_FEAT_NESTING)) {
3159 ret = -EOPNOTSUPP;
3160 goto err_free;
3162 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
3163 smmu_domain->nest_parent = true;
3166 smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
3167 smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
3168 ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, flags);
3169 if (ret)
3170 goto err_free;
3171 return &smmu_domain->domain;
3173 err_free:
3174 kfree(smmu_domain);
3175 return ERR_PTR(ret);
3178 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
3179 phys_addr_t paddr, size_t pgsize, size_t pgcount,
3180 int prot, gfp_t gfp, size_t *mapped)
3182 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
3184 if (!ops)
3185 return -ENODEV;
3187 return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
3190 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
3191 size_t pgsize, size_t pgcount,
3192 struct iommu_iotlb_gather *gather)
3194 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3195 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
3197 if (!ops)
3198 return 0;
3200 return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
3203 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
3205 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3207 if (smmu_domain->smmu)
3208 arm_smmu_tlb_inv_context(smmu_domain);
3211 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
3212 struct iommu_iotlb_gather *gather)
3214 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3216 if (!gather->pgsize)
3217 return;
3219 arm_smmu_tlb_inv_range_domain(gather->start,
3220 gather->end - gather->start + 1,
3221 gather->pgsize, true, smmu_domain);
3224 static phys_addr_t
3225 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
3227 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
3229 if (!ops)
3230 return 0;
3232 return ops->iova_to_phys(ops, iova);
3235 static struct platform_driver arm_smmu_driver;
3237 static
3238 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
3240 struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
3241 fwnode);
3242 put_device(dev);
3243 return dev ? dev_get_drvdata(dev) : NULL;
3246 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
3248 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3249 return arm_smmu_strtab_l1_idx(sid) < smmu->strtab_cfg.l2.num_l1_ents;
3250 return sid < smmu->strtab_cfg.linear.num_ents;
3253 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
3255 /* Check the SIDs are in range of the SMMU and our stream table */
3256 if (!arm_smmu_sid_in_range(smmu, sid))
3257 return -ERANGE;
3259 /* Ensure l2 strtab is initialised */
3260 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3261 return arm_smmu_init_l2_strtab(smmu, sid);
3263 return 0;
3266 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
3267 struct arm_smmu_master *master)
3269 int i;
3270 int ret = 0;
3271 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
3273 master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
3274 GFP_KERNEL);
3275 if (!master->streams)
3276 return -ENOMEM;
3277 master->num_streams = fwspec->num_ids;
3279 mutex_lock(&smmu->streams_mutex);
3280 for (i = 0; i < fwspec->num_ids; i++) {
3281 struct arm_smmu_stream *new_stream = &master->streams[i];
3282 u32 sid = fwspec->ids[i];
3284 new_stream->id = sid;
3285 new_stream->master = master;
3287 ret = arm_smmu_init_sid_strtab(smmu, sid);
3288 if (ret)
3289 break;
3291 /* Insert into SID tree */
3292 if (rb_find_add(&new_stream->node, &smmu->streams,
3293 arm_smmu_streams_cmp_node)) {
3294 dev_warn(master->dev, "stream %u already in tree\n",
3295 sid);
3296 ret = -EINVAL;
3297 break;
3301 if (ret) {
3302 for (i--; i >= 0; i--)
3303 rb_erase(&master->streams[i].node, &smmu->streams);
3304 kfree(master->streams);
3306 mutex_unlock(&smmu->streams_mutex);
3308 return ret;
3311 static void arm_smmu_remove_master(struct arm_smmu_master *master)
3313 int i;
3314 struct arm_smmu_device *smmu = master->smmu;
3315 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
3317 if (!smmu || !master->streams)
3318 return;
3320 mutex_lock(&smmu->streams_mutex);
3321 for (i = 0; i < fwspec->num_ids; i++)
3322 rb_erase(&master->streams[i].node, &smmu->streams);
3323 mutex_unlock(&smmu->streams_mutex);
3325 kfree(master->streams);
3328 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
3330 int ret;
3331 struct arm_smmu_device *smmu;
3332 struct arm_smmu_master *master;
3333 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
3335 if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
3336 return ERR_PTR(-EBUSY);
3338 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
3339 if (!smmu)
3340 return ERR_PTR(-ENODEV);
3342 master = kzalloc(sizeof(*master), GFP_KERNEL);
3343 if (!master)
3344 return ERR_PTR(-ENOMEM);
3346 master->dev = dev;
3347 master->smmu = smmu;
3348 dev_iommu_priv_set(dev, master);
3350 ret = arm_smmu_insert_master(smmu, master);
3351 if (ret)
3352 goto err_free_master;
3354 device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
3355 master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
3358 * Note that PASID must be enabled before, and disabled after ATS:
3359 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
3361 * Behavior is undefined if this bit is Set and the value of the PASID
3362 * Enable, Execute Requested Enable, or Privileged Mode Requested bits
3363 * are changed.
3365 arm_smmu_enable_pasid(master);
3367 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
3368 master->ssid_bits = min_t(u8, master->ssid_bits,
3369 CTXDESC_LINEAR_CDMAX);
3371 if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
3372 device_property_read_bool(dev, "dma-can-stall")) ||
3373 smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
3374 master->stall_enabled = true;
3376 if (dev_is_pci(dev)) {
3377 unsigned int stu = __ffs(smmu->pgsize_bitmap);
3379 pci_prepare_ats(to_pci_dev(dev), stu);
3382 return &smmu->iommu;
3384 err_free_master:
3385 kfree(master);
3386 return ERR_PTR(ret);
3389 static void arm_smmu_release_device(struct device *dev)
3391 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3393 if (WARN_ON(arm_smmu_master_sva_enabled(master)))
3394 iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
3396 /* Put the STE back to what arm_smmu_init_strtab() sets */
3397 if (dev->iommu->require_direct)
3398 arm_smmu_attach_dev_identity(&arm_smmu_identity_domain, dev);
3399 else
3400 arm_smmu_attach_dev_blocked(&arm_smmu_blocked_domain, dev);
3402 arm_smmu_disable_pasid(master);
3403 arm_smmu_remove_master(master);
3404 if (arm_smmu_cdtab_allocated(&master->cd_table))
3405 arm_smmu_free_cd_tables(master);
3406 kfree(master);
3409 static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain,
3410 unsigned long iova, size_t size,
3411 unsigned long flags,
3412 struct iommu_dirty_bitmap *dirty)
3414 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3415 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
3417 return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
3420 static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain,
3421 bool enabled)
3424 * Always enabled and the dirty bitmap is cleared prior to
3425 * set_dirty_tracking().
3427 return 0;
/*
 * Pick the IOMMU group for @dev: the PCI grouping for PCI devices, a
 * generic group of its own for anything else.
 */
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	/*
	 * We don't support devices sharing stream IDs other than PCI RID
	 * aliases, since the necessary ID-to-device lookup becomes rather
	 * impractical given a potential sparse 32-bit stream ID space.
	 */
	if (!dev_is_pci(dev))
		return generic_device_group(dev);

	return pci_device_group(dev);
}
3447 static int arm_smmu_of_xlate(struct device *dev,
3448 const struct of_phandle_args *args)
3450 return iommu_fwspec_add_ids(dev, args->args, 1);
3453 static void arm_smmu_get_resv_regions(struct device *dev,
3454 struct list_head *head)
3456 struct iommu_resv_region *region;
3457 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
3459 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
3460 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
3461 if (!region)
3462 return;
3464 list_add_tail(&region->list, head);
3466 iommu_dma_get_resv_regions(dev, head);
3469 static int arm_smmu_dev_enable_feature(struct device *dev,
3470 enum iommu_dev_features feat)
3472 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3474 if (!master)
3475 return -ENODEV;
3477 switch (feat) {
3478 case IOMMU_DEV_FEAT_IOPF:
3479 if (!arm_smmu_master_iopf_supported(master))
3480 return -EINVAL;
3481 if (master->iopf_enabled)
3482 return -EBUSY;
3483 master->iopf_enabled = true;
3484 return 0;
3485 case IOMMU_DEV_FEAT_SVA:
3486 if (!arm_smmu_master_sva_supported(master))
3487 return -EINVAL;
3488 if (arm_smmu_master_sva_enabled(master))
3489 return -EBUSY;
3490 return arm_smmu_master_enable_sva(master);
3491 default:
3492 return -EINVAL;
3496 static int arm_smmu_dev_disable_feature(struct device *dev,
3497 enum iommu_dev_features feat)
3499 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3501 if (!master)
3502 return -EINVAL;
3504 switch (feat) {
3505 case IOMMU_DEV_FEAT_IOPF:
3506 if (!master->iopf_enabled)
3507 return -EINVAL;
3508 if (master->sva_enabled)
3509 return -EBUSY;
3510 master->iopf_enabled = false;
3511 return 0;
3512 case IOMMU_DEV_FEAT_SVA:
3513 if (!arm_smmu_master_sva_enabled(master))
3514 return -EINVAL;
3515 return arm_smmu_master_disable_sva(master);
3516 default:
3517 return -EINVAL;
/*
 * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
 * PCIe link and save the data to memory by DMA. The hardware is restricted to
 * use identity mapping only.
 */
#define IS_HISI_PTT_DEVICE(pdev)	((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
					 (pdev)->device == 0xa12e)
3529 static int arm_smmu_def_domain_type(struct device *dev)
3531 if (dev_is_pci(dev)) {
3532 struct pci_dev *pdev = to_pci_dev(dev);
3534 if (IS_HISI_PTT_DEVICE(pdev))
3535 return IOMMU_DOMAIN_IDENTITY;
3538 return 0;
3541 static struct iommu_ops arm_smmu_ops = {
3542 .identity_domain = &arm_smmu_identity_domain,
3543 .blocked_domain = &arm_smmu_blocked_domain,
3544 .capable = arm_smmu_capable,
3545 .hw_info = arm_smmu_hw_info,
3546 .domain_alloc_paging = arm_smmu_domain_alloc_paging,
3547 .domain_alloc_sva = arm_smmu_sva_domain_alloc,
3548 .domain_alloc_paging_flags = arm_smmu_domain_alloc_paging_flags,
3549 .probe_device = arm_smmu_probe_device,
3550 .release_device = arm_smmu_release_device,
3551 .device_group = arm_smmu_device_group,
3552 .of_xlate = arm_smmu_of_xlate,
3553 .get_resv_regions = arm_smmu_get_resv_regions,
3554 .remove_dev_pasid = arm_smmu_remove_dev_pasid,
3555 .dev_enable_feat = arm_smmu_dev_enable_feature,
3556 .dev_disable_feat = arm_smmu_dev_disable_feature,
3557 .page_response = arm_smmu_page_response,
3558 .def_domain_type = arm_smmu_def_domain_type,
3559 .viommu_alloc = arm_vsmmu_alloc,
3560 .user_pasid_table = 1,
3561 .pgsize_bitmap = -1UL, /* Restricted during device attach */
3562 .owner = THIS_MODULE,
3563 .default_domain_ops = &(const struct iommu_domain_ops) {
3564 .attach_dev = arm_smmu_attach_dev,
3565 .enforce_cache_coherency = arm_smmu_enforce_cache_coherency,
3566 .set_dev_pasid = arm_smmu_s1_set_dev_pasid,
3567 .map_pages = arm_smmu_map_pages,
3568 .unmap_pages = arm_smmu_unmap_pages,
3569 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
3570 .iotlb_sync = arm_smmu_iotlb_sync,
3571 .iova_to_phys = arm_smmu_iova_to_phys,
3572 .free = arm_smmu_domain_free_paging,
3576 static struct iommu_dirty_ops arm_smmu_dirty_ops = {
3577 .read_and_clear_dirty = arm_smmu_read_and_clear_dirty,
3578 .set_dirty_tracking = arm_smmu_set_dirty_tracking,
3581 /* Probing and initialisation functions */
3582 int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
3583 struct arm_smmu_queue *q, void __iomem *page,
3584 unsigned long prod_off, unsigned long cons_off,
3585 size_t dwords, const char *name)
3587 size_t qsz;
3589 do {
3590 qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
3591 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
3592 GFP_KERNEL);
3593 if (q->base || qsz < PAGE_SIZE)
3594 break;
3596 q->llq.max_n_shift--;
3597 } while (1);
3599 if (!q->base) {
3600 dev_err(smmu->dev,
3601 "failed to allocate queue (0x%zx bytes) for %s\n",
3602 qsz, name);
3603 return -ENOMEM;
3606 if (!WARN_ON(q->base_dma & (qsz - 1))) {
3607 dev_info(smmu->dev, "allocated %u entries for %s\n",
3608 1 << q->llq.max_n_shift, name);
3611 q->prod_reg = page + prod_off;
3612 q->cons_reg = page + cons_off;
3613 q->ent_dwords = dwords;
3615 q->q_base = Q_BASE_RWA;
3616 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
3617 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
3619 q->llq.prod = q->llq.cons = 0;
3620 return 0;
3623 int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
3624 struct arm_smmu_cmdq *cmdq)
3626 unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
3628 atomic_set(&cmdq->owner_prod, 0);
3629 atomic_set(&cmdq->lock, 0);
3631 cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
3632 GFP_KERNEL);
3633 if (!cmdq->valid_map)
3634 return -ENOMEM;
3636 return 0;
3639 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
3641 int ret;
3643 /* cmdq */
3644 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
3645 ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
3646 CMDQ_ENT_DWORDS, "cmdq");
3647 if (ret)
3648 return ret;
3650 ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq);
3651 if (ret)
3652 return ret;
3654 /* evtq */
3655 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
3656 ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
3657 EVTQ_ENT_DWORDS, "evtq");
3658 if (ret)
3659 return ret;
3661 if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
3662 (smmu->features & ARM_SMMU_FEAT_STALLS)) {
3663 smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
3664 if (!smmu->evtq.iopf)
3665 return -ENOMEM;
3668 /* priq */
3669 if (!(smmu->features & ARM_SMMU_FEAT_PRI))
3670 return 0;
3672 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
3673 ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
3674 PRIQ_ENT_DWORDS, "priq");
3677 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3679 u32 l1size;
3680 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3681 unsigned int last_sid_idx =
3682 arm_smmu_strtab_l1_idx((1ULL << smmu->sid_bits) - 1);
3684 /* Calculate the L1 size, capped to the SIDSIZE. */
3685 cfg->l2.num_l1_ents = min(last_sid_idx + 1, STRTAB_MAX_L1_ENTRIES);
3686 if (cfg->l2.num_l1_ents <= last_sid_idx)
3687 dev_warn(smmu->dev,
3688 "2-level strtab only covers %u/%u bits of SID\n",
3689 ilog2(cfg->l2.num_l1_ents * STRTAB_NUM_L2_STES),
3690 smmu->sid_bits);
3692 l1size = cfg->l2.num_l1_ents * sizeof(struct arm_smmu_strtab_l1);
3693 cfg->l2.l1tab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->l2.l1_dma,
3694 GFP_KERNEL);
3695 if (!cfg->l2.l1tab) {
3696 dev_err(smmu->dev,
3697 "failed to allocate l1 stream table (%u bytes)\n",
3698 l1size);
3699 return -ENOMEM;
3702 cfg->l2.l2ptrs = devm_kcalloc(smmu->dev, cfg->l2.num_l1_ents,
3703 sizeof(*cfg->l2.l2ptrs), GFP_KERNEL);
3704 if (!cfg->l2.l2ptrs)
3705 return -ENOMEM;
3707 return 0;
3710 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3712 u32 size;
3713 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3715 size = (1 << smmu->sid_bits) * sizeof(struct arm_smmu_ste);
3716 cfg->linear.table = dmam_alloc_coherent(smmu->dev, size,
3717 &cfg->linear.ste_dma,
3718 GFP_KERNEL);
3719 if (!cfg->linear.table) {
3720 dev_err(smmu->dev,
3721 "failed to allocate linear stream table (%u bytes)\n",
3722 size);
3723 return -ENOMEM;
3725 cfg->linear.num_ents = 1 << smmu->sid_bits;
3727 arm_smmu_init_initial_stes(cfg->linear.table, cfg->linear.num_ents);
3728 return 0;
3731 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3733 int ret;
3735 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3736 ret = arm_smmu_init_strtab_2lvl(smmu);
3737 else
3738 ret = arm_smmu_init_strtab_linear(smmu);
3739 if (ret)
3740 return ret;
3742 ida_init(&smmu->vmid_map);
3744 return 0;
3747 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3749 int ret;
3751 mutex_init(&smmu->streams_mutex);
3752 smmu->streams = RB_ROOT;
3754 ret = arm_smmu_init_queues(smmu);
3755 if (ret)
3756 return ret;
3758 ret = arm_smmu_init_strtab(smmu);
3759 if (ret)
3760 return ret;
3762 if (smmu->impl_ops && smmu->impl_ops->init_structures)
3763 return smmu->impl_ops->init_structures(smmu);
3765 return 0;
3768 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3769 unsigned int reg_off, unsigned int ack_off)
3771 u32 reg;
3773 writel_relaxed(val, smmu->base + reg_off);
3774 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3775 1, ARM_SMMU_POLL_TIMEOUT_US);
3778 /* GBPA is "special" */
3779 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3781 int ret;
3782 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3784 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3785 1, ARM_SMMU_POLL_TIMEOUT_US);
3786 if (ret)
3787 return ret;
3789 reg &= ~clr;
3790 reg |= set;
3791 writel_relaxed(reg | GBPA_UPDATE, gbpa);
3792 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3793 1, ARM_SMMU_POLL_TIMEOUT_US);
3795 if (ret)
3796 dev_err(smmu->dev, "GBPA not responding to update\n");
3797 return ret;
/* devm action: free every platform MSI of the device passed as @data. */
static void arm_smmu_free_msis(void *data)
{
	platform_device_msi_free_irqs_all((struct device *)data);
}
3807 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3809 phys_addr_t doorbell;
3810 struct device *dev = msi_desc_to_dev(desc);
3811 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3812 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3814 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3815 doorbell &= MSI_CFG0_ADDR_MASK;
3817 writeq_relaxed(doorbell, smmu->base + cfg[0]);
3818 writel_relaxed(msg->data, smmu->base + cfg[1]);
3819 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3822 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3824 int ret, nvec = ARM_SMMU_MAX_MSIS;
3825 struct device *dev = smmu->dev;
3827 /* Clear the MSI address regs */
3828 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3829 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3831 if (smmu->features & ARM_SMMU_FEAT_PRI)
3832 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3833 else
3834 nvec--;
3836 if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3837 return;
3839 if (!dev->msi.domain) {
3840 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3841 return;
3844 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3845 ret = platform_device_msi_init_and_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3846 if (ret) {
3847 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3848 return;
3851 smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3852 smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3853 smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3855 /* Add callback to free MSIs on teardown */
3856 devm_add_action_or_reset(dev, arm_smmu_free_msis, dev);
3859 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3861 int irq, ret;
3863 arm_smmu_setup_msis(smmu);
3865 /* Request interrupt lines */
3866 irq = smmu->evtq.q.irq;
3867 if (irq) {
3868 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3869 arm_smmu_evtq_thread,
3870 IRQF_ONESHOT,
3871 "arm-smmu-v3-evtq", smmu);
3872 if (ret < 0)
3873 dev_warn(smmu->dev, "failed to enable evtq irq\n");
3874 } else {
3875 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3878 irq = smmu->gerr_irq;
3879 if (irq) {
3880 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3881 0, "arm-smmu-v3-gerror", smmu);
3882 if (ret < 0)
3883 dev_warn(smmu->dev, "failed to enable gerror irq\n");
3884 } else {
3885 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3888 if (smmu->features & ARM_SMMU_FEAT_PRI) {
3889 irq = smmu->priq.q.irq;
3890 if (irq) {
3891 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3892 arm_smmu_priq_thread,
3893 IRQF_ONESHOT,
3894 "arm-smmu-v3-priq",
3895 smmu);
3896 if (ret < 0)
3897 dev_warn(smmu->dev,
3898 "failed to enable priq irq\n");
3899 } else {
3900 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3905 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3907 int ret, irq;
3908 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3910 /* Disable IRQs first */
3911 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3912 ARM_SMMU_IRQ_CTRLACK);
3913 if (ret) {
3914 dev_err(smmu->dev, "failed to disable irqs\n");
3915 return ret;
3918 irq = smmu->combined_irq;
3919 if (irq) {
3921 * Cavium ThunderX2 implementation doesn't support unique irq
3922 * lines. Use a single irq line for all the SMMUv3 interrupts.
3924 ret = devm_request_threaded_irq(smmu->dev, irq,
3925 arm_smmu_combined_irq_handler,
3926 arm_smmu_combined_irq_thread,
3927 IRQF_ONESHOT,
3928 "arm-smmu-v3-combined-irq", smmu);
3929 if (ret < 0)
3930 dev_warn(smmu->dev, "failed to enable combined irq\n");
3931 } else
3932 arm_smmu_setup_unique_irqs(smmu);
3934 if (smmu->features & ARM_SMMU_FEAT_PRI)
3935 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3937 /* Enable interrupt generation on the SMMU */
3938 ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3939 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3940 if (ret)
3941 dev_warn(smmu->dev, "failed to enable irqs\n");
3943 return 0;
3946 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3948 int ret;
3950 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3951 if (ret)
3952 dev_err(smmu->dev, "failed to clear cr0\n");
3954 return ret;
3957 static void arm_smmu_write_strtab(struct arm_smmu_device *smmu)
3959 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3960 dma_addr_t dma;
3961 u32 reg;
3963 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
3964 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
3965 STRTAB_BASE_CFG_FMT_2LVL) |
3966 FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE,
3967 ilog2(cfg->l2.num_l1_ents) + STRTAB_SPLIT) |
3968 FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3969 dma = cfg->l2.l1_dma;
3970 } else {
3971 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
3972 STRTAB_BASE_CFG_FMT_LINEAR) |
3973 FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3974 dma = cfg->linear.ste_dma;
3976 writeq_relaxed((dma & STRTAB_BASE_ADDR_MASK) | STRTAB_BASE_RA,
3977 smmu->base + ARM_SMMU_STRTAB_BASE);
3978 writel_relaxed(reg, smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3981 static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
3983 int ret;
3984 u32 reg, enables;
3985 struct arm_smmu_cmdq_ent cmd;
3987 /* Clear CR0 and sync (disables SMMU and queue processing) */
3988 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3989 if (reg & CR0_SMMUEN) {
3990 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3991 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3994 ret = arm_smmu_device_disable(smmu);
3995 if (ret)
3996 return ret;
3998 /* CR1 (table and queue memory attributes) */
3999 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
4000 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
4001 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
4002 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
4003 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
4004 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
4005 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
4007 /* CR2 (random crap) */
4008 reg = CR2_PTM | CR2_RECINVSID;
4010 if (smmu->features & ARM_SMMU_FEAT_E2H)
4011 reg |= CR2_E2H;
4013 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
4015 /* Stream table */
4016 arm_smmu_write_strtab(smmu);
4018 /* Command queue */
4019 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
4020 writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
4021 writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
4023 enables = CR0_CMDQEN;
4024 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
4025 ARM_SMMU_CR0ACK);
4026 if (ret) {
4027 dev_err(smmu->dev, "failed to enable command queue\n");
4028 return ret;
4031 /* Invalidate any cached configuration */
4032 cmd.opcode = CMDQ_OP_CFGI_ALL;
4033 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
4035 /* Invalidate any stale TLB entries */
4036 if (smmu->features & ARM_SMMU_FEAT_HYP) {
4037 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
4038 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
4041 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
4042 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
4044 /* Event queue */
4045 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
4046 writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
4047 writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
4049 enables |= CR0_EVTQEN;
4050 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
4051 ARM_SMMU_CR0ACK);
4052 if (ret) {
4053 dev_err(smmu->dev, "failed to enable event queue\n");
4054 return ret;
4057 /* PRI queue */
4058 if (smmu->features & ARM_SMMU_FEAT_PRI) {
4059 writeq_relaxed(smmu->priq.q.q_base,
4060 smmu->base + ARM_SMMU_PRIQ_BASE);
4061 writel_relaxed(smmu->priq.q.llq.prod,
4062 smmu->page1 + ARM_SMMU_PRIQ_PROD);
4063 writel_relaxed(smmu->priq.q.llq.cons,
4064 smmu->page1 + ARM_SMMU_PRIQ_CONS);
4066 enables |= CR0_PRIQEN;
4067 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
4068 ARM_SMMU_CR0ACK);
4069 if (ret) {
4070 dev_err(smmu->dev, "failed to enable PRI queue\n");
4071 return ret;
4075 if (smmu->features & ARM_SMMU_FEAT_ATS) {
4076 enables |= CR0_ATSCHK;
4077 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
4078 ARM_SMMU_CR0ACK);
4079 if (ret) {
4080 dev_err(smmu->dev, "failed to enable ATS check\n");
4081 return ret;
4085 ret = arm_smmu_setup_irqs(smmu);
4086 if (ret) {
4087 dev_err(smmu->dev, "failed to setup irqs\n");
4088 return ret;
4091 if (is_kdump_kernel())
4092 enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
4094 /* Enable the SMMU interface */
4095 enables |= CR0_SMMUEN;
4096 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
4097 ARM_SMMU_CR0ACK);
4098 if (ret) {
4099 dev_err(smmu->dev, "failed to enable SMMU interface\n");
4100 return ret;
4103 if (smmu->impl_ops && smmu->impl_ops->device_reset) {
4104 ret = smmu->impl_ops->device_reset(smmu);
4105 if (ret) {
4106 dev_err(smmu->dev, "failed to reset impl\n");
4107 return ret;
4111 return 0;
/* IIDR field values matched by arm_smmu_device_iidr_probe() */
#define IIDR_IMPLEMENTER_ARM		0x43b
#define IIDR_PRODUCTID_ARM_MMU_600	0x483
#define IIDR_PRODUCTID_ARM_MMU_700	0x487
4118 static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
4120 u32 reg;
4121 unsigned int implementer, productid, variant, revision;
4123 reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
4124 implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
4125 productid = FIELD_GET(IIDR_PRODUCTID, reg);
4126 variant = FIELD_GET(IIDR_VARIANT, reg);
4127 revision = FIELD_GET(IIDR_REVISION, reg);
4129 switch (implementer) {
4130 case IIDR_IMPLEMENTER_ARM:
4131 switch (productid) {
4132 case IIDR_PRODUCTID_ARM_MMU_600:
4133 /* Arm erratum 1076982 */
4134 if (variant == 0 && revision <= 2)
4135 smmu->features &= ~ARM_SMMU_FEAT_SEV;
4136 /* Arm erratum 1209401 */
4137 if (variant < 2)
4138 smmu->features &= ~ARM_SMMU_FEAT_NESTING;
4139 break;
4140 case IIDR_PRODUCTID_ARM_MMU_700:
4141 /* Arm erratum 2812531 */
4142 smmu->features &= ~ARM_SMMU_FEAT_BTM;
4143 smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
4144 /* Arm errata 2268618, 2812531 */
4145 smmu->features &= ~ARM_SMMU_FEAT_NESTING;
4146 break;
4148 break;
4152 static void arm_smmu_get_httu(struct arm_smmu_device *smmu, u32 reg)
4154 u32 fw_features = smmu->features & (ARM_SMMU_FEAT_HA | ARM_SMMU_FEAT_HD);
4155 u32 hw_features = 0;
4157 switch (FIELD_GET(IDR0_HTTU, reg)) {
4158 case IDR0_HTTU_ACCESS_DIRTY:
4159 hw_features |= ARM_SMMU_FEAT_HD;
4160 fallthrough;
4161 case IDR0_HTTU_ACCESS:
4162 hw_features |= ARM_SMMU_FEAT_HA;
4165 if (smmu->dev->of_node)
4166 smmu->features |= hw_features;
4167 else if (hw_features != fw_features)
4168 /* ACPI IORT sets the HTTU bits */
4169 dev_warn(smmu->dev,
4170 "IDR0.HTTU features(0x%x) overridden by FW configuration (0x%x)\n",
4171 hw_features, fw_features);
4174 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
4176 u32 reg;
4177 bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
4179 /* IDR0 */
4180 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
4182 /* 2-level structures */
4183 if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
4184 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
4186 if (reg & IDR0_CD2L)
4187 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
4190 * Translation table endianness.
4191 * We currently require the same endianness as the CPU, but this
4192 * could be changed later by adding a new IO_PGTABLE_QUIRK.
4194 switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
4195 case IDR0_TTENDIAN_MIXED:
4196 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
4197 break;
4198 #ifdef __BIG_ENDIAN
4199 case IDR0_TTENDIAN_BE:
4200 smmu->features |= ARM_SMMU_FEAT_TT_BE;
4201 break;
4202 #else
4203 case IDR0_TTENDIAN_LE:
4204 smmu->features |= ARM_SMMU_FEAT_TT_LE;
4205 break;
4206 #endif
4207 default:
4208 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
4209 return -ENXIO;
4212 /* Boolean feature flags */
4213 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
4214 smmu->features |= ARM_SMMU_FEAT_PRI;
4216 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
4217 smmu->features |= ARM_SMMU_FEAT_ATS;
4219 if (reg & IDR0_SEV)
4220 smmu->features |= ARM_SMMU_FEAT_SEV;
4222 if (reg & IDR0_MSI) {
4223 smmu->features |= ARM_SMMU_FEAT_MSI;
4224 if (coherent && !disable_msipolling)
4225 smmu->options |= ARM_SMMU_OPT_MSIPOLL;
4228 if (reg & IDR0_HYP) {
4229 smmu->features |= ARM_SMMU_FEAT_HYP;
4230 if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
4231 smmu->features |= ARM_SMMU_FEAT_E2H;
4234 arm_smmu_get_httu(smmu, reg);
4237 * The coherency feature as set by FW is used in preference to the ID
4238 * register, but warn on mismatch.
4240 if (!!(reg & IDR0_COHACC) != coherent)
4241 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
4242 coherent ? "true" : "false");
4244 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
4245 case IDR0_STALL_MODEL_FORCE:
4246 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
4247 fallthrough;
4248 case IDR0_STALL_MODEL_STALL:
4249 smmu->features |= ARM_SMMU_FEAT_STALLS;
4252 if (reg & IDR0_S1P)
4253 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
4255 if (reg & IDR0_S2P)
4256 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
4258 if (!(reg & (IDR0_S1P | IDR0_S2P))) {
4259 dev_err(smmu->dev, "no translation support!\n");
4260 return -ENXIO;
4263 /* We only support the AArch64 table format at present */
4264 switch (FIELD_GET(IDR0_TTF, reg)) {
4265 case IDR0_TTF_AARCH32_64:
4266 smmu->ias = 40;
4267 fallthrough;
4268 case IDR0_TTF_AARCH64:
4269 break;
4270 default:
4271 dev_err(smmu->dev, "AArch64 table format not supported!\n");
4272 return -ENXIO;
4275 /* ASID/VMID sizes */
4276 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
4277 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
4279 /* IDR1 */
4280 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
4281 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
4282 dev_err(smmu->dev, "embedded implementation not supported\n");
4283 return -ENXIO;
4286 if (reg & IDR1_ATTR_TYPES_OVR)
4287 smmu->features |= ARM_SMMU_FEAT_ATTR_TYPES_OVR;
4289 /* Queue sizes, capped to ensure natural alignment */
4290 smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
4291 FIELD_GET(IDR1_CMDQS, reg));
4292 if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
4294 * We don't support splitting up batches, so one batch of
4295 * commands plus an extra sync needs to fit inside the command
4296 * queue. There's also no way we can handle the weird alignment
4297 * restrictions on the base pointer for a unit-length queue.
4299 dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
4300 CMDQ_BATCH_ENTRIES);
4301 return -ENXIO;
4304 smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
4305 FIELD_GET(IDR1_EVTQS, reg));
4306 smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
4307 FIELD_GET(IDR1_PRIQS, reg));
4309 /* SID/SSID sizes */
4310 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
4311 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
4312 smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
4315 * If the SMMU supports fewer bits than would fill a single L2 stream
4316 * table, use a linear table instead.
4318 if (smmu->sid_bits <= STRTAB_SPLIT)
4319 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
4321 /* IDR3 */
4322 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
4323 if (FIELD_GET(IDR3_RIL, reg))
4324 smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
4326 /* IDR5 */
4327 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
4329 /* Maximum number of outstanding stalls */
4330 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
4332 /* Page sizes */
4333 if (reg & IDR5_GRAN64K)
4334 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
4335 if (reg & IDR5_GRAN16K)
4336 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
4337 if (reg & IDR5_GRAN4K)
4338 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
4340 /* Input address size */
4341 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
4342 smmu->features |= ARM_SMMU_FEAT_VAX;
4344 /* Output address size */
4345 switch (FIELD_GET(IDR5_OAS, reg)) {
4346 case IDR5_OAS_32_BIT:
4347 smmu->oas = 32;
4348 break;
4349 case IDR5_OAS_36_BIT:
4350 smmu->oas = 36;
4351 break;
4352 case IDR5_OAS_40_BIT:
4353 smmu->oas = 40;
4354 break;
4355 case IDR5_OAS_42_BIT:
4356 smmu->oas = 42;
4357 break;
4358 case IDR5_OAS_44_BIT:
4359 smmu->oas = 44;
4360 break;
4361 case IDR5_OAS_52_BIT:
4362 smmu->oas = 52;
4363 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
4364 break;
4365 default:
4366 dev_info(smmu->dev,
4367 "unknown output address size. Truncating to 48-bit\n");
4368 fallthrough;
4369 case IDR5_OAS_48_BIT:
4370 smmu->oas = 48;
4373 if (arm_smmu_ops.pgsize_bitmap == -1UL)
4374 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
4375 else
4376 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
4378 /* Set the DMA mask for our table walker */
4379 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
4380 dev_warn(smmu->dev,
4381 "failed to set DMA mask for table walker\n");
4383 smmu->ias = max(smmu->ias, smmu->oas);
4385 if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
4386 (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
4387 smmu->features |= ARM_SMMU_FEAT_NESTING;
4389 arm_smmu_device_iidr_probe(smmu);
4391 if (arm_smmu_sva_supported(smmu))
4392 smmu->features |= ARM_SMMU_FEAT_SVA;
4394 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
4395 smmu->ias, smmu->oas, smmu->features);
4396 return 0;
4399 #ifdef CONFIG_ACPI
#ifdef CONFIG_TEGRA241_CMDQV
/*
 * Search the ACPI namespace for the Tegra241 CMDQV companion of this SMMU.
 *
 * @node: IORT node of the SMMU; its identifier, printed as a decimal string,
 *        is matched against the _UID of an "NVDA200C" ACPI device.
 * @smmu: SMMU instance to update. On a match, impl_dev is pointed at the
 *        companion device and ARM_SMMU_OPT_TEGRA241_CMDQV is set in options.
 *
 * Not finding a companion device is not an error: @smmu is left untouched.
 */
static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
						struct arm_smmu_device *smmu)
{
	const char *uid = kasprintf(GFP_KERNEL, "%u", node->identifier);
	struct acpi_device *adev;

	/*
	 * acpi_dev_get_first_match_dev() treats a NULL uid as "don't check
	 * _UID", so on allocation failure give up instead of risking a match
	 * against the CMDQV instance that belongs to a different SMMU.
	 */
	if (!uid)
		return;

	/* Look for an NVDA200C node whose _UID matches the SMMU node ID */
	adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1);
	if (adev) {
		/* Tegra241 CMDQV driver is responsible for put_device() */
		smmu->impl_dev = &adev->dev;
		smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV;
		dev_info(smmu->dev, "found companion CMDQV device: %s\n",
			 dev_name(smmu->impl_dev));
	}
	kfree(uid);
}
#else
/* CMDQV support is compiled out: nothing to look up. */
static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
						struct arm_smmu_device *smmu)
{
}
#endif
4425 static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node,
4426 struct arm_smmu_device *smmu)
4428 struct acpi_iort_smmu_v3 *iort_smmu =
4429 (struct acpi_iort_smmu_v3 *)node->node_data;
4431 switch (iort_smmu->model) {
4432 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
4433 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
4434 break;
4435 case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
4436 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
4437 break;
4438 case ACPI_IORT_SMMU_V3_GENERIC:
4440 * Tegra241 implementation stores its SMMU options and impl_dev
4441 * in DSDT. Thus, go through the ACPI tables unconditionally.
4443 acpi_smmu_dsdt_probe_tegra241_cmdqv(node, smmu);
4444 break;
4447 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
4448 return 0;
4451 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
4452 struct arm_smmu_device *smmu)
4454 struct acpi_iort_smmu_v3 *iort_smmu;
4455 struct device *dev = smmu->dev;
4456 struct acpi_iort_node *node;
4458 node = *(struct acpi_iort_node **)dev_get_platdata(dev);
4460 /* Retrieve SMMUv3 specific data */
4461 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
4463 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
4464 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
4466 switch (FIELD_GET(ACPI_IORT_SMMU_V3_HTTU_OVERRIDE, iort_smmu->flags)) {
4467 case IDR0_HTTU_ACCESS_DIRTY:
4468 smmu->features |= ARM_SMMU_FEAT_HD;
4469 fallthrough;
4470 case IDR0_HTTU_ACCESS:
4471 smmu->features |= ARM_SMMU_FEAT_HA;
4474 return acpi_smmu_iort_probe_model(node, smmu);
4476 #else
4477 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
4478 struct arm_smmu_device *smmu)
4480 return -ENODEV;
4482 #endif
4484 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
4485 struct arm_smmu_device *smmu)
4487 struct device *dev = &pdev->dev;
4488 u32 cells;
4489 int ret = -EINVAL;
4491 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
4492 dev_err(dev, "missing #iommu-cells property\n");
4493 else if (cells != 1)
4494 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
4495 else
4496 ret = 0;
4498 parse_driver_options(smmu);
4500 if (of_dma_is_coherent(dev->of_node))
4501 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
4503 return ret;
4506 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
4508 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
4509 return SZ_64K;
4510 else
4511 return SZ_128K;
4514 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
4515 resource_size_t size)
4517 struct resource res = DEFINE_RES_MEM(start, size);
4519 return devm_ioremap_resource(dev, &res);
4522 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
4524 struct list_head rmr_list;
4525 struct iommu_resv_region *e;
4527 INIT_LIST_HEAD(&rmr_list);
4528 iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
4530 list_for_each_entry(e, &rmr_list, list) {
4531 struct iommu_iort_rmr_data *rmr;
4532 int ret, i;
4534 rmr = container_of(e, struct iommu_iort_rmr_data, rr);
4535 for (i = 0; i < rmr->num_sids; i++) {
4536 ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
4537 if (ret) {
4538 dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
4539 rmr->sids[i]);
4540 continue;
4544 * STE table is not programmed to HW, see
4545 * arm_smmu_initial_bypass_stes()
4547 arm_smmu_make_bypass_ste(smmu,
4548 arm_smmu_get_step_for_sid(smmu, rmr->sids[i]));
4552 iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
4555 static void arm_smmu_impl_remove(void *data)
4557 struct arm_smmu_device *smmu = data;
4559 if (smmu->impl_ops && smmu->impl_ops->device_remove)
4560 smmu->impl_ops->device_remove(smmu);
4564 * Probe all the compiled in implementations. Each one checks to see if it
4565 * matches this HW and if so returns a devm_krealloc'd arm_smmu_device which
4566 * replaces the callers. Otherwise the original is returned or ERR_PTR.
4568 static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
4570 struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV);
4571 int ret;
4573 if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV))
4574 new_smmu = tegra241_cmdqv_probe(smmu);
4576 if (new_smmu == ERR_PTR(-ENODEV))
4577 return smmu;
4578 if (IS_ERR(new_smmu))
4579 return new_smmu;
4581 ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove,
4582 new_smmu);
4583 if (ret)
4584 return ERR_PTR(ret);
4585 return new_smmu;
4588 static int arm_smmu_device_probe(struct platform_device *pdev)
4590 int irq, ret;
4591 struct resource *res;
4592 resource_size_t ioaddr;
4593 struct arm_smmu_device *smmu;
4594 struct device *dev = &pdev->dev;
4596 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
4597 if (!smmu)
4598 return -ENOMEM;
4599 smmu->dev = dev;
4601 if (dev->of_node) {
4602 ret = arm_smmu_device_dt_probe(pdev, smmu);
4603 } else {
4604 ret = arm_smmu_device_acpi_probe(pdev, smmu);
4606 if (ret)
4607 return ret;
4609 smmu = arm_smmu_impl_probe(smmu);
4610 if (IS_ERR(smmu))
4611 return PTR_ERR(smmu);
4613 /* Base address */
4614 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
4615 if (!res)
4616 return -EINVAL;
4617 if (resource_size(res) < arm_smmu_resource_size(smmu)) {
4618 dev_err(dev, "MMIO region too small (%pr)\n", res);
4619 return -EINVAL;
4621 ioaddr = res->start;
4624 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
4625 * the PMCG registers which are reserved by the PMU driver.
4627 smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
4628 if (IS_ERR(smmu->base))
4629 return PTR_ERR(smmu->base);
4631 if (arm_smmu_resource_size(smmu) > SZ_64K) {
4632 smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
4633 ARM_SMMU_REG_SZ);
4634 if (IS_ERR(smmu->page1))
4635 return PTR_ERR(smmu->page1);
4636 } else {
4637 smmu->page1 = smmu->base;
4640 /* Interrupt lines */
4642 irq = platform_get_irq_byname_optional(pdev, "combined");
4643 if (irq > 0)
4644 smmu->combined_irq = irq;
4645 else {
4646 irq = platform_get_irq_byname_optional(pdev, "eventq");
4647 if (irq > 0)
4648 smmu->evtq.q.irq = irq;
4650 irq = platform_get_irq_byname_optional(pdev, "priq");
4651 if (irq > 0)
4652 smmu->priq.q.irq = irq;
4654 irq = platform_get_irq_byname_optional(pdev, "gerror");
4655 if (irq > 0)
4656 smmu->gerr_irq = irq;
4658 /* Probe the h/w */
4659 ret = arm_smmu_device_hw_probe(smmu);
4660 if (ret)
4661 return ret;
4663 /* Initialise in-memory data structures */
4664 ret = arm_smmu_init_structures(smmu);
4665 if (ret)
4666 return ret;
4668 /* Record our private device structure */
4669 platform_set_drvdata(pdev, smmu);
4671 /* Check for RMRs and install bypass STEs if any */
4672 arm_smmu_rmr_install_bypass_ste(smmu);
4674 /* Reset the device */
4675 ret = arm_smmu_device_reset(smmu);
4676 if (ret)
4677 return ret;
4679 /* And we're up. Go go go! */
4680 ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
4681 "smmu3.%pa", &ioaddr);
4682 if (ret)
4683 return ret;
4685 ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
4686 if (ret) {
4687 dev_err(dev, "Failed to register iommu\n");
4688 iommu_device_sysfs_remove(&smmu->iommu);
4689 return ret;
4692 return 0;
4695 static void arm_smmu_device_remove(struct platform_device *pdev)
4697 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
4699 iommu_device_unregister(&smmu->iommu);
4700 iommu_device_sysfs_remove(&smmu->iommu);
4701 arm_smmu_device_disable(smmu);
4702 iopf_queue_free(smmu->evtq.iopf);
4703 ida_destroy(&smmu->vmid_map);
/* Platform driver shutdown: disable the SMMU and leave everything else as is. */
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	arm_smmu_device_disable(platform_get_drvdata(pdev));
}
4713 static const struct of_device_id arm_smmu_of_match[] = {
4714 { .compatible = "arm,smmu-v3", },
4715 { },
4717 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
/*
 * Module exit hook handed to module_driver(): runs
 * arm_smmu_sva_notifier_synchronize() ahead of unregistering the platform
 * driver.
 */
static void arm_smmu_driver_unregister(struct platform_driver *drv)
{
	arm_smmu_sva_notifier_synchronize();

	platform_driver_unregister(drv);
}
4725 static struct platform_driver arm_smmu_driver = {
4726 .driver = {
4727 .name = "arm-smmu-v3",
4728 .of_match_table = arm_smmu_of_match,
4729 .suppress_bind_attrs = true,
4731 .probe = arm_smmu_device_probe,
4732 .remove = arm_smmu_device_remove,
4733 .shutdown = arm_smmu_device_shutdown,
4735 module_driver(arm_smmu_driver, platform_driver_register,
4736 arm_smmu_driver_unregister);
4738 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
4739 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
4740 MODULE_ALIAS("platform:arm-smmu-v3");
4741 MODULE_LICENSE("GPL v2");