drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * IOMMU API for ARM architected SMMUv3 implementations.
4 *
5 * Copyright (C) 2015 ARM Limited
6 *
7 * Author: Will Deacon <will.deacon@arm.com>
8 *
9 * This driver is powered by bad coffee and bombay mix.
10 */
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_iommu.h>
27 #include <linux/of_platform.h>
28 #include <linux/pci.h>
29 #include <linux/pci-ats.h>
30 #include <linux/platform_device.h>
32 #include <linux/amba/bus.h>
34 #include "arm-smmu-v3.h"
36 static bool disable_bypass = true;
37 module_param(disable_bypass, bool, 0444);
38 MODULE_PARM_DESC(disable_bypass,
39 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
41 static bool disable_msipolling;
42 module_param(disable_msipolling, bool, 0444);
43 MODULE_PARM_DESC(disable_msipolling,
44 "Disable MSI-based polling for CMD_SYNC completion.");
46 enum arm_smmu_msi_index {
47 EVTQ_MSI_INDEX,
48 GERROR_MSI_INDEX,
49 PRIQ_MSI_INDEX,
50 ARM_SMMU_MAX_MSIS,
53 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
54 [EVTQ_MSI_INDEX] = {
55 ARM_SMMU_EVTQ_IRQ_CFG0,
56 ARM_SMMU_EVTQ_IRQ_CFG1,
57 ARM_SMMU_EVTQ_IRQ_CFG2,
59 [GERROR_MSI_INDEX] = {
60 ARM_SMMU_GERROR_IRQ_CFG0,
61 ARM_SMMU_GERROR_IRQ_CFG1,
62 ARM_SMMU_GERROR_IRQ_CFG2,
64 [PRIQ_MSI_INDEX] = {
65 ARM_SMMU_PRIQ_IRQ_CFG0,
66 ARM_SMMU_PRIQ_IRQ_CFG1,
67 ARM_SMMU_PRIQ_IRQ_CFG2,
71 struct arm_smmu_option_prop {
72 u32 opt;
73 const char *prop;
76 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77 DEFINE_MUTEX(arm_smmu_asid_lock);
80 * Special value used by SVA when a process dies, to quiesce a CD without
81 * disabling it.
83 struct arm_smmu_ctx_desc quiet_cd = { 0 };
85 static struct arm_smmu_option_prop arm_smmu_options[] = {
86 { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
87 { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
88 { 0, NULL},
91 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
92 struct arm_smmu_device *smmu)
94 if (offset > SZ_64K)
95 return smmu->page1 + offset - SZ_64K;
97 return smmu->base + offset;
100 static void parse_driver_options(struct arm_smmu_device *smmu)
102 int i = 0;
104 do {
105 if (of_property_read_bool(smmu->dev->of_node,
106 arm_smmu_options[i].prop)) {
107 smmu->options |= arm_smmu_options[i].opt;
108 dev_notice(smmu->dev, "option %s\n",
109 arm_smmu_options[i].prop);
111 } while (arm_smmu_options[++i].opt);
114 /* Low-level queue manipulation functions */
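/*
 * Note on the helpers below: the prod and cons values carry an index, a
 * wrap bit and an overflow flag, extracted via Q_IDX(), Q_WRP() and
 * Q_OVF(). Equal indices with differing wrap bits denote a full queue;
 * equal indices with equal wrap bits denote an empty one.
 */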
115 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
117 u32 space, prod, cons;
119 prod = Q_IDX(q, q->prod);
120 cons = Q_IDX(q, q->cons);
122 if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
123 space = (1 << q->max_n_shift) - (prod - cons);
124 else
125 space = cons - prod;
127 return space >= n;
130 static bool queue_full(struct arm_smmu_ll_queue *q)
132 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
133 Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
136 static bool queue_empty(struct arm_smmu_ll_queue *q)
138 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
139 Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
142 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
144 return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
145 (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
146 ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
147 (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
150 static void queue_sync_cons_out(struct arm_smmu_queue *q)
153 * Ensure that all CPU accesses (reads and writes) to the queue
154 * are complete before we update the cons pointer.
156 __iomb();
157 writel_relaxed(q->llq.cons, q->cons_reg);
160 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
162 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
163 q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
166 static int queue_sync_prod_in(struct arm_smmu_queue *q)
168 u32 prod;
169 int ret = 0;
172 * We can't use the _relaxed() variant here, as we must prevent
173 * speculative reads of the queue before we have determined that
174 * prod has indeed moved.
176 prod = readl(q->prod_reg);
178 if (Q_OVF(prod) != Q_OVF(q->llq.prod))
179 ret = -EOVERFLOW;
181 q->llq.prod = prod;
182 return ret;
185 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
187 u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
188 return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
191 static void queue_poll_init(struct arm_smmu_device *smmu,
192 struct arm_smmu_queue_poll *qp)
194 qp->delay = 1;
195 qp->spin_cnt = 0;
196 qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
197 qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
200 static int queue_poll(struct arm_smmu_queue_poll *qp)
202 if (ktime_compare(ktime_get(), qp->timeout) > 0)
203 return -ETIMEDOUT;
205 if (qp->wfe) {
206 wfe();
207 } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
208 cpu_relax();
209 } else {
210 udelay(qp->delay);
211 qp->delay *= 2;
212 qp->spin_cnt = 0;
215 return 0;
218 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
220 int i;
222 for (i = 0; i < n_dwords; ++i)
223 *dst++ = cpu_to_le64(*src++);
226 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
228 int i;
230 for (i = 0; i < n_dwords; ++i)
231 *dst++ = le64_to_cpu(*src++);
234 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
236 if (queue_empty(&q->llq))
237 return -EAGAIN;
239 queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
240 queue_inc_cons(&q->llq);
241 queue_sync_cons_out(q);
242 return 0;
245 /* High-level queue accessors */
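/*
 * Encode @ent into the raw CMDQ_ENT_DWORDS layout consumed by the hardware.
 * Returns -EINVAL for a malformed PRI response and -ENOENT for an unknown
 * opcode.
 */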
246 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
248 memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
249 cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
251 switch (ent->opcode) {
252 case CMDQ_OP_TLBI_EL2_ALL:
253 case CMDQ_OP_TLBI_NSNH_ALL:
254 break;
255 case CMDQ_OP_PREFETCH_CFG:
256 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
257 cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
258 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
259 break;
260 case CMDQ_OP_CFGI_CD:
261 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
262 fallthrough;
263 case CMDQ_OP_CFGI_STE:
264 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
265 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
266 break;
267 case CMDQ_OP_CFGI_CD_ALL:
268 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
269 break;
270 case CMDQ_OP_CFGI_ALL:
271 /* Cover the entire SID range */
272 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
273 break;
274 case CMDQ_OP_TLBI_NH_VA:
275 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
276 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
277 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
278 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
279 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
280 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
281 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
282 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
283 break;
284 case CMDQ_OP_TLBI_S2_IPA:
285 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
286 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
287 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
288 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
289 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
290 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
291 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
292 break;
293 case CMDQ_OP_TLBI_NH_ASID:
294 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
295 fallthrough;
296 case CMDQ_OP_TLBI_S12_VMALL:
297 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
298 break;
299 case CMDQ_OP_ATC_INV:
300 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
301 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
302 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
303 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
304 cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
305 cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
306 break;
307 case CMDQ_OP_PRI_RESP:
308 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
309 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
310 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
311 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
312 switch (ent->pri.resp) {
313 case PRI_RESP_DENY:
314 case PRI_RESP_FAIL:
315 case PRI_RESP_SUCC:
316 break;
317 default:
318 return -EINVAL;
320 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
321 break;
322 case CMDQ_OP_CMD_SYNC:
323 if (ent->sync.msiaddr) {
324 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
325 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
326 } else {
327 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
329 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
330 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
331 break;
332 default:
333 return -ENOENT;
336 return 0;
339 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
340 u32 prod)
342 struct arm_smmu_queue *q = &smmu->cmdq.q;
343 struct arm_smmu_cmdq_ent ent = {
344 .opcode = CMDQ_OP_CMD_SYNC,
348 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
349 * payload, so the write will zero the entire command on that platform.
351 if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
352 ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
353 q->ent_dwords * 8;
356 arm_smmu_cmdq_build_cmd(cmd, &ent);
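/*
 * Handle a CMDQ error reported via GERROR: decode CMDQ_CONS.ERR, dump the
 * offending entry and, for an illegal command, overwrite it in place with a
 * CMD_SYNC so that the queue can continue to drain. Command-fetch aborts and
 * ATC invalidation timeouts are reported and the queue is left untouched.
 */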
359 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
361 static const char *cerror_str[] = {
362 [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
363 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
364 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
365 [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
368 int i;
369 u64 cmd[CMDQ_ENT_DWORDS];
370 struct arm_smmu_queue *q = &smmu->cmdq.q;
371 u32 cons = readl_relaxed(q->cons_reg);
372 u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
373 struct arm_smmu_cmdq_ent cmd_sync = {
374 .opcode = CMDQ_OP_CMD_SYNC,
377 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
378 idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
380 switch (idx) {
381 case CMDQ_ERR_CERROR_ABT_IDX:
382 dev_err(smmu->dev, "retrying command fetch\n");
383 case CMDQ_ERR_CERROR_NONE_IDX:
384 return;
385 case CMDQ_ERR_CERROR_ATC_INV_IDX:
387 * ATC Invalidation Completion timeout. CONS is still pointing
388 * at the CMD_SYNC. Attempt to complete other pending commands
389 * by repeating the CMD_SYNC, though we might well end up back
390 * here since the ATC invalidation may still be pending.
392 return;
393 case CMDQ_ERR_CERROR_ILL_IDX:
394 default:
395 break;
399 * We may have concurrent producers, so we need to be careful
400 * not to touch any of the shadow cmdq state.
402 queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
403 dev_err(smmu->dev, "skipping command in error state:\n");
404 for (i = 0; i < ARRAY_SIZE(cmd); ++i)
405 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
407 /* Convert the erroneous command into a CMD_SYNC */
408 if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
409 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
410 return;
413 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
417 * Command queue locking.
418 * This is a form of bastardised rwlock with the following major changes:
420 * - The only LOCK routines are exclusive_trylock() and shared_lock().
421 * Neither have barrier semantics, and instead provide only a control
422 * dependency.
424 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
425 * fails if the caller appears to be the last lock holder (yes, this is
426 * racy). All successful UNLOCK routines have RELEASE semantics.
428 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
430 int val;
433 * We can try to avoid the cmpxchg() loop by simply incrementing the
434 * lock counter. When held in exclusive state, the lock counter is set
435 * to INT_MIN so these increments won't hurt as the value will remain
436 * negative.
438 if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
439 return;
441 do {
442 val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
443 } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
446 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
448 (void)atomic_dec_return_release(&cmdq->lock);
451 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
453 if (atomic_read(&cmdq->lock) == 1)
454 return false;
456 arm_smmu_cmdq_shared_unlock(cmdq);
457 return true;
460 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags) \
461 ({ \
462 bool __ret; \
463 local_irq_save(flags); \
464 __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN); \
465 if (!__ret) \
466 local_irq_restore(flags); \
467 __ret; \
470 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags) \
471 ({ \
472 atomic_set_release(&cmdq->lock, 0); \
473 local_irq_restore(flags); \
478 * Command queue insertion.
479 * This is made fiddly by our attempts to achieve some sort of scalability
480 * since there is one queue shared amongst all of the CPUs in the system. If
481 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
482 * then you'll *love* this monstrosity.
484 * The basic idea is to split the queue up into ranges of commands that are
485 * owned by a given CPU; the owner may not have written all of the commands
486 * itself, but is responsible for advancing the hardware prod pointer when
487 * the time comes. The algorithm is roughly:
489 * 1. Allocate some space in the queue. At this point we also discover
490 * whether the head of the queue is currently owned by another CPU,
491 * or whether we are the owner.
493 * 2. Write our commands into our allocated slots in the queue.
495 * 3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
497 * 4. If we are an owner:
498 * a. Wait for the previous owner to finish.
499 * b. Mark the queue head as unowned, which tells us the range
500 * that we are responsible for publishing.
501 * c. Wait for all commands in our owned range to become valid.
502 * d. Advance the hardware prod pointer.
503 * e. Tell the next owner we've finished.
505 * 5. If we are inserting a CMD_SYNC (we may or may not have been an
506 * owner), then we need to stick around until it has completed:
507 * a. If we have MSIs, the SMMU can write back into the CMD_SYNC
508 * to clear the first 4 bytes.
509 * b. Otherwise, we spin waiting for the hardware cons pointer to
510 * advance past our command.
512 * The devil is in the details, particularly the use of locking for handling
513 * SYNC completion and freeing up space in the queue before we think that it is
514 * full.
516 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
517 u32 sprod, u32 eprod, bool set)
519 u32 swidx, sbidx, ewidx, ebidx;
520 struct arm_smmu_ll_queue llq = {
521 .max_n_shift = cmdq->q.llq.max_n_shift,
522 .prod = sprod,
525 ewidx = BIT_WORD(Q_IDX(&llq, eprod));
526 ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
528 while (llq.prod != eprod) {
529 unsigned long mask;
530 atomic_long_t *ptr;
531 u32 limit = BITS_PER_LONG;
533 swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
534 sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
536 ptr = &cmdq->valid_map[swidx];
538 if ((swidx == ewidx) && (sbidx < ebidx))
539 limit = ebidx;
541 mask = GENMASK(limit - 1, sbidx);
544 * The valid bit is the inverse of the wrap bit. This means
545 * that a zero-initialised queue is invalid and, after marking
546 * all entries as valid, they become invalid again when we
547 * wrap.
549 if (set) {
550 atomic_long_xor(mask, ptr);
551 } else { /* Poll */
552 unsigned long valid;
554 valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
555 atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
558 llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
562 /* Mark all entries in the range [sprod, eprod) as valid */
563 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
564 u32 sprod, u32 eprod)
566 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
569 /* Wait for all entries in the range [sprod, eprod) to become valid */
570 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
571 u32 sprod, u32 eprod)
573 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
576 /* Wait for the command queue to become non-full */
577 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
578 struct arm_smmu_ll_queue *llq)
580 unsigned long flags;
581 struct arm_smmu_queue_poll qp;
582 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
583 int ret = 0;
586 * Try to update our copy of cons by grabbing exclusive cmdq access. If
587 * that fails, spin until somebody else updates it for us.
589 if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
590 WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
591 arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
592 llq->val = READ_ONCE(cmdq->q.llq.val);
593 return 0;
596 queue_poll_init(smmu, &qp);
597 do {
598 llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
599 if (!queue_full(llq))
600 break;
602 ret = queue_poll(&qp);
603 } while (!ret);
605 return ret;
609 * Wait until the SMMU signals a CMD_SYNC completion MSI.
610 * Must be called with the cmdq lock held in some capacity.
612 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
613 struct arm_smmu_ll_queue *llq)
615 int ret = 0;
616 struct arm_smmu_queue_poll qp;
617 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
618 u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
620 queue_poll_init(smmu, &qp);
623 * The MSI won't generate an event, since it's being written back
624 * into the command queue.
626 qp.wfe = false;
627 smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
628 llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
629 return ret;
633 * Wait until the SMMU cons index passes llq->prod.
634 * Must be called with the cmdq lock held in some capacity.
636 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
637 struct arm_smmu_ll_queue *llq)
639 struct arm_smmu_queue_poll qp;
640 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
641 u32 prod = llq->prod;
642 int ret = 0;
644 queue_poll_init(smmu, &qp);
645 llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
646 do {
647 if (queue_consumed(llq, prod))
648 break;
650 ret = queue_poll(&qp);
653 * This needs to be a readl() so that our subsequent call
654 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
656 * Specifically, we need to ensure that we observe all
657 * shared_lock()s by other CMD_SYNCs that share our owner,
658 * so that a failing call to tryunlock() means that we're
659 * the last one out and therefore we can safely advance
660 * cmdq->q.llq.cons. Roughly speaking:
662 *   CPU 0                   CPU1                    CPU2 (us)
664 *   if (sync)
665 *           shared_lock();
667 *   dma_wmb();
668 *   set_valid_map();
670 *                           if (owner) {
671 *                                   poll_valid_map();
672 *                                   <control dependency>
673 *                                   writel(prod_reg);
675 *                                                   readl(cons_reg);
676 *                                                   tryunlock();
678 * Requires us to see CPU 0's shared_lock() acquisition.
680 llq->cons = readl(cmdq->q.cons_reg);
681 } while (!ret);
683 return ret;
686 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
687 struct arm_smmu_ll_queue *llq)
689 if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
690 return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
692 return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
695 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
696 u32 prod, int n)
698 int i;
699 struct arm_smmu_ll_queue llq = {
700 .max_n_shift = cmdq->q.llq.max_n_shift,
701 .prod = prod,
704 for (i = 0; i < n; ++i) {
705 u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
707 prod = queue_inc_prod_n(&llq, i);
708 queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
713 * This is the actual insertion function, and provides the following
714 * ordering guarantees to callers:
716 * - There is a dma_wmb() before publishing any commands to the queue.
717 * This can be relied upon to order prior writes to data structures
718 * in memory (such as a CD or an STE) before the command.
720 * - On completion of a CMD_SYNC, there is a control dependency.
721 * This can be relied upon to order subsequent writes to memory (e.g.
722 * freeing an IOVA) after completion of the CMD_SYNC.
724 * - Command insertion is totally ordered, so if two CPUs each race to
725 * insert their own list of commands then all of the commands from one
726 * CPU will appear before any of the commands from the other CPU.
728 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
729 u64 *cmds, int n, bool sync)
731 u64 cmd_sync[CMDQ_ENT_DWORDS];
732 u32 prod;
733 unsigned long flags;
734 bool owner;
735 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
736 struct arm_smmu_ll_queue llq = {
737 .max_n_shift = cmdq->q.llq.max_n_shift,
738 }, head = llq;
739 int ret = 0;
741 /* 1. Allocate some space in the queue */
742 local_irq_save(flags);
743 llq.val = READ_ONCE(cmdq->q.llq.val);
744 do {
745 u64 old;
747 while (!queue_has_space(&llq, n + sync)) {
748 local_irq_restore(flags);
749 if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
750 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
751 local_irq_save(flags);
754 head.cons = llq.cons;
755 head.prod = queue_inc_prod_n(&llq, n + sync) |
756 CMDQ_PROD_OWNED_FLAG;
758 old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
759 if (old == llq.val)
760 break;
762 llq.val = old;
763 } while (1);
764 owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
765 head.prod &= ~CMDQ_PROD_OWNED_FLAG;
766 llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
769 * 2. Write our commands into the queue
770 * Dependency ordering from the cmpxchg() loop above.
772 arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
773 if (sync) {
774 prod = queue_inc_prod_n(&llq, n);
775 arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
776 queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
779 * In order to determine completion of our CMD_SYNC, we must
780 * ensure that the queue can't wrap twice without us noticing.
781 * We achieve that by taking the cmdq lock as shared before
782 * marking our slot as valid.
784 arm_smmu_cmdq_shared_lock(cmdq);
787 /* 3. Mark our slots as valid, ensuring commands are visible first */
788 dma_wmb();
789 arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
791 /* 4. If we are the owner, take control of the SMMU hardware */
792 if (owner) {
793 /* a. Wait for previous owner to finish */
794 atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
796 /* b. Stop gathering work by clearing the owned flag */
797 prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
798 &cmdq->q.llq.atomic.prod);
799 prod &= ~CMDQ_PROD_OWNED_FLAG;
802 * c. Wait for any gathered work to be written to the queue.
803 * Note that we read our own entries so that we have the control
804 * dependency required by (d).
806 arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
809 * d. Advance the hardware prod pointer
810 * Control dependency ordering from the entries becoming valid.
812 writel_relaxed(prod, cmdq->q.prod_reg);
815 * e. Tell the next owner we're done
816 * Make sure we've updated the hardware first, so that we don't
817 * race to update prod and potentially move it backwards.
819 atomic_set_release(&cmdq->owner_prod, prod);
822 /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
823 if (sync) {
824 llq.prod = queue_inc_prod_n(&llq, n);
825 ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
826 if (ret) {
827 dev_err_ratelimited(smmu->dev,
828 "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
829 llq.prod,
830 readl_relaxed(cmdq->q.prod_reg),
831 readl_relaxed(cmdq->q.cons_reg));
835 * Try to unlock the cmdq lock. This will fail if we're the last
836 * reader, in which case we can safely update cmdq->q.llq.cons
838 if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
839 WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
840 arm_smmu_cmdq_shared_unlock(cmdq);
844 local_irq_restore(flags);
845 return ret;
848 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
849 struct arm_smmu_cmdq_ent *ent)
851 u64 cmd[CMDQ_ENT_DWORDS];
853 if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
854 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
855 ent->opcode);
856 return -EINVAL;
859 return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
862 static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
864 return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
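/*
 * Batched command submission: arm_smmu_cmdq_batch_add() queues a command
 * locally, flushing the batch (without a CMD_SYNC) once CMDQ_BATCH_ENTRIES
 * commands have accumulated; arm_smmu_cmdq_batch_submit() pushes whatever
 * remains along with a CMD_SYNC.
 */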
867 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
868 struct arm_smmu_cmdq_batch *cmds,
869 struct arm_smmu_cmdq_ent *cmd)
871 if (cmds->num == CMDQ_BATCH_ENTRIES) {
872 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
873 cmds->num = 0;
875 arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
876 cmds->num++;
879 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
880 struct arm_smmu_cmdq_batch *cmds)
882 return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
885 /* Context descriptor manipulation functions */
886 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
888 struct arm_smmu_cmdq_ent cmd = {
889 .opcode = CMDQ_OP_TLBI_NH_ASID,
890 .tlbi.asid = asid,
893 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
894 arm_smmu_cmdq_issue_sync(smmu);
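/*
 * Invalidate any cached copies of the context descriptor (or its L1 table
 * entry when @leaf is false) for @ssid on every stream attached to this
 * domain.
 */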
897 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
898 int ssid, bool leaf)
900 size_t i;
901 unsigned long flags;
902 struct arm_smmu_master *master;
903 struct arm_smmu_cmdq_batch cmds = {};
904 struct arm_smmu_device *smmu = smmu_domain->smmu;
905 struct arm_smmu_cmdq_ent cmd = {
906 .opcode = CMDQ_OP_CFGI_CD,
907 .cfgi = {
908 .ssid = ssid,
909 .leaf = leaf,
913 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
914 list_for_each_entry(master, &smmu_domain->devices, domain_head) {
915 for (i = 0; i < master->num_sids; i++) {
916 cmd.cfgi.sid = master->sids[i];
917 arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
920 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
922 arm_smmu_cmdq_batch_submit(smmu, &cmds);
925 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
926 struct arm_smmu_l1_ctx_desc *l1_desc)
928 size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
930 l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
931 &l1_desc->l2ptr_dma, GFP_KERNEL);
932 if (!l1_desc->l2ptr) {
933 dev_warn(smmu->dev,
934 "failed to allocate context descriptor table\n");
935 return -ENOMEM;
937 return 0;
940 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
941 struct arm_smmu_l1_ctx_desc *l1_desc)
943 u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
944 CTXDESC_L1_DESC_V;
946 /* See comment in arm_smmu_write_ctx_desc() */
947 WRITE_ONCE(*dst, cpu_to_le64(val));
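/*
 * Return a pointer to the context descriptor for @ssid, allocating and
 * installing a leaf (L2) table on demand when a two-level CD table is in
 * use. Returns NULL if the leaf table cannot be allocated.
 */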
950 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
951 u32 ssid)
953 __le64 *l1ptr;
954 unsigned int idx;
955 struct arm_smmu_l1_ctx_desc *l1_desc;
956 struct arm_smmu_device *smmu = smmu_domain->smmu;
957 struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
959 if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
960 return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
962 idx = ssid >> CTXDESC_SPLIT;
963 l1_desc = &cdcfg->l1_desc[idx];
964 if (!l1_desc->l2ptr) {
965 if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
966 return NULL;
968 l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
969 arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
970 /* An invalid L1CD can be cached */
971 arm_smmu_sync_cd(smmu_domain, ssid, false);
973 idx = ssid & (CTXDESC_L2_ENTRIES - 1);
974 return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
977 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
978 struct arm_smmu_ctx_desc *cd)
981 * This function handles the following cases:
983 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
984 * (2) Install a secondary CD, for SID+SSID traffic.
985 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
986 * CD, then invalidate the old entry and mappings.
987 * (4) Quiesce the context without clearing the valid bit. Disable
988 * translation, and ignore any translation fault.
989 * (5) Remove a secondary CD.
991 u64 val;
992 bool cd_live;
993 __le64 *cdptr;
994 struct arm_smmu_device *smmu = smmu_domain->smmu;
996 if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
997 return -E2BIG;
999 cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1000 if (!cdptr)
1001 return -ENOMEM;
1003 val = le64_to_cpu(cdptr[0]);
1004 cd_live = !!(val & CTXDESC_CD_0_V);
1006 if (!cd) { /* (5) */
1007 val = 0;
1008 } else if (cd == &quiet_cd) { /* (4) */
1009 val |= CTXDESC_CD_0_TCR_EPD0;
1010 } else if (cd_live) { /* (3) */
1011 val &= ~CTXDESC_CD_0_ASID;
1012 val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1014 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1015 * this substream's traffic
1017 } else { /* (1) and (2) */
1018 cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1019 cdptr[2] = 0;
1020 cdptr[3] = cpu_to_le64(cd->mair);
1023 * STE is live, and the SMMU might read dwords of this CD in any
1024 * order. Ensure that it observes valid values before reading
1025 * V=1.
1027 arm_smmu_sync_cd(smmu_domain, ssid, true);
1029 val = cd->tcr |
1030 #ifdef __BIG_ENDIAN
1031 CTXDESC_CD_0_ENDI |
1032 #endif
1033 CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1034 (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1035 CTXDESC_CD_0_AA64 |
1036 FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1037 CTXDESC_CD_0_V;
1039 /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1040 if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1041 val |= CTXDESC_CD_0_S;
1045 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1046 * "Configuration structures and configuration invalidation completion"
1048 * The size of single-copy atomic reads made by the SMMU is
1049 * IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1050 * field within an aligned 64-bit span of a structure can be altered
1051 * without first making the structure invalid.
1053 WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1054 arm_smmu_sync_cd(smmu_domain, ssid, true);
1055 return 0;
1058 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1060 int ret;
1061 size_t l1size;
1062 size_t max_contexts;
1063 struct arm_smmu_device *smmu = smmu_domain->smmu;
1064 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1065 struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1067 max_contexts = 1 << cfg->s1cdmax;
1069 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1070 max_contexts <= CTXDESC_L2_ENTRIES) {
1071 cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1072 cdcfg->num_l1_ents = max_contexts;
1074 l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1075 } else {
1076 cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1077 cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1078 CTXDESC_L2_ENTRIES);
1080 cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1081 sizeof(*cdcfg->l1_desc),
1082 GFP_KERNEL);
1083 if (!cdcfg->l1_desc)
1084 return -ENOMEM;
1086 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1089 cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1090 GFP_KERNEL);
1091 if (!cdcfg->cdtab) {
1092 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1093 ret = -ENOMEM;
1094 goto err_free_l1;
1097 return 0;
1099 err_free_l1:
1100 if (cdcfg->l1_desc) {
1101 devm_kfree(smmu->dev, cdcfg->l1_desc);
1102 cdcfg->l1_desc = NULL;
1104 return ret;
1107 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1109 int i;
1110 size_t size, l1size;
1111 struct arm_smmu_device *smmu = smmu_domain->smmu;
1112 struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1114 if (cdcfg->l1_desc) {
1115 size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1117 for (i = 0; i < cdcfg->num_l1_ents; i++) {
1118 if (!cdcfg->l1_desc[i].l2ptr)
1119 continue;
1121 dmam_free_coherent(smmu->dev, size,
1122 cdcfg->l1_desc[i].l2ptr,
1123 cdcfg->l1_desc[i].l2ptr_dma);
1125 devm_kfree(smmu->dev, cdcfg->l1_desc);
1126 cdcfg->l1_desc = NULL;
1128 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1129 } else {
1130 l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1133 dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1134 cdcfg->cdtab_dma = 0;
1135 cdcfg->cdtab = NULL;
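/*
 * Drop a reference to @cd's ASID and remove it from the global xarray once
 * the last user has gone. Returns true if the ASID was released.
 */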
1138 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1140 bool free;
1141 struct arm_smmu_ctx_desc *old_cd;
1143 if (!cd->asid)
1144 return false;
1146 free = refcount_dec_and_test(&cd->refs);
1147 if (free) {
1148 old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1149 WARN_ON(old_cd != cd);
1151 return free;
1154 /* Stream table manipulation functions */
1155 static void
1156 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1158 u64 val = 0;
1160 val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1161 val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1163 /* See comment in arm_smmu_write_ctx_desc() */
1164 WRITE_ONCE(*dst, cpu_to_le64(val));
1167 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1169 struct arm_smmu_cmdq_ent cmd = {
1170 .opcode = CMDQ_OP_CFGI_STE,
1171 .cfgi = {
1172 .sid = sid,
1173 .leaf = true,
1177 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1178 arm_smmu_cmdq_issue_sync(smmu);
1181 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1182 __le64 *dst)
1185 * This is hideously complicated, but we only really care about
1186 * three cases at the moment:
1188 * 1. Invalid (all zero) -> bypass/fault (init)
1189 * 2. Bypass/fault -> translation/bypass (attach)
1190 * 3. Translation/bypass -> bypass/fault (detach)
1192 * Given that we can't update the STE atomically and the SMMU
1193 * doesn't read the thing in a defined order, that leaves us
1194 * with the following maintenance requirements:
1196 * 1. Update Config, return (init time STEs aren't live)
1197 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1198 * 3. Update Config, sync
1200 u64 val = le64_to_cpu(dst[0]);
1201 bool ste_live = false;
1202 struct arm_smmu_device *smmu = NULL;
1203 struct arm_smmu_s1_cfg *s1_cfg = NULL;
1204 struct arm_smmu_s2_cfg *s2_cfg = NULL;
1205 struct arm_smmu_domain *smmu_domain = NULL;
1206 struct arm_smmu_cmdq_ent prefetch_cmd = {
1207 .opcode = CMDQ_OP_PREFETCH_CFG,
1208 .prefetch = {
1209 .sid = sid,
1213 if (master) {
1214 smmu_domain = master->domain;
1215 smmu = master->smmu;
1218 if (smmu_domain) {
1219 switch (smmu_domain->stage) {
1220 case ARM_SMMU_DOMAIN_S1:
1221 s1_cfg = &smmu_domain->s1_cfg;
1222 break;
1223 case ARM_SMMU_DOMAIN_S2:
1224 case ARM_SMMU_DOMAIN_NESTED:
1225 s2_cfg = &smmu_domain->s2_cfg;
1226 break;
1227 default:
1228 break;
1232 if (val & STRTAB_STE_0_V) {
1233 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1234 case STRTAB_STE_0_CFG_BYPASS:
1235 break;
1236 case STRTAB_STE_0_CFG_S1_TRANS:
1237 case STRTAB_STE_0_CFG_S2_TRANS:
1238 ste_live = true;
1239 break;
1240 case STRTAB_STE_0_CFG_ABORT:
1241 BUG_ON(!disable_bypass);
1242 break;
1243 default:
1244 BUG(); /* STE corruption */
1248 /* Nuke the existing STE_0 value, as we're going to rewrite it */
1249 val = STRTAB_STE_0_V;
1251 /* Bypass/fault */
1252 if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1253 if (!smmu_domain && disable_bypass)
1254 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1255 else
1256 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1258 dst[0] = cpu_to_le64(val);
1259 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1260 STRTAB_STE_1_SHCFG_INCOMING));
1261 dst[2] = 0; /* Nuke the VMID */
1263 * The SMMU can perform negative caching, so we must sync
1264 * the STE regardless of whether the old value was live.
1266 if (smmu)
1267 arm_smmu_sync_ste_for_sid(smmu, sid);
1268 return;
1271 if (s1_cfg) {
1272 BUG_ON(ste_live);
1273 dst[1] = cpu_to_le64(
1274 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1275 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1276 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1277 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1278 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1280 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1281 !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1282 dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1284 val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1285 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1286 FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1287 FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1290 if (s2_cfg) {
1291 BUG_ON(ste_live);
1292 dst[2] = cpu_to_le64(
1293 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1294 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1295 #ifdef __BIG_ENDIAN
1296 STRTAB_STE_2_S2ENDI |
1297 #endif
1298 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1299 STRTAB_STE_2_S2R);
1301 dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1303 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1306 if (master->ats_enabled)
1307 dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1308 STRTAB_STE_1_EATS_TRANS));
1310 arm_smmu_sync_ste_for_sid(smmu, sid);
1311 /* See comment in arm_smmu_write_ctx_desc() */
1312 WRITE_ONCE(dst[0], cpu_to_le64(val));
1313 arm_smmu_sync_ste_for_sid(smmu, sid);
1315 /* It's likely that we'll want to use the new STE soon */
1316 if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1317 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1320 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1322 unsigned int i;
1324 for (i = 0; i < nent; ++i) {
1325 arm_smmu_write_strtab_ent(NULL, -1, strtab);
1326 strtab += STRTAB_STE_DWORDS;
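/*
 * Lazily allocate the level-2 stream table covering @sid, initialise its
 * STEs to the default bypass/abort behaviour and hook the table into the
 * level-1 descriptor.
 */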
1330 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1332 size_t size;
1333 void *strtab;
1334 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1335 struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1337 if (desc->l2ptr)
1338 return 0;
1340 size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1341 strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1343 desc->span = STRTAB_SPLIT + 1;
1344 desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1345 GFP_KERNEL);
1346 if (!desc->l2ptr) {
1347 dev_err(smmu->dev,
1348 "failed to allocate l2 stream table for SID %u\n",
1349 sid);
1350 return -ENOMEM;
1353 arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1354 arm_smmu_write_strtab_l1_desc(strtab, desc);
1355 return 0;
1358 /* IRQ and event handlers */
1359 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1361 int i;
1362 struct arm_smmu_device *smmu = dev;
1363 struct arm_smmu_queue *q = &smmu->evtq.q;
1364 struct arm_smmu_ll_queue *llq = &q->llq;
1365 u64 evt[EVTQ_ENT_DWORDS];
1367 do {
1368 while (!queue_remove_raw(q, evt)) {
1369 u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1371 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1372 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1373 dev_info(smmu->dev, "\t0x%016llx\n",
1374 (unsigned long long)evt[i]);
1379 * Not much we can do on overflow, so scream and pretend we're
1380 * trying harder.
1382 if (queue_sync_prod_in(q) == -EOVERFLOW)
1383 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1384 } while (!queue_empty(llq));
1386 /* Sync our overflow flag, as we believe we're up to speed */
1387 llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1388 Q_IDX(llq, llq->cons);
1389 return IRQ_HANDLED;
1392 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1394 u32 sid, ssid;
1395 u16 grpid;
1396 bool ssv, last;
1398 sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1399 ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1400 ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1401 last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1402 grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1404 dev_info(smmu->dev, "unexpected PRI request received:\n");
1405 dev_info(smmu->dev,
1406 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1407 sid, ssid, grpid, last ? "L" : "",
1408 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1409 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1410 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1411 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1412 evt[1] & PRIQ_1_ADDR_MASK);
1414 if (last) {
1415 struct arm_smmu_cmdq_ent cmd = {
1416 .opcode = CMDQ_OP_PRI_RESP,
1417 .substream_valid = ssv,
1418 .pri = {
1419 .sid = sid,
1420 .ssid = ssid,
1421 .grpid = grpid,
1422 .resp = PRI_RESP_DENY,
1426 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1430 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1432 struct arm_smmu_device *smmu = dev;
1433 struct arm_smmu_queue *q = &smmu->priq.q;
1434 struct arm_smmu_ll_queue *llq = &q->llq;
1435 u64 evt[PRIQ_ENT_DWORDS];
1437 do {
1438 while (!queue_remove_raw(q, evt))
1439 arm_smmu_handle_ppr(smmu, evt);
1441 if (queue_sync_prod_in(q) == -EOVERFLOW)
1442 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1443 } while (!queue_empty(llq));
1445 /* Sync our overflow flag, as we believe we're up to speed */
1446 llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1447 Q_IDX(llq, llq->cons);
1448 queue_sync_cons_out(q);
1449 return IRQ_HANDLED;
1452 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1454 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1456 u32 gerror, gerrorn, active;
1457 struct arm_smmu_device *smmu = dev;
1459 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1460 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1462 active = gerror ^ gerrorn;
1463 if (!(active & GERROR_ERR_MASK))
1464 return IRQ_NONE; /* No errors pending */
1466 dev_warn(smmu->dev,
1467 "unexpected global error reported (0x%08x), this could be serious\n",
1468 active);
1470 if (active & GERROR_SFM_ERR) {
1471 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1472 arm_smmu_device_disable(smmu);
1475 if (active & GERROR_MSI_GERROR_ABT_ERR)
1476 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1478 if (active & GERROR_MSI_PRIQ_ABT_ERR)
1479 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1481 if (active & GERROR_MSI_EVTQ_ABT_ERR)
1482 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1484 if (active & GERROR_MSI_CMDQ_ABT_ERR)
1485 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1487 if (active & GERROR_PRIQ_ABT_ERR)
1488 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1490 if (active & GERROR_EVTQ_ABT_ERR)
1491 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1493 if (active & GERROR_CMDQ_ERR)
1494 arm_smmu_cmdq_skip_err(smmu);
1496 writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1497 return IRQ_HANDLED;
1500 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1502 struct arm_smmu_device *smmu = dev;
1504 arm_smmu_evtq_thread(irq, dev);
1505 if (smmu->features & ARM_SMMU_FEAT_PRI)
1506 arm_smmu_priq_thread(irq, dev);
1508 return IRQ_HANDLED;
1511 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1513 arm_smmu_gerror_handler(irq, dev);
1514 return IRQ_WAKE_THREAD;
1517 static void
1518 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1519 struct arm_smmu_cmdq_ent *cmd)
1521 size_t log2_span;
1522 size_t span_mask;
1523 /* ATC invalidates are always on 4096-bytes pages */
1524 size_t inval_grain_shift = 12;
1525 unsigned long page_start, page_end;
1528 * ATS and PASID:
1530 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1531 * prefix. In that case all ATC entries within the address range are
1532 * invalidated, including those that were requested with a PASID! There
1533 * is no way to invalidate only entries without PASID.
1535 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1536 * traffic), translation requests without PASID create ATC entries
1537 * without PASID, which must be invalidated with substream_valid clear.
1538 * This has the unpleasant side-effect of invalidating all PASID-tagged
1539 * ATC entries within the address range.
1541 *cmd = (struct arm_smmu_cmdq_ent) {
1542 .opcode = CMDQ_OP_ATC_INV,
1543 .substream_valid = !!ssid,
1544 .atc.ssid = ssid,
1547 if (!size) {
1548 cmd->atc.size = ATC_INV_SIZE_ALL;
1549 return;
1552 page_start = iova >> inval_grain_shift;
1553 page_end = (iova + size - 1) >> inval_grain_shift;
1556 * In an ATS Invalidate Request, the address must be aligned on the
1557 * range size, which must be a power of two number of page sizes. We
1558 * thus have to choose between grossly over-invalidating the region, or
1559 * splitting the invalidation into multiple commands. For simplicity
1560 * we'll go with the first solution, but should refine it in the future
1561 * if multiple commands are shown to be more efficient.
1563 * Find the smallest power of two that covers the range. The most
1564 * significant differing bit between the start and end addresses,
1565 * fls(start ^ end), indicates the required span. For example:
1567 * We want to invalidate pages [8; 11]. This is already the ideal range:
1568 * x = 0b1000 ^ 0b1011 = 0b11
1569 * span = 1 << fls(x) = 4
1571 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1572 * x = 0b0111 ^ 0b1010 = 0b1101
1573 * span = 1 << fls(x) = 16
1575 log2_span = fls_long(page_start ^ page_end);
1576 span_mask = (1ULL << log2_span) - 1;
1578 page_start &= ~span_mask;
1580 cmd->atc.addr = page_start << inval_grain_shift;
1581 cmd->atc.size = log2_span;
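/*
 * Invalidate the entire ATC for every stream ID owned by @master and wait
 * for the invalidations to complete.
 */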
1584 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1586 int i;
1587 struct arm_smmu_cmdq_ent cmd;
1589 arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1591 for (i = 0; i < master->num_sids; i++) {
1592 cmd.atc.sid = master->sids[i];
1593 arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1596 return arm_smmu_cmdq_issue_sync(master->smmu);
1599 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1600 unsigned long iova, size_t size)
1602 int i;
1603 unsigned long flags;
1604 struct arm_smmu_cmdq_ent cmd;
1605 struct arm_smmu_master *master;
1606 struct arm_smmu_cmdq_batch cmds = {};
1608 if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1609 return 0;
1612 * Ensure that we've completed prior invalidation of the main TLBs
1613 * before we read 'nr_ats_masters' in case of a concurrent call to
1614 * arm_smmu_enable_ats():
1616 *  // unmap()                      // arm_smmu_enable_ats()
1617 *  TLBI+SYNC                       atomic_inc(&nr_ats_masters);
1618 *  smp_mb();                       [...]
1619 *  atomic_read(&nr_ats_masters);   pci_enable_ats() // writel()
1621 * Ensures that we always see the incremented 'nr_ats_masters' count if
1622 * ATS was enabled at the PCI device before completion of the TLBI.
1624 smp_mb();
1625 if (!atomic_read(&smmu_domain->nr_ats_masters))
1626 return 0;
1628 arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1630 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1631 list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1632 if (!master->ats_enabled)
1633 continue;
1635 for (i = 0; i < master->num_sids; i++) {
1636 cmd.atc.sid = master->sids[i];
1637 arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1640 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1642 return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1645 /* IO_PGTABLE API */
1646 static void arm_smmu_tlb_inv_context(void *cookie)
1648 struct arm_smmu_domain *smmu_domain = cookie;
1649 struct arm_smmu_device *smmu = smmu_domain->smmu;
1650 struct arm_smmu_cmdq_ent cmd;
1653 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1654 * PTEs previously cleared by unmaps on the current CPU not yet visible
1655 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1656 * insertion to guarantee those are observed before the TLBI. Do be
1657 * careful, 007.
1659 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1660 arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1661 } else {
1662 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
1663 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1664 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1665 arm_smmu_cmdq_issue_sync(smmu);
1667 arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1670 static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
1671 size_t granule, bool leaf,
1672 struct arm_smmu_domain *smmu_domain)
1674 struct arm_smmu_device *smmu = smmu_domain->smmu;
1675 unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
1676 size_t inv_range = granule;
1677 struct arm_smmu_cmdq_batch cmds = {};
1678 struct arm_smmu_cmdq_ent cmd = {
1679 .tlbi = {
1680 .leaf = leaf,
1684 if (!size)
1685 return;
1687 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1688 cmd.opcode = CMDQ_OP_TLBI_NH_VA;
1689 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1690 } else {
1691 cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
1692 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1695 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1696 /* Get the leaf page size */
1697 tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1699 /* Convert page size of 12,14,16 (log2) to 1,2,3 */
1700 cmd.tlbi.tg = (tg - 10) / 2;
1702 /* Determine what level the granule is at */
1703 cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1705 num_pages = size >> tg;
1708 while (iova < end) {
1709 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1711 * On each iteration of the loop, the range is 5 bits
1712 * worth of the aligned size remaining.
1713 * The range in pages is:
1715 * range = (num_pages & (0x1f << __ffs(num_pages)))
1717 unsigned long scale, num;
1719 /* Determine the power of 2 multiple number of pages */
1720 scale = __ffs(num_pages);
1721 cmd.tlbi.scale = scale;
1723 /* Determine how many chunks of 2^scale size we have */
1724 num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1725 cmd.tlbi.num = num - 1;
1727 /* range is num * 2^scale * pgsize */
1728 inv_range = num << (scale + tg);
1730 /* Clear out the lower order bits for the next iteration */
1731 num_pages -= num << scale;
1734 cmd.tlbi.addr = iova;
1735 arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1736 iova += inv_range;
1738 arm_smmu_cmdq_batch_submit(smmu, &cmds);
1741 * Unfortunately, this can't be leaf-only since we may have
1742 * zapped an entire table.
1744 arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
1747 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1748 unsigned long iova, size_t granule,
1749 void *cookie)
1751 struct arm_smmu_domain *smmu_domain = cookie;
1752 struct iommu_domain *domain = &smmu_domain->domain;
1754 iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1757 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1758 size_t granule, void *cookie)
1760 arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
1763 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1764 .tlb_flush_all = arm_smmu_tlb_inv_context,
1765 .tlb_flush_walk = arm_smmu_tlb_inv_walk,
1766 .tlb_add_page = arm_smmu_tlb_inv_page_nosync,
1769 /* IOMMU API */
1770 static bool arm_smmu_capable(enum iommu_cap cap)
1772 switch (cap) {
1773 case IOMMU_CAP_CACHE_COHERENCY:
1774 return true;
1775 case IOMMU_CAP_NOEXEC:
1776 return true;
1777 default:
1778 return false;
1782 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1784 struct arm_smmu_domain *smmu_domain;
1786 if (type != IOMMU_DOMAIN_UNMANAGED &&
1787 type != IOMMU_DOMAIN_DMA &&
1788 type != IOMMU_DOMAIN_IDENTITY)
1789 return NULL;
1792 * Allocate the domain and initialise some of its data structures.
1793 * We can't really do anything meaningful until we've added a
1794 * master.
1796 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1797 if (!smmu_domain)
1798 return NULL;
1800 if (type == IOMMU_DOMAIN_DMA &&
1801 iommu_get_dma_cookie(&smmu_domain->domain)) {
1802 kfree(smmu_domain);
1803 return NULL;
1806 mutex_init(&smmu_domain->init_mutex);
1807 INIT_LIST_HEAD(&smmu_domain->devices);
1808 spin_lock_init(&smmu_domain->devices_lock);
1809 INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
1811 return &smmu_domain->domain;
1814 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1816 int idx, size = 1 << span;
1818 do {
1819 idx = find_first_zero_bit(map, size);
1820 if (idx == size)
1821 return -ENOSPC;
1822 } while (test_and_set_bit(idx, map));
1824 return idx;
1827 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1829 clear_bit(idx, map);
1832 static void arm_smmu_domain_free(struct iommu_domain *domain)
1834 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1835 struct arm_smmu_device *smmu = smmu_domain->smmu;
1837 iommu_put_dma_cookie(domain);
1838 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1840 /* Free the CD and ASID, if we allocated them */
1841 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1842 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1844 /* Prevent SVA from touching the CD while we're freeing it */
1845 mutex_lock(&arm_smmu_asid_lock);
1846 if (cfg->cdcfg.cdtab)
1847 arm_smmu_free_cd_tables(smmu_domain);
1848 arm_smmu_free_asid(&cfg->cd);
1849 mutex_unlock(&arm_smmu_asid_lock);
1850 } else {
1851 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1852 if (cfg->vmid)
1853 arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1856 kfree(smmu_domain);
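/*
 * Stage-1 finalisation: allocate an ASID, build the CD table(s) for this
 * domain, and install the initial context descriptor at SSID 0.
 */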
1859 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1860 struct arm_smmu_master *master,
1861 struct io_pgtable_cfg *pgtbl_cfg)
1863 int ret;
1864 u32 asid;
1865 struct arm_smmu_device *smmu = smmu_domain->smmu;
1866 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1867 typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1869 refcount_set(&cfg->cd.refs, 1);
1871 /* Prevent SVA from modifying the ASID until it is written to the CD */
1872 mutex_lock(&arm_smmu_asid_lock);
1873 ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
1874 XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
1875 if (ret)
1876 goto out_unlock;
1878 cfg->s1cdmax = master->ssid_bits;
1880 ret = arm_smmu_alloc_cd_tables(smmu_domain);
1881 if (ret)
1882 goto out_free_asid;
1884 cfg->cd.asid = (u16)asid;
1885 cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
1886 cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1887 FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1888 FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1889 FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1890 FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1891 FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1892 CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
1893 cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
1896 * Note that this will end up calling arm_smmu_sync_cd() before
1897 * the master has been added to the devices list for this domain.
1898 * This isn't an issue because the STE hasn't been installed yet.
1900 ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
1901 if (ret)
1902 goto out_free_cd_tables;
1904 mutex_unlock(&arm_smmu_asid_lock);
1905 return 0;
1907 out_free_cd_tables:
1908 arm_smmu_free_cd_tables(smmu_domain);
1909 out_free_asid:
1910 arm_smmu_free_asid(&cfg->cd);
1911 out_unlock:
1912 mutex_unlock(&arm_smmu_asid_lock);
1913 return ret;
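/*
 * Stage-2 finalisation: allocate a VMID and pack the VTCR/VTTBR fields that
 * will later be written into the STE.
 */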
1916 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1917 struct arm_smmu_master *master,
1918 struct io_pgtable_cfg *pgtbl_cfg)
1920 int vmid;
1921 struct arm_smmu_device *smmu = smmu_domain->smmu;
1922 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1923 typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
1925 vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1926 if (vmid < 0)
1927 return vmid;
1929 vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1930 cfg->vmid = (u16)vmid;
1931 cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1932 cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1933 FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1934 FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1935 FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1936 FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1937 FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1938 FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1939 return 0;
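/*
 * Illustrative note: stage 2 is simpler than stage 1 -- the VMID comes
 * from a plain bitmap rather than the shared ASID xarray, and the VTCR
 * value assembled here is merged into the STE by
 * arm_smmu_write_strtab_ent() rather than into a context descriptor.
 */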
1942 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
1943 struct arm_smmu_master *master)
1945 int ret;
1946 unsigned long ias, oas;
1947 enum io_pgtable_fmt fmt;
1948 struct io_pgtable_cfg pgtbl_cfg;
1949 struct io_pgtable_ops *pgtbl_ops;
1950 int (*finalise_stage_fn)(struct arm_smmu_domain *,
1951 struct arm_smmu_master *,
1952 struct io_pgtable_cfg *);
1953 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1954 struct arm_smmu_device *smmu = smmu_domain->smmu;
1956 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1957 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1958 return 0;
1961 /* Restrict the stage to what we can actually support */
1962 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1963 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1964 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1965 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1967 switch (smmu_domain->stage) {
1968 case ARM_SMMU_DOMAIN_S1:
1969 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1970 ias = min_t(unsigned long, ias, VA_BITS);
1971 oas = smmu->ias;
1972 fmt = ARM_64_LPAE_S1;
1973 finalise_stage_fn = arm_smmu_domain_finalise_s1;
1974 break;
1975 case ARM_SMMU_DOMAIN_NESTED:
1976 case ARM_SMMU_DOMAIN_S2:
1977 ias = smmu->ias;
1978 oas = smmu->oas;
1979 fmt = ARM_64_LPAE_S2;
1980 finalise_stage_fn = arm_smmu_domain_finalise_s2;
1981 break;
1982 default:
1983 return -EINVAL;
1986 pgtbl_cfg = (struct io_pgtable_cfg) {
1987 .pgsize_bitmap = smmu->pgsize_bitmap,
1988 .ias = ias,
1989 .oas = oas,
1990 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
1991 .tlb = &arm_smmu_flush_ops,
1992 .iommu_dev = smmu->dev,
1995 if (smmu_domain->non_strict)
1996 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1998 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1999 if (!pgtbl_ops)
2000 return -ENOMEM;
2002 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2003 domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2004 domain->geometry.force_aperture = true;
2006 ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2007 if (ret < 0) {
2008 free_io_pgtable_ops(pgtbl_ops);
2009 return ret;
2012 smmu_domain->pgtbl_ops = pgtbl_ops;
2013 return 0;
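/*
 * Illustrative note: identity domains never reach the page-table code
 * (they short-circuit to bypass above), the requested stage is clamped
 * to what the hardware supports, and non-strict DMA domains only differ
 * by the NON_STRICT io-pgtable quirk. For stage 1 the input size is
 * additionally capped to the CPU's VA_BITS, while the output size is
 * the SMMU's reported input address size (smmu->ias).
 */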
2016 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2018 __le64 *step;
2019 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2021 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2022 struct arm_smmu_strtab_l1_desc *l1_desc;
2023 int idx;
2025 /* Two-level walk */
2026 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2027 l1_desc = &cfg->l1_desc[idx];
2028 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2029 step = &l1_desc->l2ptr[idx];
2030 } else {
2031 /* Simple linear lookup */
2032 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2035 return step;
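/*
 * Illustrative note: assuming the usual STRTAB_SPLIT of 8, SID 0x1234
 * resolves to L1 descriptor 0x12 and STE 0x34 within that descriptor's
 * L2 table, while a linear stream table simply indexes by the SID.
 * Either way the returned "step" points at one STRTAB_STE_DWORDS-sized
 * STE.
 */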
2038 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2040 int i, j;
2041 struct arm_smmu_device *smmu = master->smmu;
2043 for (i = 0; i < master->num_sids; ++i) {
2044 u32 sid = master->sids[i];
2045 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2047 /* Bridged PCI devices may end up with duplicated IDs */
2048 for (j = 0; j < i; j++)
2049 if (master->sids[j] == sid)
2050 break;
2051 if (j < i)
2052 continue;
2054 arm_smmu_write_strtab_ent(master, sid, step);
2058 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2060 struct device *dev = master->dev;
2061 struct arm_smmu_device *smmu = master->smmu;
2062 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2064 if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2065 return false;
2067 if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2068 return false;
2070 return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2073 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2075 size_t stu;
2076 struct pci_dev *pdev;
2077 struct arm_smmu_device *smmu = master->smmu;
2078 struct arm_smmu_domain *smmu_domain = master->domain;
2080 /* Don't enable ATS at the endpoint if it's not enabled in the STE */
2081 if (!master->ats_enabled)
2082 return;
2084 /* Smallest Translation Unit: log2 of the smallest supported granule */
2085 stu = __ffs(smmu->pgsize_bitmap);
2086 pdev = to_pci_dev(master->dev);
2088 atomic_inc(&smmu_domain->nr_ats_masters);
2089 arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2090 if (pci_enable_ats(pdev, stu))
2091 dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2094 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2096 struct arm_smmu_domain *smmu_domain = master->domain;
2098 if (!master->ats_enabled)
2099 return;
2101 pci_disable_ats(to_pci_dev(master->dev));
2102 /*
2103 * Ensure ATS is disabled at the endpoint before we issue the
2104 * ATC invalidation via the SMMU.
2105 */
2106 wmb();
2107 arm_smmu_atc_inv_master(master);
2108 atomic_dec(&smmu_domain->nr_ats_masters);
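/*
 * Illustrative note: this is the mirror image of arm_smmu_enable_ats().
 * ATS is turned off at the endpoint first, the wmb() orders that against
 * the ATC invalidation, and nr_ats_masters only drops once the ATC is
 * known to be clean, so concurrent arm_smmu_atc_inv_domain() callers
 * keep considering this master until then.
 */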
2111 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2113 int ret;
2114 int features;
2115 int num_pasids;
2116 struct pci_dev *pdev;
2118 if (!dev_is_pci(master->dev))
2119 return -ENODEV;
2121 pdev = to_pci_dev(master->dev);
2123 features = pci_pasid_features(pdev);
2124 if (features < 0)
2125 return features;
2127 num_pasids = pci_max_pasids(pdev);
2128 if (num_pasids <= 0)
2129 return num_pasids;
2131 ret = pci_enable_pasid(pdev, features);
2132 if (ret) {
2133 dev_err(&pdev->dev, "Failed to enable PASID\n");
2134 return ret;
2137 master->ssid_bits = min_t(u8, ilog2(num_pasids),
2138 master->smmu->ssid_bits);
2139 return 0;
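/*
 * Illustrative note: a device advertising 2^16 PASIDs ends up with
 * ilog2(65536) = 16 here, further clamped to the SMMU's own SSID width;
 * the feature bits reported by pci_pasid_features() are passed straight
 * through to pci_enable_pasid().
 */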
2142 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2144 struct pci_dev *pdev;
2146 if (!dev_is_pci(master->dev))
2147 return;
2149 pdev = to_pci_dev(master->dev);
2151 if (!pdev->pasid_enabled)
2152 return;
2154 master->ssid_bits = 0;
2155 pci_disable_pasid(pdev);
2158 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2160 unsigned long flags;
2161 struct arm_smmu_domain *smmu_domain = master->domain;
2163 if (!smmu_domain)
2164 return;
2166 arm_smmu_disable_ats(master);
2168 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2169 list_del(&master->domain_head);
2170 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2172 master->domain = NULL;
2173 master->ats_enabled = false;
2174 arm_smmu_install_ste_for_dev(master);
2177 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2179 int ret = 0;
2180 unsigned long flags;
2181 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2182 struct arm_smmu_device *smmu;
2183 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2184 struct arm_smmu_master *master;
2186 if (!fwspec)
2187 return -ENOENT;
2189 master = dev_iommu_priv_get(dev);
2190 smmu = master->smmu;
2192 /*
2193 * Checking that SVA is disabled ensures that this device isn't bound to
2194 * any mm, and can be safely detached from its old domain. Bonds cannot
2195 * be removed concurrently since we're holding the group mutex.
2196 */
2197 if (arm_smmu_master_sva_enabled(master)) {
2198 dev_err(dev, "cannot attach - SVA enabled\n");
2199 return -EBUSY;
2202 arm_smmu_detach_dev(master);
2204 mutex_lock(&smmu_domain->init_mutex);
2206 if (!smmu_domain->smmu) {
2207 smmu_domain->smmu = smmu;
2208 ret = arm_smmu_domain_finalise(domain, master);
2209 if (ret) {
2210 smmu_domain->smmu = NULL;
2211 goto out_unlock;
2213 } else if (smmu_domain->smmu != smmu) {
2214 dev_err(dev,
2215 "cannot attach to SMMU %s (upstream of %s)\n",
2216 dev_name(smmu_domain->smmu->dev),
2217 dev_name(smmu->dev));
2218 ret = -ENXIO;
2219 goto out_unlock;
2220 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2221 master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2222 dev_err(dev,
2223 "cannot attach to incompatible domain (%u SSID bits != %u)\n",
2224 smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2225 ret = -EINVAL;
2226 goto out_unlock;
2229 master->domain = smmu_domain;
2231 if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2232 master->ats_enabled = arm_smmu_ats_supported(master);
2234 arm_smmu_install_ste_for_dev(master);
2236 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2237 list_add(&master->domain_head, &smmu_domain->devices);
2238 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2240 arm_smmu_enable_ats(master);
2242 out_unlock:
2243 mutex_unlock(&smmu_domain->init_mutex);
2244 return ret;
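/*
 * Illustrative note: ordering matters in the attach path above -- the
 * master is detached first, the domain is finalised exactly once under
 * init_mutex, the STE is installed before the master joins the domain's
 * device list, and ATS is enabled last. Callers typically land here via
 * the IOMMU core, e.g. iommu_attach_device(domain, dev).
 */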
2247 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2248 phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2250 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2252 if (!ops)
2253 return -ENODEV;
2255 return ops->map(ops, iova, paddr, size, prot, gfp);
2258 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2259 size_t size, struct iommu_iotlb_gather *gather)
2261 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2262 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2264 if (!ops)
2265 return 0;
2267 return ops->unmap(ops, iova, size, gather);
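/*
 * Illustrative note: map/unmap are thin wrappers around the io-pgtable
 * ops installed by arm_smmu_domain_finalise(). Roughly, a core call such
 * as iommu_map(domain, iova, paddr, SZ_4K, IOMMU_READ | IOMMU_WRITE)
 * ends up in ops->map() here, while unmap gathers invalidations for the
 * iotlb_sync callback below.
 */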
2270 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2272 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2274 if (smmu_domain->smmu)
2275 arm_smmu_tlb_inv_context(smmu_domain);
2278 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2279 struct iommu_iotlb_gather *gather)
2281 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2283 arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
2284 gather->pgsize, true, smmu_domain);
2287 static phys_addr_t
2288 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2290 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2292 if (domain->type == IOMMU_DOMAIN_IDENTITY)
2293 return iova;
2295 if (!ops)
2296 return 0;
2298 return ops->iova_to_phys(ops, iova);
2301 static struct platform_driver arm_smmu_driver;
2303 static
2304 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2306 struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2307 fwnode);
2308 put_device(dev);
2309 return dev ? dev_get_drvdata(dev) : NULL;
2312 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2314 unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2316 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2317 limit *= 1UL << STRTAB_SPLIT;
2319 return sid < limit;
2322 static struct iommu_ops arm_smmu_ops;
2324 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2326 int i, ret;
2327 struct arm_smmu_device *smmu;
2328 struct arm_smmu_master *master;
2329 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2331 if (!fwspec || fwspec->ops != &arm_smmu_ops)
2332 return ERR_PTR(-ENODEV);
2334 if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2335 return ERR_PTR(-EBUSY);
2337 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2338 if (!smmu)
2339 return ERR_PTR(-ENODEV);
2341 master = kzalloc(sizeof(*master), GFP_KERNEL);
2342 if (!master)
2343 return ERR_PTR(-ENOMEM);
2345 master->dev = dev;
2346 master->smmu = smmu;
2347 master->sids = fwspec->ids;
2348 master->num_sids = fwspec->num_ids;
2349 INIT_LIST_HEAD(&master->bonds);
2350 dev_iommu_priv_set(dev, master);
2352 /* Check the SIDs are in range of the SMMU and our stream table */
2353 for (i = 0; i < master->num_sids; i++) {
2354 u32 sid = master->sids[i];
2356 if (!arm_smmu_sid_in_range(smmu, sid)) {
2357 ret = -ERANGE;
2358 goto err_free_master;
2361 /* Ensure l2 strtab is initialised */
2362 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2363 ret = arm_smmu_init_l2_strtab(smmu, sid);
2364 if (ret)
2365 goto err_free_master;
2369 master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
2371 /*
2372 * Note that PASID must be enabled before, and disabled after ATS:
2373 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2374 *
2375 * Behavior is undefined if this bit is Set and the value of the PASID
2376 * Enable, Execute Requested Enable, or Privileged Mode Requested bits
2377 * are changed.
2378 */
2379 arm_smmu_enable_pasid(master);
2381 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2382 master->ssid_bits = min_t(u8, master->ssid_bits,
2383 CTXDESC_LINEAR_CDMAX);
2385 return &smmu->iommu;
2387 err_free_master:
2388 kfree(master);
2389 dev_iommu_priv_set(dev, NULL);
2390 return ERR_PTR(ret);
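/*
 * Illustrative note: each endpoint gets an arm_smmu_master carrying the
 * stream IDs from its firmware description. Every SID is range-checked
 * and, for 2-level stream tables, its L2 leaf is allocated up front so
 * the attach path never needs to allocate. The usable SSID width is the
 * minimum of what the device and the SMMU support, clamped further when
 * only linear CD tables are available.
 */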
2393 static void arm_smmu_release_device(struct device *dev)
2395 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2396 struct arm_smmu_master *master;
2398 if (!fwspec || fwspec->ops != &arm_smmu_ops)
2399 return;
2401 master = dev_iommu_priv_get(dev);
2402 WARN_ON(arm_smmu_master_sva_enabled(master));
2403 arm_smmu_detach_dev(master);
2404 arm_smmu_disable_pasid(master);
2405 kfree(master);
2406 iommu_fwspec_free(dev);
2409 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2411 struct iommu_group *group;
2413 /*
2414 * We don't support devices sharing stream IDs other than PCI RID
2415 * aliases, since the necessary ID-to-device lookup becomes rather
2416 * impractical given a potential sparse 32-bit stream ID space.
2417 */
2418 if (dev_is_pci(dev))
2419 group = pci_device_group(dev);
2420 else
2421 group = generic_device_group(dev);
2423 return group;
2426 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
2427 enum iommu_attr attr, void *data)
2429 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2431 switch (domain->type) {
2432 case IOMMU_DOMAIN_UNMANAGED:
2433 switch (attr) {
2434 case DOMAIN_ATTR_NESTING:
2435 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
2436 return 0;
2437 default:
2438 return -ENODEV;
2440 break;
2441 case IOMMU_DOMAIN_DMA:
2442 switch (attr) {
2443 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2444 *(int *)data = smmu_domain->non_strict;
2445 return 0;
2446 default:
2447 return -ENODEV;
2449 break;
2450 default:
2451 return -EINVAL;
2455 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
2456 enum iommu_attr attr, void *data)
2458 int ret = 0;
2459 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2461 mutex_lock(&smmu_domain->init_mutex);
2463 switch (domain->type) {
2464 case IOMMU_DOMAIN_UNMANAGED:
2465 switch (attr) {
2466 case DOMAIN_ATTR_NESTING:
2467 if (smmu_domain->smmu) {
2468 ret = -EPERM;
2469 goto out_unlock;
2472 if (*(int *)data)
2473 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2474 else
2475 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2476 break;
2477 default:
2478 ret = -ENODEV;
2480 break;
2481 case IOMMU_DOMAIN_DMA:
2482 switch(attr) {
2483 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2484 smmu_domain->non_strict = *(int *)data;
2485 break;
2486 default:
2487 ret = -ENODEV;
2489 break;
2490 default:
2491 ret = -EINVAL;
2494 out_unlock:
2495 mutex_unlock(&smmu_domain->init_mutex);
2496 return ret;
2499 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2501 return iommu_fwspec_add_ids(dev, args->args, 1);
2504 static void arm_smmu_get_resv_regions(struct device *dev,
2505 struct list_head *head)
2507 struct iommu_resv_region *region;
2508 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2510 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2511 prot, IOMMU_RESV_SW_MSI);
2512 if (!region)
2513 return;
2515 list_add_tail(&region->list, head);
2517 iommu_dma_get_resv_regions(dev, head);
2520 static bool arm_smmu_dev_has_feature(struct device *dev,
2521 enum iommu_dev_features feat)
2523 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2525 if (!master)
2526 return false;
2528 switch (feat) {
2529 case IOMMU_DEV_FEAT_SVA:
2530 return arm_smmu_master_sva_supported(master);
2531 default:
2532 return false;
2536 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2537 enum iommu_dev_features feat)
2539 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2541 if (!master)
2542 return false;
2544 switch (feat) {
2545 case IOMMU_DEV_FEAT_SVA:
2546 return arm_smmu_master_sva_enabled(master);
2547 default:
2548 return false;
2552 static int arm_smmu_dev_enable_feature(struct device *dev,
2553 enum iommu_dev_features feat)
2555 if (!arm_smmu_dev_has_feature(dev, feat))
2556 return -ENODEV;
2558 if (arm_smmu_dev_feature_enabled(dev, feat))
2559 return -EBUSY;
2561 switch (feat) {
2562 case IOMMU_DEV_FEAT_SVA:
2563 return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
2564 default:
2565 return -EINVAL;
2569 static int arm_smmu_dev_disable_feature(struct device *dev,
2570 enum iommu_dev_features feat)
2572 if (!arm_smmu_dev_feature_enabled(dev, feat))
2573 return -EINVAL;
2575 switch (feat) {
2576 case IOMMU_DEV_FEAT_SVA:
2577 return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
2578 default:
2579 return -EINVAL;
2583 static struct iommu_ops arm_smmu_ops = {
2584 .capable = arm_smmu_capable,
2585 .domain_alloc = arm_smmu_domain_alloc,
2586 .domain_free = arm_smmu_domain_free,
2587 .attach_dev = arm_smmu_attach_dev,
2588 .map = arm_smmu_map,
2589 .unmap = arm_smmu_unmap,
2590 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
2591 .iotlb_sync = arm_smmu_iotlb_sync,
2592 .iova_to_phys = arm_smmu_iova_to_phys,
2593 .probe_device = arm_smmu_probe_device,
2594 .release_device = arm_smmu_release_device,
2595 .device_group = arm_smmu_device_group,
2596 .domain_get_attr = arm_smmu_domain_get_attr,
2597 .domain_set_attr = arm_smmu_domain_set_attr,
2598 .of_xlate = arm_smmu_of_xlate,
2599 .get_resv_regions = arm_smmu_get_resv_regions,
2600 .put_resv_regions = generic_iommu_put_resv_regions,
2601 .dev_has_feat = arm_smmu_dev_has_feature,
2602 .dev_feat_enabled = arm_smmu_dev_feature_enabled,
2603 .dev_enable_feat = arm_smmu_dev_enable_feature,
2604 .dev_disable_feat = arm_smmu_dev_disable_feature,
2605 .sva_bind = arm_smmu_sva_bind,
2606 .sva_unbind = arm_smmu_sva_unbind,
2607 .sva_get_pasid = arm_smmu_sva_get_pasid,
2608 .pgsize_bitmap = -1UL, /* Restricted during device attach */
2611 /* Probing and initialisation functions */
2612 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2613 struct arm_smmu_queue *q,
2614 unsigned long prod_off,
2615 unsigned long cons_off,
2616 size_t dwords, const char *name)
2618 size_t qsz;
2620 do {
2621 qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2622 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2623 GFP_KERNEL);
2624 if (q->base || qsz < PAGE_SIZE)
2625 break;
2627 q->llq.max_n_shift--;
2628 } while (1);
2630 if (!q->base) {
2631 dev_err(smmu->dev,
2632 "failed to allocate queue (0x%zx bytes) for %s\n",
2633 qsz, name);
2634 return -ENOMEM;
2637 if (!WARN_ON(q->base_dma & (qsz - 1))) {
2638 dev_info(smmu->dev, "allocated %u entries for %s\n",
2639 1 << q->llq.max_n_shift, name);
2642 q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu);
2643 q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu);
2644 q->ent_dwords = dwords;
2646 q->q_base = Q_BASE_RWA;
2647 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2648 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2650 q->llq.prod = q->llq.cons = 0;
2651 return 0;
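/*
 * Illustrative note: a queue of 2^max_n_shift entries of "dwords" 64-bit
 * words occupies (1 << max_n_shift) * dwords * 8 bytes -- e.g. assuming
 * 2-dword (16-byte) commands, a 10-bit command queue needs 16KB. While
 * the DMA allocation fails and the size is still at least a page, the
 * shift is walked down and the allocation retried.
 */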
2654 static void arm_smmu_cmdq_free_bitmap(void *data)
2656 unsigned long *bitmap = data;
2657 bitmap_free(bitmap);
2660 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2662 int ret = 0;
2663 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2664 unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2665 atomic_long_t *bitmap;
2667 atomic_set(&cmdq->owner_prod, 0);
2668 atomic_set(&cmdq->lock, 0);
2670 bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2671 if (!bitmap) {
2672 dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2673 ret = -ENOMEM;
2674 } else {
2675 cmdq->valid_map = bitmap;
2676 devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2679 return ret;
2682 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2684 int ret;
2686 /* cmdq */
2687 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2688 ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
2689 "cmdq");
2690 if (ret)
2691 return ret;
2693 ret = arm_smmu_cmdq_init(smmu);
2694 if (ret)
2695 return ret;
2697 /* evtq */
2698 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2699 ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
2700 "evtq");
2701 if (ret)
2702 return ret;
2704 /* priq */
2705 if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2706 return 0;
2708 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2709 ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
2710 "priq");
2713 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2715 unsigned int i;
2716 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2717 size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2718 void *strtab = smmu->strtab_cfg.strtab;
2720 cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2721 if (!cfg->l1_desc) {
2722 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2723 return -ENOMEM;
2726 for (i = 0; i < cfg->num_l1_ents; ++i) {
2727 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2728 strtab += STRTAB_L1_DESC_DWORDS << 3;
2731 return 0;
2734 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2736 void *strtab;
2737 u64 reg;
2738 u32 size, l1size;
2739 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2741 /* Calculate the L1 size, capped to the SIDSIZE. */
2742 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2743 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2744 cfg->num_l1_ents = 1 << size;
2746 size += STRTAB_SPLIT;
2747 if (size < smmu->sid_bits)
2748 dev_warn(smmu->dev,
2749 "2-level strtab only covers %u/%u bits of SID\n",
2750 size, smmu->sid_bits);
2752 l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2753 strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2754 GFP_KERNEL);
2755 if (!strtab) {
2756 dev_err(smmu->dev,
2757 "failed to allocate l1 stream table (%u bytes)\n",
2758 l1size);
2759 return -ENOMEM;
2761 cfg->strtab = strtab;
2763 /* Configure strtab_base_cfg for 2 levels */
2764 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2765 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2766 reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2767 cfg->strtab_base_cfg = reg;
2769 return arm_smmu_init_l1_strtab(smmu);
2772 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2774 void *strtab;
2775 u64 reg;
2776 u32 size;
2777 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2779 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2780 strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2781 GFP_KERNEL);
2782 if (!strtab) {
2783 dev_err(smmu->dev,
2784 "failed to allocate linear stream table (%u bytes)\n",
2785 size);
2786 return -ENOMEM;
2788 cfg->strtab = strtab;
2789 cfg->num_l1_ents = 1 << smmu->sid_bits;
2791 /* Configure strtab_base_cfg for a linear table covering all SIDs */
2792 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2793 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2794 cfg->strtab_base_cfg = reg;
2796 arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2797 return 0;
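/*
 * Illustrative note: assuming the usual 8-dword (64-byte) STE, a linear
 * table for a 16-bit SID space would need 2^16 * 64 bytes = 4MB of
 * contiguous DMA memory; the 2-level format above avoids that by capping
 * the L1 and allocating L2 tables on demand. Linear tables start out
 * with every STE in bypass via arm_smmu_init_bypass_stes().
 */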
2800 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2802 u64 reg;
2803 int ret;
2805 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2806 ret = arm_smmu_init_strtab_2lvl(smmu);
2807 else
2808 ret = arm_smmu_init_strtab_linear(smmu);
2810 if (ret)
2811 return ret;
2813 /* Set the strtab base address */
2814 reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2815 reg |= STRTAB_BASE_RA;
2816 smmu->strtab_cfg.strtab_base = reg;
2818 /* Allocate the first VMID for stage-2 bypass STEs */
2819 set_bit(0, smmu->vmid_map);
2820 return 0;
2823 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2825 int ret;
2827 ret = arm_smmu_init_queues(smmu);
2828 if (ret)
2829 return ret;
2831 return arm_smmu_init_strtab(smmu);
2834 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2835 unsigned int reg_off, unsigned int ack_off)
2837 u32 reg;
2839 writel_relaxed(val, smmu->base + reg_off);
2840 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2841 1, ARM_SMMU_POLL_TIMEOUT_US);
2844 /* GBPA is "special": updates must be handshaked through the UPDATE bit */
2845 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2847 int ret;
2848 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2850 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2851 1, ARM_SMMU_POLL_TIMEOUT_US);
2852 if (ret)
2853 return ret;
2855 reg &= ~clr;
2856 reg |= set;
2857 writel_relaxed(reg | GBPA_UPDATE, gbpa);
2858 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2859 1, ARM_SMMU_POLL_TIMEOUT_US);
2861 if (ret)
2862 dev_err(smmu->dev, "GBPA not responding to update\n");
2863 return ret;
2866 static void arm_smmu_free_msis(void *data)
2868 struct device *dev = data;
2869 platform_msi_domain_free_irqs(dev);
2872 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2874 phys_addr_t doorbell;
2875 struct device *dev = msi_desc_to_dev(desc);
2876 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2877 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2879 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2880 doorbell &= MSI_CFG0_ADDR_MASK;
2882 writeq_relaxed(doorbell, smmu->base + cfg[0]);
2883 writel_relaxed(msg->data, smmu->base + cfg[1]);
2884 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2887 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2889 struct msi_desc *desc;
2890 int ret, nvec = ARM_SMMU_MAX_MSIS;
2891 struct device *dev = smmu->dev;
2893 /* Clear the MSI address regs */
2894 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2895 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2897 if (smmu->features & ARM_SMMU_FEAT_PRI)
2898 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2899 else
2900 nvec--;
2902 if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2903 return;
2905 if (!dev->msi_domain) {
2906 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2907 return;
2910 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2911 ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2912 if (ret) {
2913 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2914 return;
2917 for_each_msi_entry(desc, dev) {
2918 switch (desc->platform.msi_index) {
2919 case EVTQ_MSI_INDEX:
2920 smmu->evtq.q.irq = desc->irq;
2921 break;
2922 case GERROR_MSI_INDEX:
2923 smmu->gerr_irq = desc->irq;
2924 break;
2925 case PRIQ_MSI_INDEX:
2926 smmu->priq.q.irq = desc->irq;
2927 break;
2928 default: /* Unknown */
2929 continue;
2933 /* Add callback to free MSIs on teardown */
2934 devm_add_action(dev, arm_smmu_free_msis, dev);
2937 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2939 int irq, ret;
2941 arm_smmu_setup_msis(smmu);
2943 /* Request interrupt lines */
2944 irq = smmu->evtq.q.irq;
2945 if (irq) {
2946 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2947 arm_smmu_evtq_thread,
2948 IRQF_ONESHOT,
2949 "arm-smmu-v3-evtq", smmu);
2950 if (ret < 0)
2951 dev_warn(smmu->dev, "failed to enable evtq irq\n");
2952 } else {
2953 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2956 irq = smmu->gerr_irq;
2957 if (irq) {
2958 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2959 0, "arm-smmu-v3-gerror", smmu);
2960 if (ret < 0)
2961 dev_warn(smmu->dev, "failed to enable gerror irq\n");
2962 } else {
2963 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2966 if (smmu->features & ARM_SMMU_FEAT_PRI) {
2967 irq = smmu->priq.q.irq;
2968 if (irq) {
2969 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2970 arm_smmu_priq_thread,
2971 IRQF_ONESHOT,
2972 "arm-smmu-v3-priq",
2973 smmu);
2974 if (ret < 0)
2975 dev_warn(smmu->dev,
2976 "failed to enable priq irq\n");
2977 } else {
2978 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2983 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2985 int ret, irq;
2986 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2988 /* Disable IRQs first */
2989 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2990 ARM_SMMU_IRQ_CTRLACK);
2991 if (ret) {
2992 dev_err(smmu->dev, "failed to disable irqs\n");
2993 return ret;
2996 irq = smmu->combined_irq;
2997 if (irq) {
2998 /*
2999 * Cavium ThunderX2 implementation doesn't support unique irq
3000 * lines. Use a single irq line for all the SMMUv3 interrupts.
3001 */
3002 ret = devm_request_threaded_irq(smmu->dev, irq,
3003 arm_smmu_combined_irq_handler,
3004 arm_smmu_combined_irq_thread,
3005 IRQF_ONESHOT,
3006 "arm-smmu-v3-combined-irq", smmu);
3007 if (ret < 0)
3008 dev_warn(smmu->dev, "failed to enable combined irq\n");
3009 } else
3010 arm_smmu_setup_unique_irqs(smmu);
3012 if (smmu->features & ARM_SMMU_FEAT_PRI)
3013 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3015 /* Enable interrupt generation on the SMMU */
3016 ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3017 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3018 if (ret)
3019 dev_warn(smmu->dev, "failed to enable irqs\n");
3021 return 0;
3024 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3026 int ret;
3028 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3029 if (ret)
3030 dev_err(smmu->dev, "failed to clear cr0\n");
3032 return ret;
3035 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3037 int ret;
3038 u32 reg, enables;
3039 struct arm_smmu_cmdq_ent cmd;
3041 /* Clear CR0 and sync (disables SMMU and queue processing) */
3042 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3043 if (reg & CR0_SMMUEN) {
3044 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3045 WARN_ON(is_kdump_kernel() && !disable_bypass);
3046 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3049 ret = arm_smmu_device_disable(smmu);
3050 if (ret)
3051 return ret;
3053 /* CR1 (table and queue memory attributes) */
3054 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3055 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3056 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3057 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3058 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3059 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3060 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3062 /* CR2 (PTM, RECINVSID and E2H) */
3063 reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
3064 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3066 /* Stream table */
3067 writeq_relaxed(smmu->strtab_cfg.strtab_base,
3068 smmu->base + ARM_SMMU_STRTAB_BASE);
3069 writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3070 smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3072 /* Command queue */
3073 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3074 writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3075 writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3077 enables = CR0_CMDQEN;
3078 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3079 ARM_SMMU_CR0ACK);
3080 if (ret) {
3081 dev_err(smmu->dev, "failed to enable command queue\n");
3082 return ret;
3085 /* Invalidate any cached configuration */
3086 cmd.opcode = CMDQ_OP_CFGI_ALL;
3087 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3088 arm_smmu_cmdq_issue_sync(smmu);
3090 /* Invalidate any stale TLB entries */
3091 if (smmu->features & ARM_SMMU_FEAT_HYP) {
3092 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3093 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3096 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3097 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3098 arm_smmu_cmdq_issue_sync(smmu);
3100 /* Event queue */
3101 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3102 writel_relaxed(smmu->evtq.q.llq.prod,
3103 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
3104 writel_relaxed(smmu->evtq.q.llq.cons,
3105 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
3107 enables |= CR0_EVTQEN;
3108 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3109 ARM_SMMU_CR0ACK);
3110 if (ret) {
3111 dev_err(smmu->dev, "failed to enable event queue\n");
3112 return ret;
3115 /* PRI queue */
3116 if (smmu->features & ARM_SMMU_FEAT_PRI) {
3117 writeq_relaxed(smmu->priq.q.q_base,
3118 smmu->base + ARM_SMMU_PRIQ_BASE);
3119 writel_relaxed(smmu->priq.q.llq.prod,
3120 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
3121 writel_relaxed(smmu->priq.q.llq.cons,
3122 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
3124 enables |= CR0_PRIQEN;
3125 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3126 ARM_SMMU_CR0ACK);
3127 if (ret) {
3128 dev_err(smmu->dev, "failed to enable PRI queue\n");
3129 return ret;
3133 if (smmu->features & ARM_SMMU_FEAT_ATS) {
3134 enables |= CR0_ATSCHK;
3135 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3136 ARM_SMMU_CR0ACK);
3137 if (ret) {
3138 dev_err(smmu->dev, "failed to enable ATS check\n");
3139 return ret;
3143 ret = arm_smmu_setup_irqs(smmu);
3144 if (ret) {
3145 dev_err(smmu->dev, "failed to setup irqs\n");
3146 return ret;
3149 if (is_kdump_kernel())
3150 enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3152 /* Enable the SMMU interface, or ensure bypass */
3153 if (!bypass || disable_bypass) {
3154 enables |= CR0_SMMUEN;
3155 } else {
3156 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3157 if (ret)
3158 return ret;
3160 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3161 ARM_SMMU_CR0ACK);
3162 if (ret) {
3163 dev_err(smmu->dev, "failed to enable SMMU interface\n");
3164 return ret;
3167 return 0;
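/*
 * Illustrative note: the reset sequence is strictly ordered -- disable
 * the SMMU (forcing GBPA to abort if it was left enabled), program
 * CR1/CR2 and the stream table registers, bring up the command queue
 * first so CFGI_ALL and the TLBI commands can flush stale state, then
 * the event/PRI queues, ATS checking and IRQs, and only then set SMMUEN
 * (or configure GBPA for bypass when firmware probing requested it).
 */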
3170 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3172 u32 reg;
3173 bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3175 /* IDR0 */
3176 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3178 /* 2-level structures */
3179 if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3180 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3182 if (reg & IDR0_CD2L)
3183 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3185 /*
3186 * Translation table endianness.
3187 * We currently require the same endianness as the CPU, but this
3188 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3189 */
3190 switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3191 case IDR0_TTENDIAN_MIXED:
3192 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3193 break;
3194 #ifdef __BIG_ENDIAN
3195 case IDR0_TTENDIAN_BE:
3196 smmu->features |= ARM_SMMU_FEAT_TT_BE;
3197 break;
3198 #else
3199 case IDR0_TTENDIAN_LE:
3200 smmu->features |= ARM_SMMU_FEAT_TT_LE;
3201 break;
3202 #endif
3203 default:
3204 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3205 return -ENXIO;
3208 /* Boolean feature flags */
3209 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3210 smmu->features |= ARM_SMMU_FEAT_PRI;
3212 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3213 smmu->features |= ARM_SMMU_FEAT_ATS;
3215 if (reg & IDR0_SEV)
3216 smmu->features |= ARM_SMMU_FEAT_SEV;
3218 if (reg & IDR0_MSI) {
3219 smmu->features |= ARM_SMMU_FEAT_MSI;
3220 if (coherent && !disable_msipolling)
3221 smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3224 if (reg & IDR0_HYP)
3225 smmu->features |= ARM_SMMU_FEAT_HYP;
3227 /*
3228 * The coherency feature as set by FW is used in preference to the ID
3229 * register, but warn on mismatch.
3230 */
3231 if (!!(reg & IDR0_COHACC) != coherent)
3232 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3233 coherent ? "true" : "false");
3235 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3236 case IDR0_STALL_MODEL_FORCE:
3237 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3238 fallthrough;
3239 case IDR0_STALL_MODEL_STALL:
3240 smmu->features |= ARM_SMMU_FEAT_STALLS;
3243 if (reg & IDR0_S1P)
3244 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3246 if (reg & IDR0_S2P)
3247 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3249 if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3250 dev_err(smmu->dev, "no translation support!\n");
3251 return -ENXIO;
3254 /* We only support the AArch64 table format at present */
3255 switch (FIELD_GET(IDR0_TTF, reg)) {
3256 case IDR0_TTF_AARCH32_64:
3257 smmu->ias = 40;
3258 fallthrough;
3259 case IDR0_TTF_AARCH64:
3260 break;
3261 default:
3262 dev_err(smmu->dev, "AArch64 table format not supported!\n");
3263 return -ENXIO;
3266 /* ASID/VMID sizes */
3267 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3268 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3270 /* IDR1 */
3271 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3272 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3273 dev_err(smmu->dev, "embedded implementation not supported\n");
3274 return -ENXIO;
3277 /* Queue sizes, capped to ensure natural alignment */
3278 smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3279 FIELD_GET(IDR1_CMDQS, reg));
3280 if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3281 /*
3282 * We don't support splitting up batches, so one batch of
3283 * commands plus an extra sync needs to fit inside the command
3284 * queue. There's also no way we can handle the weird alignment
3285 * restrictions on the base pointer for a unit-length queue.
3286 */
3287 dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3288 CMDQ_BATCH_ENTRIES);
3289 return -ENXIO;
3292 smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3293 FIELD_GET(IDR1_EVTQS, reg));
3294 smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3295 FIELD_GET(IDR1_PRIQS, reg));
3297 /* SID/SSID sizes */
3298 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3299 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3301 /*
3302 * If the SMMU supports fewer bits than would fill a single L2 stream
3303 * table, use a linear table instead.
3304 */
3305 if (smmu->sid_bits <= STRTAB_SPLIT)
3306 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3308 /* IDR3 */
3309 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3310 if (FIELD_GET(IDR3_RIL, reg))
3311 smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3313 /* IDR5 */
3314 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3316 /* Maximum number of outstanding stalls */
3317 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3319 /* Page sizes */
3320 if (reg & IDR5_GRAN64K)
3321 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3322 if (reg & IDR5_GRAN16K)
3323 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3324 if (reg & IDR5_GRAN4K)
3325 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3327 /* Input address size */
3328 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3329 smmu->features |= ARM_SMMU_FEAT_VAX;
3331 /* Output address size */
3332 switch (FIELD_GET(IDR5_OAS, reg)) {
3333 case IDR5_OAS_32_BIT:
3334 smmu->oas = 32;
3335 break;
3336 case IDR5_OAS_36_BIT:
3337 smmu->oas = 36;
3338 break;
3339 case IDR5_OAS_40_BIT:
3340 smmu->oas = 40;
3341 break;
3342 case IDR5_OAS_42_BIT:
3343 smmu->oas = 42;
3344 break;
3345 case IDR5_OAS_44_BIT:
3346 smmu->oas = 44;
3347 break;
3348 case IDR5_OAS_52_BIT:
3349 smmu->oas = 52;
3350 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3351 break;
3352 default:
3353 dev_info(smmu->dev,
3354 "unknown output address size. Truncating to 48-bit\n");
3355 fallthrough;
3356 case IDR5_OAS_48_BIT:
3357 smmu->oas = 48;
3360 if (arm_smmu_ops.pgsize_bitmap == -1UL)
3361 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3362 else
3363 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3365 /* Set the DMA mask for our table walker */
3366 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3367 dev_warn(smmu->dev,
3368 "failed to set DMA mask for table walker\n");
3370 smmu->ias = max(smmu->ias, smmu->oas);
3372 if (arm_smmu_sva_supported(smmu))
3373 smmu->features |= ARM_SMMU_FEAT_SVA;
3375 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3376 smmu->ias, smmu->oas, smmu->features);
3377 return 0;
3380 #ifdef CONFIG_ACPI
3381 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3383 switch (model) {
3384 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3385 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3386 break;
3387 case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3388 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3389 break;
3392 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3395 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3396 struct arm_smmu_device *smmu)
3398 struct acpi_iort_smmu_v3 *iort_smmu;
3399 struct device *dev = smmu->dev;
3400 struct acpi_iort_node *node;
3402 node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3404 /* Retrieve SMMUv3 specific data */
3405 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3407 acpi_smmu_get_options(iort_smmu->model, smmu);
3409 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3410 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3412 return 0;
3414 #else
3415 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3416 struct arm_smmu_device *smmu)
3418 return -ENODEV;
3420 #endif
3422 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3423 struct arm_smmu_device *smmu)
3425 struct device *dev = &pdev->dev;
3426 u32 cells;
3427 int ret = -EINVAL;
3429 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3430 dev_err(dev, "missing #iommu-cells property\n");
3431 else if (cells != 1)
3432 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3433 else
3434 ret = 0;
3436 parse_driver_options(smmu);
3438 if (of_dma_is_coherent(dev->of_node))
3439 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3441 return ret;
3444 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3446 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3447 return SZ_64K;
3448 else
3449 return SZ_128K;
3452 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3454 int err;
3456 #ifdef CONFIG_PCI
3457 if (pci_bus_type.iommu_ops != ops) {
3458 err = bus_set_iommu(&pci_bus_type, ops);
3459 if (err)
3460 return err;
3462 #endif
3463 #ifdef CONFIG_ARM_AMBA
3464 if (amba_bustype.iommu_ops != ops) {
3465 err = bus_set_iommu(&amba_bustype, ops);
3466 if (err)
3467 goto err_reset_pci_ops;
3469 #endif
3470 if (platform_bus_type.iommu_ops != ops) {
3471 err = bus_set_iommu(&platform_bus_type, ops);
3472 if (err)
3473 goto err_reset_amba_ops;
3476 return 0;
3478 err_reset_amba_ops:
3479 #ifdef CONFIG_ARM_AMBA
3480 bus_set_iommu(&amba_bustype, NULL);
3481 #endif
3482 err_reset_pci_ops: __maybe_unused;
3483 #ifdef CONFIG_PCI
3484 bus_set_iommu(&pci_bus_type, NULL);
3485 #endif
3486 return err;
3489 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3490 resource_size_t size)
3492 struct resource res = {
3493 .flags = IORESOURCE_MEM,
3494 .start = start,
3495 .end = start + size - 1,
3498 return devm_ioremap_resource(dev, &res);
3501 static int arm_smmu_device_probe(struct platform_device *pdev)
3503 int irq, ret;
3504 struct resource *res;
3505 resource_size_t ioaddr;
3506 struct arm_smmu_device *smmu;
3507 struct device *dev = &pdev->dev;
3508 bool bypass;
3510 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3511 if (!smmu) {
3512 dev_err(dev, "failed to allocate arm_smmu_device\n");
3513 return -ENOMEM;
3515 smmu->dev = dev;
3517 if (dev->of_node) {
3518 ret = arm_smmu_device_dt_probe(pdev, smmu);
3519 } else {
3520 ret = arm_smmu_device_acpi_probe(pdev, smmu);
3521 if (ret == -ENODEV)
3522 return ret;
3525 /* Set bypass mode according to firmware probing result */
3526 bypass = !!ret;
3528 /* Base address */
3529 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3530 if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3531 dev_err(dev, "MMIO region too small (%pr)\n", res);
3532 return -EINVAL;
3534 ioaddr = res->start;
3536 /*
3537 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3538 * the PMCG registers which are reserved by the PMU driver.
3539 */
3540 smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3541 if (IS_ERR(smmu->base))
3542 return PTR_ERR(smmu->base);
3544 if (arm_smmu_resource_size(smmu) > SZ_64K) {
3545 smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3546 ARM_SMMU_REG_SZ);
3547 if (IS_ERR(smmu->page1))
3548 return PTR_ERR(smmu->page1);
3549 } else {
3550 smmu->page1 = smmu->base;
3553 /* Interrupt lines */
3555 irq = platform_get_irq_byname_optional(pdev, "combined");
3556 if (irq > 0)
3557 smmu->combined_irq = irq;
3558 else {
3559 irq = platform_get_irq_byname_optional(pdev, "eventq");
3560 if (irq > 0)
3561 smmu->evtq.q.irq = irq;
3563 irq = platform_get_irq_byname_optional(pdev, "priq");
3564 if (irq > 0)
3565 smmu->priq.q.irq = irq;
3567 irq = platform_get_irq_byname_optional(pdev, "gerror");
3568 if (irq > 0)
3569 smmu->gerr_irq = irq;
3571 /* Probe the h/w */
3572 ret = arm_smmu_device_hw_probe(smmu);
3573 if (ret)
3574 return ret;
3576 /* Initialise in-memory data structures */
3577 ret = arm_smmu_init_structures(smmu);
3578 if (ret)
3579 return ret;
3581 /* Record our private device structure */
3582 platform_set_drvdata(pdev, smmu);
3584 /* Reset the device */
3585 ret = arm_smmu_device_reset(smmu, bypass);
3586 if (ret)
3587 return ret;
3589 /* And we're up. Go go go! */
3590 ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3591 "smmu3.%pa", &ioaddr);
3592 if (ret)
3593 return ret;
3595 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
3596 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
3598 ret = iommu_device_register(&smmu->iommu);
3599 if (ret) {
3600 dev_err(dev, "Failed to register iommu\n");
3601 return ret;
3604 return arm_smmu_set_bus_ops(&arm_smmu_ops);
3607 static int arm_smmu_device_remove(struct platform_device *pdev)
3609 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3611 arm_smmu_set_bus_ops(NULL);
3612 iommu_device_unregister(&smmu->iommu);
3613 iommu_device_sysfs_remove(&smmu->iommu);
3614 arm_smmu_device_disable(smmu);
3616 return 0;
3619 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3621 arm_smmu_device_remove(pdev);
3624 static const struct of_device_id arm_smmu_of_match[] = {
3625 { .compatible = "arm,smmu-v3", },
3626 { },
3628 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3630 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3632 arm_smmu_sva_notifier_synchronize();
3633 platform_driver_unregister(drv);
3636 static struct platform_driver arm_smmu_driver = {
3637 .driver = {
3638 .name = "arm-smmu-v3",
3639 .of_match_table = arm_smmu_of_match,
3640 .suppress_bind_attrs = true,
3642 .probe = arm_smmu_device_probe,
3643 .remove = arm_smmu_device_remove,
3644 .shutdown = arm_smmu_device_shutdown,
3646 module_driver(arm_smmu_driver, platform_driver_register,
3647 arm_smmu_driver_unregister);
3649 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3650 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3651 MODULE_ALIAS("platform:arm-smmu-v3");
3652 MODULE_LICENSE("GPL v2");