// SPDX-License-Identifier: GPL-2.0
/*
 * intel-pasid.c - PASID idr, table and entry manipulation
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt)	"DMAR: " fmt

#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>

#include "iommu.h"
#include "pasid.h"
#include "../iommu-pages.h"

/*
 * Intel IOMMU system wide PASID name space:
 */
u32 intel_pasid_max_id = PASID_MAX;
/*
 * Per device pasid table management:
 */

/*
 * Allocate a pasid table for @dev. It should be called in a
 * single-thread context.
 */
int intel_pasid_alloc_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	u32 max_pasid = 0;
	int order, size;

	might_sleep();
	info = dev_iommu_priv_get(dev);
	if (WARN_ON(!info || !dev_is_pci(dev)))
		return -ENODEV;
	if (WARN_ON(info->pasid_table))
		return -EEXIST;

	pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
	if (!pasid_table)
		return -ENOMEM;

	if (info->pasid_supported)
		max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)),
				  intel_pasid_max_id);

	size = max_pasid >> (PASID_PDE_SHIFT - 3);
	order = size ? get_order(size) : 0;
	dir = iommu_alloc_pages_node(info->iommu->node, GFP_KERNEL, order);
	if (!dir) {
		kfree(pasid_table);
		return -ENOMEM;
	}

	pasid_table->table = dir;
	pasid_table->order = order;
	pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
	info->pasid_table = pasid_table;

	if (!ecap_coherent(info->iommu->ecap))
		clflush_cache_range(pasid_table->table, (1 << order) * PAGE_SIZE);

	return 0;
}
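/*
 * For illustration, a worked example of the sizing math above, assuming
 * PASID_PDE_SHIFT == 6, 8 bytes per pasid_dir_entry (per pasid.h) and
 * PAGE_SHIFT == 12. With max_pasid = 0x10000 (64K PASIDs):
 *
 *	size  = 0x10000 >> (6 - 3) = 0x2000 bytes of directory (1024 PDEs)
 *	order = get_order(0x2000)  = 1 (two 4K pages)
 *	pasid_table->max_pasid = 1 << (1 + 12 + 3) = 0x10000
 *
 * The allocated directory holds (PAGE_SIZE << order) / 8 entries, and each
 * entry covers 2^PASID_PDE_SHIFT = 64 PASIDs, which is how the computation
 * rounds back up to max_pasid.
 */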
void intel_pasid_free_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *table;
	int i, max_pde;

	info = dev_iommu_priv_get(dev);
	if (!info || !dev_is_pci(dev) || !info->pasid_table)
		return;

	pasid_table = info->pasid_table;
	info->pasid_table = NULL;

	/* Free scalable mode PASID directory tables: */
	dir = pasid_table->table;
	max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
	for (i = 0; i < max_pde; i++) {
		table = get_pasid_table_from_pde(&dir[i]);
		iommu_free_page(table);
	}

	iommu_free_pages(pasid_table->table, pasid_table->order);
	kfree(pasid_table);
}
struct pasid_table *intel_pasid_get_table(struct device *dev)
{
	struct device_domain_info *info;

	info = dev_iommu_priv_get(dev);
	if (!info)
		return NULL;

	return info->pasid_table;
}
static int intel_pasid_get_dev_max_id(struct device *dev)
{
	struct device_domain_info *info;

	info = dev_iommu_priv_get(dev);
	if (!info || !info->pasid_table)
		return 0;

	return info->pasid_table->max_pasid;
}
static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *entries;
	int dir_index, index;

	pasid_table = intel_pasid_get_table(dev);
	if (WARN_ON(!pasid_table || pasid >= intel_pasid_get_dev_max_id(dev)))
		return NULL;

	dir = pasid_table->table;
	info = dev_iommu_priv_get(dev);
	dir_index = pasid >> PASID_PDE_SHIFT;
	index = pasid & PASID_PTE_MASK;

retry:
	entries = get_pasid_table_from_pde(&dir[dir_index]);
	if (!entries) {
		u64 tmp;

		entries = iommu_alloc_page_node(info->iommu->node, GFP_ATOMIC);
		if (!entries)
			return NULL;

		/*
		 * The pasid directory table entry won't be freed after
		 * allocation. No worry about the race with free and
		 * clear. However, this entry might be populated by others
		 * while we are preparing it. Use theirs with a retry.
		 */
		tmp = 0ULL;
		if (!try_cmpxchg64(&dir[dir_index].val, &tmp,
				   (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
			iommu_free_page(entries);
			goto retry;
		}
		if (!ecap_coherent(info->iommu->ecap)) {
			clflush_cache_range(entries, VTD_PAGE_SIZE);
			clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
		}
	}

	return &entries[index];
}
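/*
 * For illustration, how a PASID value is split by the lookup above, assuming
 * PASID_PDE_SHIFT == 6 and PASID_PTE_MASK == 0x3f (per pasid.h). For
 * pasid = 0x1234:
 *
 *	dir_index = 0x1234 >> 6   = 0x48  (selects the directory entry)
 *	index     = 0x1234 & 0x3f = 0x34  (selects the entry within that table)
 *
 * Each leaf PASID table therefore holds 64 entries and is allocated lazily
 * the first time any PASID in its 64-entry window is requested.
 */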
/*
 * Interfaces for PASID table entry manipulation:
 */
static void
intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pe;

	pe = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pe))
		return;

	if (fault_ignore && pasid_pte_is_present(pe))
		pasid_clear_entry_with_fpd(pe);
	else
		pasid_clear_entry(pe);
}
static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
				    u16 did, u32 pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
		QI_PC_PASID(pasid) | QI_PC_TYPE;
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(iommu, &desc, 1, 0);
}
static void
devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
			       struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	u16 sid, qdep, pfsid;

	info = dev_iommu_priv_get(dev);
	if (!info || !info->ats_enabled)
		return;

	if (pci_dev_is_disconnected(to_pci_dev(dev)))
		return;

	sid = PCI_DEVID(info->bus, info->devfn);
	qdep = info->ats_qdep;
	pfsid = info->pfsid;

	/*
	 * When PASID 0 is used, it indicates RID2PASID (a DMA request without
	 * PASID), so a devTLB flush without PASID should be used. For a
	 * non-zero PASID under SVA usage, the device could do DMA with
	 * multiple PASIDs; it is more efficient to flush the devTLB specific
	 * to the PASID.
	 */
	if (pasid == IOMMU_NO_PASID)
		qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
	else
		qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0,
					 64 - VTD_PAGE_SHIFT);
}
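/*
 * The (0, 64 - VTD_PAGE_SHIFT) address/mask pair passed above encodes an
 * invalidation that starts at address zero with a size order covering the
 * whole 64-bit address space, i.e. these are full-range device-TLB
 * invalidations for the function (or the function plus PASID), not per-page
 * ones.
 */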
void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
				 u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pte;
	u16 did, pgtt;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte) || !pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return;
	}

	did = pasid_get_domain_id(pte);
	pgtt = pasid_pte_get_pgtt(pte);
	intel_pasid_clear_entry(dev, pasid, fault_ignore);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	pasid_cache_invalidation_with_pasid(iommu, did, pasid);

	if (pgtt == PASID_ENTRY_PGTT_PT || pgtt == PASID_ENTRY_PGTT_FL_ONLY)
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	else
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);
	intel_iommu_drain_pasid_prq(dev, pasid);
}
/*
 * This function flushes the caches for a newly set up pasid table entry.
 * Callers should not modify in-use pasid table entries.
 */
static void pasid_flush_caches(struct intel_iommu *iommu,
			       struct pasid_entry *pte,
			       u32 pasid, u16 did)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	} else {
		iommu_flush_write_buffer(iommu);
	}
}
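/*
 * Caching Mode (CM) is typically reported by a virtual IOMMU, which is
 * allowed to cache even non-present entries; in that case a not-present to
 * present transition still needs explicit PASID-cache and PASID-based IOTLB
 * invalidation. Without CM, flushing the write buffer (when the hardware
 * requires it) is sufficient for this transition.
 */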
/*
 * This function is supposed to be used after the caller updates the fields
 * except for the SSADE and P bit of a pasid table entry. It does the
 * below:
 * - Flush the cacheline if needed
 * - Flush the caches per Table 28 "Guidance to Software for Invalidations"
 *   of the VT-d spec 5.0.
 */
static void intel_pasid_flush_present(struct intel_iommu *iommu,
				      struct device *dev,
				      u32 pasid, u16 did,
				      struct pasid_entry *pte)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * VT-d spec 5.0, Table 28, gives guidance for cache invalidation:
	 *
	 * - PASID-selective-within-Domain PASID-cache invalidation
	 * - PASID-selective PASID-based IOTLB invalidation
	 * - If (pasid is RID_PASID)
	 *    - Global Device-TLB invalidation to affected functions
	 *   Else
	 *    - PASID-based Device-TLB invalidation (with S=1 and
	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
	qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);
}
/*
 * Set up the scalable mode pasid table entry for first only
 * translation type.
 */
static void pasid_pte_config_first_level(struct intel_iommu *iommu,
					 struct pasid_entry *pte,
					 pgd_t *pgd, u16 did, int flags)
{
	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);

	/* Setup the first level page table pointer: */
	pasid_set_flptr(pte, (u64)__pa(pgd));

	if (flags & PASID_FLAG_FL5LP)
		pasid_set_flpm(pte, 1);

	if (flags & PASID_FLAG_PAGE_SNOOP)
		pasid_set_pgsnp(pte);

	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/* Setup Present and PASID Granular Transfer Type: */
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
	pasid_set_present(pte);
}

int intel_pasid_setup_first_level(struct intel_iommu *iommu,
				  struct device *dev, pgd_t *pgd,
				  u32 pasid, u16 did, int flags)
{
	struct pasid_entry *pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
		pr_err("No 5-level paging support for first-level on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_first_level(iommu, pte, pgd, did, flags);

	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}
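/*
 * As used in this file, @flags is a bitmask of PASID_FLAG_* values:
 * PASID_FLAG_FL5LP selects 5-level first-stage paging (rejected above when
 * cap_fl5lp_support() is false), and PASID_FLAG_PAGE_SNOOP sets PGSNP so that
 * DMA translated through this entry is forced to be snooped (cache coherent).
 */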
int intel_pasid_replace_first_level(struct intel_iommu *iommu,
				    struct device *dev, pgd_t *pgd,
				    u32 pasid, u16 did, u16 old_did,
				    int flags)
{
	struct pasid_entry *pte, new_pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
		pr_err("No 5-level paging support for first-level on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pasid_pte_config_first_level(iommu, &new_pte, pgd, did, flags);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}
/*
 * Set up the scalable mode pasid entry for second only translation type.
 */
static void pasid_pte_config_second_level(struct intel_iommu *iommu,
					  struct pasid_entry *pte,
					  u64 pgd_val, int agaw, u16 did,
					  bool dirty_tracking)
{
	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_slptr(pte, pgd_val);
	pasid_set_address_width(pte, agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	if (dirty_tracking)
		pasid_set_ssade(pte);

	pasid_set_present(pte);
}

int intel_pasid_setup_second_level(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pgd = domain->pgd;
	pgd_val = virt_to_phys(pgd);
	did = domain_id_iommu(domain, iommu);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_second_level(iommu, pte, pgd_val, domain->agaw,
				      did, domain->dirty_tracking);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}
int intel_pasid_replace_second_level(struct intel_iommu *iommu,
				     struct dmar_domain *domain,
				     struct device *dev, u16 old_did,
				     u32 pasid)
{
	struct pasid_entry *pte, new_pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pgd = domain->pgd;
	pgd_val = virt_to_phys(pgd);
	did = domain_id_iommu(domain, iommu);

	pasid_pte_config_second_level(iommu, &new_pte, pgd_val,
				      domain->agaw, did,
				      domain->dirty_tracking);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}
/*
 * Set up dirty tracking on a second only or nested translation type.
 */
int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
				     struct device *dev, u32 pasid,
				     bool enabled)
{
	struct pasid_entry *pte;
	u16 did, pgtt;

	spin_lock(&iommu->lock);

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		dev_err_ratelimited(
			dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	did = pasid_get_domain_id(pte);
	pgtt = pasid_pte_get_pgtt(pte);
	if (pgtt != PASID_ENTRY_PGTT_SL_ONLY &&
	    pgtt != PASID_ENTRY_PGTT_NESTED) {
		spin_unlock(&iommu->lock);
		dev_err_ratelimited(
			dev,
			"Dirty tracking not supported on translation type %d\n",
			pgtt);
		return -EOPNOTSUPP;
	}

	if (pasid_get_ssade(pte) == enabled) {
		spin_unlock(&iommu->lock);
		return 0;
	}

	if (enabled)
		pasid_set_ssade(pte);
	else
		pasid_clear_ssade(pte);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * From VT-d spec Table 25, "Guidance to Software for Invalidations":
	 *
	 * - PASID-selective-within-Domain PASID-cache invalidation
	 *   If (PGTT=SS or Nested)
	 *    - Domain-selective IOTLB invalidation
	 *   Else
	 *    - PASID-selective PASID-based IOTLB invalidation
	 * - If (pasid is RID_PASID)
	 *    - Global Device-TLB invalidation to affected functions
	 *   Else
	 *    - PASID-based Device-TLB invalidation (with S=1 and
	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);

	iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);

	return 0;
}
/*
 * Set up the scalable mode pasid entry for passthrough translation type.
 */
static void pasid_pte_config_pass_through(struct intel_iommu *iommu,
					  struct pasid_entry *pte, u16 did)
{
	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	pasid_set_present(pte);
}

int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
				   struct device *dev, u32 pasid)
{
	u16 did = FLPT_DEFAULT_DID;
	struct pasid_entry *pte;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_pass_through(iommu, pte, did);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}
int intel_pasid_replace_pass_through(struct intel_iommu *iommu,
				     struct device *dev, u16 old_did,
				     u32 pasid)
{
	struct pasid_entry *pte, new_pte;
	u16 did = FLPT_DEFAULT_DID;

	pasid_pte_config_pass_through(iommu, &new_pte, did);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}
/*
 * Set the page snoop control for a pasid entry which has been set up.
 */
void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
					  struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	u16 did;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte || !pasid_pte_is_present(pte))) {
		spin_unlock(&iommu->lock);
		return;
	}

	pasid_set_pgsnp(pte);
	did = pasid_get_domain_id(pte);
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, did, pte);
}
static void pasid_pte_config_nestd(struct intel_iommu *iommu,
				   struct pasid_entry *pte,
				   struct iommu_hwpt_vtd_s1 *s1_cfg,
				   struct dmar_domain *s2_domain,
				   u16 did)
{
	struct dma_pte *pgd = s2_domain->pgd;

	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);

	if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
		pasid_set_flpm(pte, 1);

	pasid_set_flptr(pte, s1_cfg->pgtbl_addr);

	if (s1_cfg->flags & IOMMU_VTD_S1_SRE) {
		pasid_set_sre(pte);
		if (s1_cfg->flags & IOMMU_VTD_S1_WPE)
			pasid_set_wpe(pte);
	}

	if (s1_cfg->flags & IOMMU_VTD_S1_EAFE)
		pasid_set_eafe(pte);

	if (s2_domain->force_snooping)
		pasid_set_pgsnp(pte);

	pasid_set_slptr(pte, virt_to_phys(pgd));
	pasid_set_fault_enable(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, s2_domain->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	if (s2_domain->dirty_tracking)
		pasid_set_ssade(pte);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
	pasid_set_present(pte);
}
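/*
 * In the nested configuration above, the first-stage pointer (FLPTR) and its
 * paging mode come from the user-provided stage-1 config (s1_cfg->pgtbl_addr,
 * s1_cfg->addr_width), while the second-stage pointer (SLPTR), address width,
 * snoop control and dirty tracking come from the parent stage-2 domain. The
 * domain ID is the nested domain's own ID on this IOMMU.
 */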
/**
 * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
 * @iommu:      IOMMU which the device belongs to
 * @dev:        Device to be set up for translation
 * @pasid:      PASID to be programmed in the device PASID table
 * @domain:     User stage-1 domain nested on a stage-2 domain
 *
 * This is used for nested translation. The input domain should be
 * nested type and nested on a parent with the 'is_nested_parent' flag
 * set.
 */
int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
			     u32 pasid, struct dmar_domain *domain)
{
	struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
	struct dmar_domain *s2_domain = domain->s2_domain;
	u16 did = domain_id_iommu(domain, iommu);
	struct pasid_entry *pte;

	/* Address width should match the address width supported by hardware */
	switch (s1_cfg->addr_width) {
	case ADDR_WIDTH_4LEVEL:
		break;
	case ADDR_WIDTH_5LEVEL:
		if (!cap_fl5lp_support(iommu->cap)) {
			dev_err_ratelimited(dev,
					    "5-level paging not supported\n");
			return -EINVAL;
		}
		break;
	default:
		dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
				    s1_cfg->addr_width);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
		pr_err_ratelimited("No supervisor request support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
		pr_err_ratelimited("No extended access flag support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}
	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_nestd(iommu, pte, s1_cfg, s2_domain, did);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}
int intel_pasid_replace_nested(struct intel_iommu *iommu,
			       struct device *dev, u32 pasid,
			       u16 old_did, struct dmar_domain *domain)
{
	struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
	struct dmar_domain *s2_domain = domain->s2_domain;
	u16 did = domain_id_iommu(domain, iommu);
	struct pasid_entry *pte, new_pte;

	/* Address width should match the address width supported by hardware */
	switch (s1_cfg->addr_width) {
	case ADDR_WIDTH_4LEVEL:
		break;
	case ADDR_WIDTH_5LEVEL:
		if (!cap_fl5lp_support(iommu->cap)) {
			dev_err_ratelimited(dev,
					    "5-level paging not supported\n");
			return -EINVAL;
		}
		break;
	default:
		dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
				    s1_cfg->addr_width);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
		pr_err_ratelimited("No supervisor request support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
		pr_err_ratelimited("No extended access flag support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	pasid_pte_config_nestd(iommu, &new_pte, s1_cfg, s2_domain, did);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}
/*
 * Interfaces to setup or teardown a pasid table to the scalable-mode
 * context table entry:
 */

static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *context;
	u16 did;

	spin_lock(&iommu->lock);
	context = iommu_context_addr(iommu, bus, devfn, false);
	if (!context) {
		spin_unlock(&iommu->lock);
		return;
	}

	did = context_domain_id(context);
	context_clear_entry(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));
	spin_unlock(&iommu->lock);
	intel_context_flush_present(info, context, did, false);
}
static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data)
{
	struct device *dev = data;

	if (dev == &pdev->dev)
		device_pasid_table_teardown(dev, PCI_BUS_NUM(alias), alias & 0xff);

	return 0;
}
void intel_pasid_teardown_sm_context(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	if (!dev_is_pci(dev)) {
		device_pasid_table_teardown(dev, info->bus, info->devfn);
		return;
	}

	pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_teardown, dev);
}
/*
 * Get the PASID directory size for a scalable mode context entry.
 * The value of X in the PDTS field of a scalable mode context entry
 * indicates a PASID directory with 2^(X + 7) entries.
 */
static unsigned long context_get_sm_pds(struct pasid_table *table)
{
	unsigned long pds, max_pde;

	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
	pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
	if (pds < 7)
		return 0;

	return pds - 7;
}
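/*
 * For illustration: with max_pasid = 0x10000 and PASID_PDE_SHIFT == 6,
 * max_pde is 1024, find_first_bit() returns 10 and the function returns
 * 10 - 7 = 3, so the PDTS field encodes a directory of 2^(3 + 7) = 1024
 * entries. Directories smaller than 128 entries fall back to the minimum
 * encodable size by returning 0.
 */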
static int context_entry_set_pasid_table(struct context_entry *context,
					 struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct pasid_table *table = info->pasid_table;
	struct intel_iommu *iommu = info->iommu;
	unsigned long pds;

	context_clear_entry(context);

	pds = context_get_sm_pds(table);
	context->lo = (u64)virt_to_phys(table->table) | context_pdts(pds);
	context_set_sm_rid2pasid(context, IOMMU_NO_PASID);

	if (info->ats_supported)
		context_set_sm_dte(context);
	if (info->pasid_supported)
		context_set_pasid(context);

	context_set_fault_enable(context);
	context_set_present(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));

	return 0;
}
static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *context;

	spin_lock(&iommu->lock);
	context = iommu_context_addr(iommu, bus, devfn, true);
	if (!context) {
		spin_unlock(&iommu->lock);
		return -ENOMEM;
	}

	if (context_present(context) && !context_copied(iommu, bus, devfn)) {
		spin_unlock(&iommu->lock);
		return 0;
	}

	if (context_copied(iommu, bus, devfn)) {
		context_clear_entry(context);
		__iommu_flush_cache(iommu, context, sizeof(*context));

		/*
		 * For kdump cases, old valid entries may be cached due to
		 * the in-flight DMA and copied pgtable, but there is no
		 * unmapping behaviour for them, thus we need explicit cache
		 * flushes for all affected domain IDs and PASIDs used in
		 * the copied PASID table. Given that we have no idea about
		 * which domain IDs and PASIDs were used in the copied tables,
		 * upgrade them to global PASID and IOTLB cache invalidation.
		 */
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
		devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);

		/*
		 * At this point, the device is supposed to have finished
		 * reset at its driver probe stage, so no in-flight DMA will
		 * exist, and we don't need to worry anymore hereafter.
		 */
		clear_context_copied(iommu, bus, devfn);
	}

	context_entry_set_pasid_table(context, dev);
	spin_unlock(&iommu->lock);

	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entries we don't need to flush the caches. If it does
	 * cache non-present entries, then it does so in the special
	 * domain #0, which we have to flush:
	 */
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
	}

	return 0;
}
static int pci_pasid_table_setup(struct pci_dev *pdev, u16 alias, void *data)
{
	struct device *dev = data;

	if (dev != &pdev->dev)
		return 0;

	return device_pasid_table_setup(dev, PCI_BUS_NUM(alias), alias & 0xff);
}
/*
 * Set the device's PASID table to its context table entry.
 *
 * The PASID table is set to the context entries of both the device itself
 * and its alias requester ID for DMA.
 */
int intel_pasid_setup_sm_context(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	if (!dev_is_pci(dev))
		return device_pasid_table_setup(dev, info->bus, info->devfn);

	return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev);
}
/*
 * Global Device-TLB invalidation following changes in a context entry which
 * was present.
 */
static void __context_flush_dev_iotlb(struct device_domain_info *info)
{
	if (!info->ats_enabled)
		return;

	qi_flush_dev_iotlb(info->iommu, PCI_DEVID(info->bus, info->devfn),
			   info->pfsid, info->ats_qdep, 0, MAX_AGAW_PFN_WIDTH);

	/*
	 * There is no guarantee that the device DMA is stopped when it reaches
	 * here. Therefore, always attempt the extra device TLB invalidation
	 * quirk. The impact on performance is acceptable since this is not a
	 * performance-critical path.
	 */
	quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH, IOMMU_NO_PASID,
				  info->ats_qdep);
}
/*
 * Cache invalidations after a change in a context table entry that was
 * present, according to VT-d spec section 6.5.3.3 (Guidance to Software for
 * Invalidations). If the IOMMU is in scalable mode and all PASID table
 * entries of the device were non-present, set flush_domains to false.
 * Otherwise, set it to true.
 */
void intel_context_flush_present(struct device_domain_info *info,
				 struct context_entry *context,
				 u16 did, bool flush_domains)
{
	struct intel_iommu *iommu = info->iommu;
	struct pasid_entry *pte;
	int i;

	/*
	 * Device-selective context-cache invalidation. The Domain-ID field
	 * of the Context-cache Invalidate Descriptor is ignored by hardware
	 * when operating in scalable mode. Therefore the @did value doesn't
	 * matter in scalable mode.
	 */
	iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus, info->devfn),
				   DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL);

	/*
	 * For legacy mode:
	 * - Domain-selective IOTLB invalidation
	 * - Global Device-TLB invalidation to all affected functions
	 */
	if (!sm_supported(iommu)) {
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
		__context_flush_dev_iotlb(info);

		return;
	}

	/*
	 * For scalable mode:
	 * - Domain-selective PASID-cache invalidation to affected domains
	 * - Domain-selective IOTLB invalidation to affected domains
	 * - Global Device-TLB invalidation to affected functions
	 */
	if (flush_domains) {
		/*
		 * If the IOMMU is running in scalable mode and there might
		 * be potential PASID translations, the caller should hold
		 * the lock to ensure that context changes and cache flushes
		 * are atomic.
		 */
		assert_spin_locked(&iommu->lock);
		for (i = 0; i < info->pasid_table->max_pasid; i++) {
			pte = intel_pasid_get_entry(info->dev, i);
			if (!pte || !pasid_pte_is_present(pte))
				continue;

			did = pasid_get_domain_id(pte);
			qi_flush_pasid_cache(iommu, did, QI_PC_ALL_PASIDS, 0);
			iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
		}
	}

	__context_flush_dev_iotlb(info);
}