// SPDX-License-Identifier: GPL-2.0
/*
 * intel-pasid.c - PASID idr, table and entry manipulation
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt)	"DMAR: " fmt
#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/dmar.h>
#include <linux/intel-iommu.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>

#include "intel-pasid.h"
/*
 * Intel IOMMU system wide PASID name space:
 */
static DEFINE_SPINLOCK(pasid_lock);
u32 intel_pasid_max_id = PASID_MAX;
static DEFINE_IDR(pasid_idr);
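
/*
 * The name space is system wide: a PASID allocated here is unique
 * across all IOMMUs. pasid_lock serializes all IDR updates; lookups
 * take it too, so the pointer returned by intel_pasid_lookup_id() is
 * only as stable as the caller's own lifetime rules make it.
 */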
int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp)
{
	int ret, min, max;

	min = max_t(int, start, PASID_MIN);
	max = min_t(int, end, intel_pasid_max_id);

	WARN_ON(in_interrupt());
	/*
	 * Preload with the caller's gfp mask outside the lock, so that
	 * idr_alloc() itself can safely run with GFP_ATOMIC under it.
	 */
	idr_preload(gfp);
	spin_lock(&pasid_lock);
	ret = idr_alloc(&pasid_idr, ptr, min, max, GFP_ATOMIC);
	spin_unlock(&pasid_lock);
	idr_preload_end();

	return ret;
}
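
/*
 * Illustrative usage (not part of this file; "my_ctx" is a made-up
 * caller context):
 *
 *	int pasid = intel_pasid_alloc_id(my_ctx, PASID_MIN,
 *					 intel_pasid_max_id, GFP_KERNEL);
 *	if (pasid < 0)
 *		return pasid;
 *	...
 *	WARN_ON(intel_pasid_lookup_id(pasid) != my_ctx);
 *	intel_pasid_free_id(pasid);
 */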
void intel_pasid_free_id(int pasid)
{
	spin_lock(&pasid_lock);
	idr_remove(&pasid_idr, pasid);
	spin_unlock(&pasid_lock);
}
void *intel_pasid_lookup_id(int pasid)
{
	void *p;

	spin_lock(&pasid_lock);
	p = idr_find(&pasid_idr, pasid);
	spin_unlock(&pasid_lock);

	return p;
}
/*
 * Per device pasid table management:
 */
static inline void
device_attach_pasid_table(struct device_domain_info *info,
			  struct pasid_table *pasid_table)
{
	info->pasid_table = pasid_table;
	list_add(&info->table, &pasid_table->dev);
}

static inline void
device_detach_pasid_table(struct device_domain_info *info,
			  struct pasid_table *pasid_table)
{
	info->pasid_table = NULL;
	list_del(&info->table);
}
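
/*
 * A pasid table may be shared by all devices that alias each other
 * (see intel_pasid_alloc_table() below). pasid_table->dev heads the
 * list of attached device_domain_info structures, and the table is
 * only freed once that list is empty again.
 */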
struct pasid_table_opaque {
	struct pasid_table	**pasid_table;
	int			segment;
	int			bus;
	int			devfn;
};
static int search_pasid_table(struct device_domain_info *info, void *opaque)
{
	struct pasid_table_opaque *data = opaque;

	if (info->iommu->segment == data->segment &&
	    info->bus == data->bus &&
	    info->devfn == data->devfn &&
	    info->pasid_table) {
		*data->pasid_table = info->pasid_table;
		return 1;
	}

	return 0;
}
static int get_alias_pasid_table(struct pci_dev *pdev, u16 alias, void *opaque)
{
	struct pasid_table_opaque *data = opaque;

	data->segment = pci_domain_nr(pdev->bus);
	data->bus = PCI_BUS_NUM(alias);
	data->devfn = alias & 0xff;

	return for_each_device_domain(&search_pasid_table, data);
}
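
/*
 * pci_for_each_dma_alias() calls the function above for the device and
 * each of its DMA aliases. search_pasid_table() returns 1 as soon as
 * an alias with a pasid table is found, which aborts the walk and
 * propagates the non-zero result back to intel_pasid_alloc_table().
 */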
/*
 * Allocate a pasid table for @dev. It should be called in a
 * single-thread context.
 */
int intel_pasid_alloc_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_table_opaque data;
	struct page *pages;
	int max_pasid = 0;
	int ret, order;
	int size;

	might_sleep();
	info = dev->archdata.iommu;
	if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
		return -EINVAL;

	/* DMA alias device already has a pasid table, use it: */
	data.pasid_table = &pasid_table;
	ret = pci_for_each_dma_alias(to_pci_dev(dev),
				     &get_alias_pasid_table, &data);
	if (ret)
		goto attach_out;

	pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
	if (!pasid_table)
		return -ENOMEM;
	INIT_LIST_HEAD(&pasid_table->dev);

	if (info->pasid_supported)
		max_pasid = min_t(int, pci_max_pasids(to_pci_dev(dev)),
				  intel_pasid_max_id);

	size = max_pasid >> (PASID_PDE_SHIFT - 3);
	order = size ? get_order(size) : 0;
	pages = alloc_pages_node(info->iommu->node,
				 GFP_KERNEL | __GFP_ZERO, order);
	if (!pages) {
		kfree(pasid_table);
		return -ENOMEM;
	}

	pasid_table->table = page_address(pages);
	pasid_table->order = order;
	pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);

attach_out:
	device_attach_pasid_table(info, pasid_table);

	return 0;
}
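
/*
 * Sizing, worked through: a pasid_dir_entry is 8 bytes, so a directory
 * covering max_pasid PASIDs occupies (max_pasid >> PASID_PDE_SHIFT) * 8
 * = max_pasid >> (PASID_PDE_SHIFT - 3) bytes. Conversely, an order-N
 * allocation holds 1 << (N + PAGE_SHIFT - 3) directory entries, each
 * covering 1 << PASID_PDE_SHIFT PASIDs; with PASID_PDE_SHIFT == 6 that
 * is the 1 << (N + PAGE_SHIFT + 3) stored in max_pasid above.
 */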
/* Get PRESENT bit of a PASID directory entry. */
static inline bool
pasid_pde_is_present(struct pasid_dir_entry *pde)
{
	return READ_ONCE(pde->val) & PASID_PTE_PRESENT;
}

/* Get PASID table from a PASID directory entry. */
static inline struct pasid_entry *
get_pasid_table_from_pde(struct pasid_dir_entry *pde)
{
	if (!pasid_pde_is_present(pde))
		return NULL;

	return phys_to_virt(READ_ONCE(pde->val) & PDE_PFN_MASK);
}
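
/*
 * Directory entries are published with WRITE_ONCE() under pasid_lock
 * in intel_pasid_get_entry(), but the two helpers above may also run
 * on paths that do not hold the lock (e.g. intel_pasid_free_table()),
 * so the 64-bit value is always loaded in a single shot.
 */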
void intel_pasid_free_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *table;
	int i, max_pde;

	info = dev->archdata.iommu;
	if (!info || !dev_is_pci(dev) || !info->pasid_table)
		return;

	pasid_table = info->pasid_table;
	device_detach_pasid_table(info, pasid_table);

	/* The table is shared; only free it when the last user is gone: */
	if (!list_empty(&pasid_table->dev))
		return;

	/* Free scalable mode PASID directory tables: */
	dir = pasid_table->table;
	max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
	for (i = 0; i < max_pde; i++) {
		table = get_pasid_table_from_pde(&dir[i]);
		free_pgtable_page(table);
	}

	free_pages((unsigned long)pasid_table->table, pasid_table->order);
	kfree(pasid_table);
}
struct pasid_table *intel_pasid_get_table(struct device *dev)
{
	struct device_domain_info *info;

	info = dev->archdata.iommu;
	if (!info)
		return NULL;

	return info->pasid_table;
}
int intel_pasid_get_dev_max_id(struct device *dev)
{
	struct device_domain_info *info;

	info = dev->archdata.iommu;
	if (!info || !info->pasid_table)
		return 0;

	return info->pasid_table->max_pasid;
}
struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *entries;
	int dir_index, index;

	pasid_table = intel_pasid_get_table(dev);
	if (WARN_ON(!pasid_table || pasid < 0 ||
		    pasid >= intel_pasid_get_dev_max_id(dev)))
		return NULL;

	dir = pasid_table->table;
	info = dev->archdata.iommu;
	dir_index = pasid >> PASID_PDE_SHIFT;
	index = pasid & PASID_PTE_MASK;

	spin_lock(&pasid_lock);
	entries = get_pasid_table_from_pde(&dir[dir_index]);
	if (!entries) {
		/* Allocate the leaf PASID table on first use: */
		entries = alloc_pgtable_page(info->iommu->node);
		if (!entries) {
			spin_unlock(&pasid_lock);
			return NULL;
		}

		WRITE_ONCE(dir[dir_index].val,
			   (u64)virt_to_phys(entries) | PASID_PTE_PRESENT);
	}
	spin_unlock(&pasid_lock);

	return &entries[index];
}
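
/*
 * Index arithmetic, by example (assuming PASID_PDE_SHIFT == 6, i.e.
 * PASID_PTE_MASK == 0x3f): pasid 0x47 resolves to directory slot
 * 0x47 >> 6 == 1 and leaf entry 0x47 & 0x3f == 7. PASIDs differing
 * only in their low PASID_PDE_SHIFT bits thus share one leaf page.
 */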
/*
 * Interfaces for PASID table entry manipulation:
 */
static inline void pasid_clear_entry(struct pasid_entry *pe)
{
	WRITE_ONCE(pe->val[0], 0);
	WRITE_ONCE(pe->val[1], 0);
	WRITE_ONCE(pe->val[2], 0);
	WRITE_ONCE(pe->val[3], 0);
	WRITE_ONCE(pe->val[4], 0);
	WRITE_ONCE(pe->val[5], 0);
	WRITE_ONCE(pe->val[6], 0);
	WRITE_ONCE(pe->val[7], 0);
}
static void intel_pasid_clear_entry(struct device *dev, int pasid)
{
	struct pasid_entry *pe;

	pe = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pe))
		return;

	pasid_clear_entry(pe);
}
static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
{
	u64 old;

	old = READ_ONCE(*ptr);
	WRITE_ONCE(*ptr, (old & ~mask) | bits);
}
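
/*
 * Read-modify-write by example: storing 2 in the 3-bit PGTT field
 * (bits 8:6 of qword 0) is
 *
 *	pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), 2 << 6);
 *
 * Only the masked bits change; callers must pre-shift @bits into the
 * position selected by @mask.
 */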
/*
 * Set up the DID(Domain Identifier) field (Bit 64~79) of a scalable
 * mode PASID entry.
 */
static inline void
pasid_set_domain_id(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value);
}
/*
 * Get the domain ID value of a scalable mode PASID entry.
 */
static inline u16
pasid_get_domain_id(struct pasid_entry *pe)
{
	return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0));
}
/*
 * Set up the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_slptr(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value);
}
/*
 * Set up the AW(Address Width) field (Bit 2~4) of a scalable mode PASID
 * entry.
 */
static inline void
pasid_set_address_width(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2);
}
/*
 * Set up the PGTT(PASID Granular Translation Type) field (Bit 6~8)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_translation_type(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6);
}
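
/*
 * PGTT values programmed in this file: 1 for first level only, 2 for
 * second level only, 4 for pass-through (see the intel_pasid_setup_*()
 * functions below). Nested translation uses a different encoding that
 * this file does not set up.
 */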
/*
 * Enable fault processing by clearing the FPD(Fault Processing
 * Disable) field (Bit 1) of a scalable mode PASID entry.
 */
static inline void pasid_set_fault_enable(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 1, 0);
}
/*
 * Set up the SRE(Supervisor Request Enable) field (Bit 128) of a
 * scalable mode PASID entry.
 */
static inline void pasid_set_sre(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[2], 1 << 0, 1);
}
/*
 * Set up the P(Present) field (Bit 0) of a scalable mode PASID
 * entry.
 */
static inline void pasid_set_present(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 0, 1);
}
/*
 * Set up the Page Walk Snoop bit (Bit 87) of a scalable mode PASID
 * entry.
 */
static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
{
	/* Shift @value into bit 23 so it lines up with the mask: */
	pasid_set_bits(&pe->val[1], 1 << 23, value << 23);
}
/*
 * Set up the First Level Page table Pointer field (Bit 140~191)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_flptr(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[2], VTD_PAGE_MASK, value);
}
/*
 * Set up the First Level Paging Mode field (Bit 130~131) of a
 * scalable mode PASID entry.
 */
static inline void
pasid_set_flpm(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2);
}
static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
				    u16 did, int pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid);
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(&desc, iommu);
}
static void
iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) |
			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(&desc, iommu);
}
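
/*
 * Both helpers above zero qw1-qw3 explicitly: qi_submit_sync() copies
 * the entire 256-bit descriptor into the invalidation queue, so any
 * stale stack contents in the unused qwords would otherwise reach the
 * hardware.
 */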
static void
devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
			       struct device *dev, int pasid)
{
	struct device_domain_info *info;
	u16 sid, qdep, pfsid;

	info = dev->archdata.iommu;
	if (!info || !info->ats_enabled)
		return;

	sid = info->bus << 8 | info->devfn;
	qdep = info->ats_qdep;
	pfsid = info->pfsid;

	qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
}
void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
				 struct device *dev, int pasid)
{
	struct pasid_entry *pte;
	u16 did;

	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte))
		return;

	did = pasid_get_domain_id(pte);
	intel_pasid_clear_entry(dev, pasid);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
	iotlb_invalidation_with_pasid(iommu, did, pasid);

	/* Device IOTLB doesn't need to be flushed in caching mode. */
	if (!cap_caching_mode(iommu->cap))
		devtlb_invalidation_with_pasid(iommu, dev, pasid);
}
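
/*
 * Tear-down ordering above: read the DID while the entry is still
 * valid, clear the entry, flush the cacheline if the IOMMU is not
 * coherent, and only then invalidate the PASID cache, IOTLB and
 * (outside caching mode) the device IOTLB, so hardware cannot refetch
 * the stale entry after the flushes complete.
 */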
/*
 * Set up the scalable mode pasid table entry for first level only
 * translation type.
 */
int intel_pasid_setup_first_level(struct intel_iommu *iommu,
				  struct device *dev, pgd_t *pgd,
				  int pasid, u16 did, int flags)
{
	struct pasid_entry *pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte))
		return -EINVAL;

	pasid_clear_entry(pte);

	/* Set up the first level page table pointer: */
	pasid_set_flptr(pte, (u64)__pa(pgd));
	if (flags & PASID_FLAG_SUPERVISOR_MODE) {
		if (!ecap_srs(iommu->ecap)) {
			pr_err("No supervisor request support on %s\n",
			       iommu->name);
			return -EINVAL;
		}
		pasid_set_sre(pte);
	}

#ifdef CONFIG_X86
	/* CPU and IOMMU paging modes must match; 1 selects 5-level paging: */
	if (cpu_feature_enabled(X86_FEATURE_LA57))
		pasid_set_flpm(pte, 1);
#endif /* CONFIG_X86 */

	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/* Set up Present and PASID Granular Translation Type: */
	pasid_set_translation_type(pte, 1);
	pasid_set_present(pte);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		iotlb_invalidation_with_pasid(iommu, did, pasid);
	} else {
		iommu_flush_write_buffer(iommu);
	}

	return 0;
}
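
/*
 * Illustrative caller sketch (not from this file): an SVA-style bind
 * of a process address space to (dev, pasid) might look like
 *
 *	ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd,
 *					    pasid, did, 0);
 *
 * with PASID_FLAG_SUPERVISOR_MODE passed in @flags instead when the
 * page table maps kernel addresses.
 */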
/*
 * Set up the scalable mode pasid entry for second level only
 * translation type.
 */
int intel_pasid_setup_second_level(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, int pasid)
{
	struct pasid_entry *pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	int agaw;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	/*
	 * Skip top levels of page tables for iommu which has less agaw
	 * than default. Unnecessary for PT mode.
	 */
	pgd = domain->pgd;
	for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
		pgd = phys_to_virt(dma_pte_addr(pgd));
		if (!dma_pte_present(pgd)) {
			dev_err(dev, "Invalid domain page table\n");
			return -EINVAL;
		}
	}

	pgd_val = virt_to_phys(pgd);
	did = domain->iommu_did[iommu->seq_id];

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_slptr(pte, pgd_val);
	pasid_set_address_width(pte, agaw);
	pasid_set_translation_type(pte, 2);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/*
	 * Since it is a second level only translation setup, we should
	 * set SRE bit as well (addresses are expected to be GPAs).
	 */
	pasid_set_sre(pte);
	pasid_set_present(pte);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		iotlb_invalidation_with_pasid(iommu, did, pasid);
	} else {
		iommu_flush_write_buffer(iommu);
	}

	return 0;
}
/*
 * Set up the scalable mode pasid entry for passthrough translation type.
 */
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, int pasid)
{
	u16 did = FLPT_DEFAULT_DID;
	struct pasid_entry *pte;

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_translation_type(pte, 4);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/*
	 * We should set SRE bit as well since the addresses are expected
	 * to be GPAs.
	 */
	pasid_set_sre(pte);
	pasid_set_present(pte);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		iotlb_invalidation_with_pasid(iommu, did, pasid);
	} else {
		iommu_flush_write_buffer(iommu);
	}

	return 0;
}
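
/*
 * Each of the three intel_pasid_setup_*() variants above is undone by
 * intel_pasid_tear_down_entry(), which clears the (dev, pasid) entry
 * and performs the matching PASID cache, IOTLB and device IOTLB
 * invalidations.
 */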