// SPDX-License-Identifier: GPL-2.0
/*
 * intel-pasid.c - PASID idr, table and entry manipulation
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt)	"DMAR: " fmt

#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/dmar.h>
#include <linux/intel-iommu.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>

#include "intel-pasid.h"

/*
 * Intel IOMMU system wide PASID name space:
 */
static DEFINE_SPINLOCK(pasid_lock);
u32 intel_pasid_max_id = PASID_MAX;

/*
 * Per device pasid table management:
 */
static inline void
device_attach_pasid_table(struct device_domain_info *info,
			  struct pasid_table *pasid_table)
{
	info->pasid_table = pasid_table;
	list_add(&info->table, &pasid_table->dev);
}

static inline void
device_detach_pasid_table(struct device_domain_info *info,
			  struct pasid_table *pasid_table)
{
	info->pasid_table = NULL;
	list_del(&info->table);
}

struct pasid_table_opaque {
	struct pasid_table	**pasid_table;
	int			segment;
	int			bus;
	int			devfn;
};

static int search_pasid_table(struct device_domain_info *info, void *opaque)
{
	struct pasid_table_opaque *data = opaque;

	if (info->iommu->segment == data->segment &&
	    info->bus == data->bus &&
	    info->devfn == data->devfn &&
	    info->pasid_table) {
		*data->pasid_table = info->pasid_table;
		return 1;
	}

	return 0;
}

static int get_alias_pasid_table(struct pci_dev *pdev, u16 alias, void *opaque)
{
	struct pasid_table_opaque *data = opaque;

	data->segment = pci_domain_nr(pdev->bus);
	data->bus = PCI_BUS_NUM(alias);
	data->devfn = alias & 0xff;

	return for_each_device_domain(&search_pasid_table, data);
}
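
/*
 * Note: functions that share a DMA alias (e.g. devices behind a
 * PCIe-to-PCI bridge) reach the IOMMU with the same requester ID, so
 * they must share one PASID table. intel_pasid_alloc_table() below
 * walks the aliases via pci_for_each_dma_alias() and reuses an
 * existing table when it finds one.
 */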

/*
 * Allocate a pasid table for @dev. It should be called in a
 * single-thread context.
 */
int intel_pasid_alloc_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_table_opaque data;
	struct page *pages;
	int max_pasid = 0;
	int ret, order;
	int size;

	might_sleep();
	info = dev->archdata.iommu;
	if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
		return -EINVAL;

	/* DMA alias device already has a pasid table, use it: */
	data.pasid_table = &pasid_table;
	ret = pci_for_each_dma_alias(to_pci_dev(dev),
				     &get_alias_pasid_table, &data);
	if (ret)
		goto attach_out;

	pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
	if (!pasid_table)
		return -ENOMEM;
	INIT_LIST_HEAD(&pasid_table->dev);

	if (info->pasid_supported)
		max_pasid = min_t(int, pci_max_pasids(to_pci_dev(dev)),
				  intel_pasid_max_id);

	size = max_pasid >> (PASID_PDE_SHIFT - 3);
	order = size ? get_order(size) : 0;
	pages = alloc_pages_node(info->iommu->node,
				 GFP_KERNEL | __GFP_ZERO, order);
	if (!pages) {
		kfree(pasid_table);
		return -ENOMEM;
	}

	pasid_table->table = page_address(pages);
	pasid_table->order = order;
	pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);

attach_out:
	device_attach_pasid_table(info, pasid_table);

	return 0;
}
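
/*
 * Sizing sketch for intel_pasid_alloc_table() above, assuming
 * PASID_PDE_SHIFT == 6 (64 PASIDs per 8-byte directory entry) and
 * 4KB pages: a device reporting 2^20 PASIDs needs a directory of
 * 2^20 >> (6 - 3) = 128KB, i.e. order = get_order(128KB) = 5.
 * The reverse mapping holds too: (PAGE_SIZE << 5) / 8 = 2^14
 * directory entries covering 64 PASIDs each gives
 * max_pasid = 1 << (5 + 12 + 3) = 2^20.
 */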

void intel_pasid_free_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *table;
	int i, max_pde;

	info = dev->archdata.iommu;
	if (!info || !dev_is_pci(dev) || !info->pasid_table)
		return;

	pasid_table = info->pasid_table;
	device_detach_pasid_table(info, pasid_table);

	if (!list_empty(&pasid_table->dev))
		return;

	/* Free scalable mode PASID directory tables: */
	dir = pasid_table->table;
	max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
	for (i = 0; i < max_pde; i++) {
		table = get_pasid_table_from_pde(&dir[i]);
		free_pgtable_page(table);
	}

	free_pages((unsigned long)pasid_table->table, pasid_table->order);
	kfree(pasid_table);
}

struct pasid_table *intel_pasid_get_table(struct device *dev)
{
	struct device_domain_info *info;

	info = dev->archdata.iommu;
	if (!info)
		return NULL;

	return info->pasid_table;
}

int intel_pasid_get_dev_max_id(struct device *dev)
{
	struct device_domain_info *info;

	info = dev->archdata.iommu;
	if (!info || !info->pasid_table)
		return 0;

	return info->pasid_table->max_pasid;
}
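
/*
 * Lookup sketch for intel_pasid_get_entry() below, assuming
 * PASID_PDE_SHIFT == 6 and PASID_PTE_MASK == 0x3f: pasid 0x1234
 * resolves to directory slot 0x1234 >> 6 = 0x48 and entry
 * 0x1234 & 0x3f = 0x34 in the leaf table that slot points to. Leaf
 * tables are allocated lazily, under pasid_lock, the first time a
 * PASID in their range is requested.
 */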

struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *entries;
	int dir_index, index;

	pasid_table = intel_pasid_get_table(dev);
	if (WARN_ON(!pasid_table || pasid < 0 ||
		    pasid >= intel_pasid_get_dev_max_id(dev)))
		return NULL;

	dir = pasid_table->table;
	info = dev->archdata.iommu;
	dir_index = pasid >> PASID_PDE_SHIFT;
	index = pasid & PASID_PTE_MASK;

	spin_lock(&pasid_lock);
	entries = get_pasid_table_from_pde(&dir[dir_index]);
	if (!entries) {
		entries = alloc_pgtable_page(info->iommu->node);
		if (!entries) {
			spin_unlock(&pasid_lock);
			return NULL;
		}

		WRITE_ONCE(dir[dir_index].val,
			   (u64)virt_to_phys(entries) | PASID_PTE_PRESENT);
	}
	spin_unlock(&pasid_lock);

	return &entries[index];
}

/*
 * Interfaces for PASID table entry manipulation:
 */
static inline void pasid_clear_entry(struct pasid_entry *pe)
{
	WRITE_ONCE(pe->val[0], 0);
	WRITE_ONCE(pe->val[1], 0);
	WRITE_ONCE(pe->val[2], 0);
	WRITE_ONCE(pe->val[3], 0);
	WRITE_ONCE(pe->val[4], 0);
	WRITE_ONCE(pe->val[5], 0);
	WRITE_ONCE(pe->val[6], 0);
	WRITE_ONCE(pe->val[7], 0);
}

static void intel_pasid_clear_entry(struct device *dev, int pasid)
{
	struct pasid_entry *pe;

	pe = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pe))
		return;

	pasid_clear_entry(pe);
}

static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
{
	u64 old;

	old = READ_ONCE(*ptr);
	WRITE_ONCE(*ptr, (old & ~mask) | bits);
}
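
/*
 * Worked example of the read-modify-write above: pasid_set_domain_id()
 * below, called with value 5, boils down to
 * pasid_set_bits(&pe->val[1], 0xffff, 5): load val[1], clear its low
 * 16 bits, OR in 5 and store the result. READ_ONCE/WRITE_ONCE only
 * guarantee untorn 64-bit accesses; serializing whole-entry updates
 * is up to the callers.
 */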

/*
 * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode
 * PASID entry.
 */
static inline void
pasid_set_domain_id(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value);
}

/*
 * Get domain ID value of a scalable mode PASID entry.
 */
static inline u16
pasid_get_domain_id(struct pasid_entry *pe)
{
	return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0));
}

/*
 * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_slptr(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value);
}

/*
 * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID
 * entry.
 */
static inline void
pasid_set_address_width(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2);
}

/*
 * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_translation_type(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6);
}
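
/*
 * PGTT encodings used by the setup functions at the end of this file
 * (per the VT-d spec's scalable mode PASID table entry format):
 * 1 = first-level only, 2 = second-level only, 3 = nested,
 * 4 = pass-through.
 */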

/*
 * Enable fault processing by clearing the FPD(Fault Processing
 * Disable) field (Bit 1) of a scalable mode PASID entry.
 */
static inline void pasid_set_fault_enable(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 1, 0);
}

/*
 * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a
 * scalable mode PASID entry.
 */
static inline void pasid_set_sre(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[2], 1 << 0, 1);
}
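
/*
 * The spec numbers PASID entry bits across the whole 64-byte entry,
 * so spec bit N lives in val[N / 64] at position N % 64. For example,
 * SRE (spec bit 128) is val[2] bit 0 above, and the Page Walk Snoop
 * bit (spec bit 87) is val[1] bit 23 below.
 */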

/*
 * Setup the P(Present) field (Bit 0) of a scalable mode PASID
 * entry.
 */
static inline void pasid_set_present(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 0, 1);
}

/*
 * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID
 * entry.
 */
static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
{
	pasid_set_bits(&pe->val[1], 1 << 23, value << 23);
}

/*
 * Setup the First Level Page table Pointer field (Bit 140~191)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_flptr(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[2], VTD_PAGE_MASK, value);
}

/*
 * Setup the First Level Paging Mode field (Bit 130~131) of a
 * scalable mode PASID entry.
 */
static inline void
pasid_set_flpm(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2);
}

static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
				    u16 did, int pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid);
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(&desc, iommu);
}

static void
iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) |
			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(&desc, iommu);
}
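
/*
 * devtlb_invalidation_with_pasid() below targets the device TLB by
 * source-id, computed as bus << 8 | devfn. For example, a device at
 * bus 0x3a, devfn 0x10 yields sid = 0x3a << 8 | 0x10 = 0x3a10.
 */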

static void
devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
			       struct device *dev, int pasid)
{
	struct device_domain_info *info;
	u16 sid, qdep, pfsid;

	info = dev->archdata.iommu;
	if (!info || !info->ats_enabled)
		return;

	sid = info->bus << 8 | info->devfn;
	qdep = info->ats_qdep;
	pfsid = info->pfsid;

	qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
}
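
/*
 * Note on ordering in intel_pasid_tear_down_entry() below: the entry
 * is cleared first (with a CPU cache line flush when the IOMMU is not
 * coherent), then the PASID cache, the PASID-based IOTLB and finally,
 * outside of caching mode, the device TLB are invalidated, walking
 * outward through the hardware's translation caches.
 */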

void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
				 struct device *dev, int pasid)
{
	struct pasid_entry *pte;
	u16 did;

	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte))
		return;

	did = pasid_get_domain_id(pte);
	intel_pasid_clear_entry(dev, pasid);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
	iotlb_invalidation_with_pasid(iommu, did, pasid);

	/* Device IOTLB doesn't need to be flushed in caching mode. */
	if (!cap_caching_mode(iommu->cap))
		devtlb_invalidation_with_pasid(iommu, dev, pasid);
}
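
/*
 * pasid_flush_caches() below handles a freshly installed entry: in
 * caching mode (typical for a virtualized IOMMU) the hardware may
 * cache even not-present entries, so the new entry must be
 * invalidated explicitly; otherwise flushing the write buffer is
 * enough to make the update visible.
 */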

static void pasid_flush_caches(struct intel_iommu *iommu,
			       struct pasid_entry *pte,
			       int pasid, u16 did)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		iotlb_invalidation_with_pasid(iommu, did, pasid);
	} else {
		iommu_flush_write_buffer(iommu);
	}
}

/*
 * Set up the scalable mode pasid table entry for first only
 * translation type.
 */
int intel_pasid_setup_first_level(struct intel_iommu *iommu,
				  struct device *dev, pgd_t *pgd,
				  int pasid, u16 did, int flags)
{
	struct pasid_entry *pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte))
		return -EINVAL;

	pasid_clear_entry(pte);

	/* Setup the first level page table pointer: */
	pasid_set_flptr(pte, (u64)__pa(pgd));
	if (flags & PASID_FLAG_SUPERVISOR_MODE) {
		if (!ecap_srs(iommu->ecap)) {
			pr_err("No supervisor request support on %s\n",
			       iommu->name);
			return -EINVAL;
		}
		pasid_set_sre(pte);
	}

	if (flags & PASID_FLAG_FL5LP) {
		if (cap_5lp_support(iommu->cap)) {
			pasid_set_flpm(pte, 1);
		} else {
			pr_err("No 5-level paging support for first-level\n");
			pasid_clear_entry(pte);
			return -EINVAL;
		}
	}

	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/* Setup Present and PASID Granular Transfer Type: */
	pasid_set_translation_type(pte, 1);
	pasid_set_present(pte);
	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Set up the scalable mode pasid entry for second only translation type.
 */
int intel_pasid_setup_second_level(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, int pasid)
{
	struct pasid_entry *pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	int agaw;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	/*
	 * Skip top levels of page tables for iommu which has less agaw
	 * than default. Unnecessary for PT mode.
	 */
	pgd = domain->pgd;
	for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
		pgd = phys_to_virt(dma_pte_addr(pgd));
		if (!dma_pte_present(pgd)) {
			dev_err(dev, "Invalid domain page table\n");
			return -EINVAL;
		}
	}

	pgd_val = virt_to_phys(pgd);
	did = domain->iommu_did[iommu->seq_id];

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_slptr(pte, pgd_val);
	pasid_set_address_width(pte, agaw);
	pasid_set_translation_type(pte, 2);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/*
	 * Since it is a second level only translation setup, we should
	 * set SRE bit as well (addresses are expected to be GPAs).
	 */
	pasid_set_sre(pte);
	pasid_set_present(pte);
	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Set up the scalable mode pasid entry for passthrough translation type.
 */
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, int pasid)
{
	u16 did = FLPT_DEFAULT_DID;
	struct pasid_entry *pte;

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_translation_type(pte, 4);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/*
	 * We should set SRE bit as well since the addresses are expected
	 * to be GPAs.
	 */
	pasid_set_sre(pte);
	pasid_set_present(pte);
	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}