// SPDX-License-Identifier: GPL-2.0
/*
 * intel-pasid.c - PASID idr, table and entry manipulation
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt)	"DMAR: " fmt

#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/dmar.h>
#include <linux/intel-iommu.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>

#include "intel-pasid.h"

/*
 * Intel IOMMU system wide PASID name space:
 */
static DEFINE_SPINLOCK(pasid_lock);
u32 intel_pasid_max_id = PASID_MAX;
static DEFINE_IDR(pasid_idr);

int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp)
{
	int ret, min, max;

	min = max_t(int, start, PASID_MIN);
	max = min_t(int, end, intel_pasid_max_id);

	WARN_ON(in_interrupt());
	idr_preload(gfp);
	spin_lock(&pasid_lock);
	ret = idr_alloc(&pasid_idr, ptr, min, max, GFP_ATOMIC);
	spin_unlock(&pasid_lock);
	idr_preload_end();

	return ret;
}

void intel_pasid_free_id(int pasid)
{
	spin_lock(&pasid_lock);
	idr_remove(&pasid_idr, pasid);
	spin_unlock(&pasid_lock);
}

void *intel_pasid_lookup_id(int pasid)
{
	void *p;

	spin_lock(&pasid_lock);
	p = idr_find(&pasid_idr, pasid);
	spin_unlock(&pasid_lock);

	return p;
}

/*
 * Per device pasid table management:
 */
static inline void
device_attach_pasid_table(struct device_domain_info *info,
			  struct pasid_table *pasid_table)
{
	info->pasid_table = pasid_table;
	list_add(&info->table, &pasid_table->dev);
}

static inline void
device_detach_pasid_table(struct device_domain_info *info,
			  struct pasid_table *pasid_table)
{
	info->pasid_table = NULL;
	list_del(&info->table);
}

struct pasid_table_opaque {
	struct pasid_table	**pasid_table;
	int			segment;
	int			bus;
	int			devfn;
};

static int search_pasid_table(struct device_domain_info *info, void *opaque)
{
	struct pasid_table_opaque *data = opaque;

	if (info->iommu->segment == data->segment &&
	    info->bus == data->bus &&
	    info->devfn == data->devfn &&
	    info->pasid_table) {
		*data->pasid_table = info->pasid_table;
		return 1;
	}

	return 0;
}

static int get_alias_pasid_table(struct pci_dev *pdev, u16 alias, void *opaque)
{
	struct pasid_table_opaque *data = opaque;

	data->segment = pci_domain_nr(pdev->bus);
	data->bus = PCI_BUS_NUM(alias);
	data->devfn = alias & 0xff;

	return for_each_device_domain(&search_pasid_table, data);
}

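/*
 * Illustrative example: an alias RID of 0x3a10 decomposes to bus
 * PCI_BUS_NUM(0x3a10) = 0x3a and devfn 0x3a10 & 0xff = 0x10, i.e.
 * device 02.0 on bus 0x3a; any device whose DMA aliases to that RID
 * will find and share the same pasid table.
 */
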
/*
 * Allocate a pasid table for @dev. It should be called in a
 * single-thread context.
 */
int intel_pasid_alloc_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_table_opaque data;
	struct page *pages;
	int max_pasid = 0;
	int ret, order;
	int size;

	might_sleep();
	info = dev->archdata.iommu;
	if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
		return -EINVAL;

	/* DMA alias device already has a pasid table, use it: */
	data.pasid_table = &pasid_table;
	ret = pci_for_each_dma_alias(to_pci_dev(dev),
				     &get_alias_pasid_table, &data);
	if (ret)
		goto attach_out;

	pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
	if (!pasid_table)
		return -ENOMEM;
	INIT_LIST_HEAD(&pasid_table->dev);

	if (info->pasid_supported)
		max_pasid = min_t(int, pci_max_pasids(to_pci_dev(dev)),
				  intel_pasid_max_id);

	size = max_pasid >> (PASID_PDE_SHIFT - 3);
	order = size ? get_order(size) : 0;
	pages = alloc_pages_node(info->iommu->node,
				 GFP_KERNEL | __GFP_ZERO, order);
	if (!pages) {
		kfree(pasid_table);
		return -ENOMEM;
	}

	pasid_table->table = page_address(pages);
	pasid_table->order = order;
	pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);

attach_out:
	device_attach_pasid_table(info, pasid_table);

	return 0;
}

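/*
 * Sizing sketch (illustrative, assuming PASID_PDE_SHIFT == 6 as defined
 * in intel-pasid.h and 4KB pages): a device reporting pci_max_pasids()
 * == 0x100000 gives size = 0x100000 >> 3 = 0x20000 bytes of directory,
 * order = get_order(0x20000) = 5, and max_pasid = 1 << (5 + 12 + 3) =
 * 0x100000 again: one 8-byte directory entry per 64 PASIDs.
 */
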
void intel_pasid_free_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *table;
	int i, max_pde;

	info = dev->archdata.iommu;
	if (!info || !dev_is_pci(dev) || !info->pasid_table)
		return;

	pasid_table = info->pasid_table;
	device_detach_pasid_table(info, pasid_table);

	if (!list_empty(&pasid_table->dev))
		return;

	/* Free scalable mode PASID directory tables: */
	dir = pasid_table->table;
	max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
	for (i = 0; i < max_pde; i++) {
		table = get_pasid_table_from_pde(&dir[i]);
		free_pgtable_page(table);
	}

	free_pages((unsigned long)pasid_table->table, pasid_table->order);
	kfree(pasid_table);
}

struct pasid_table *intel_pasid_get_table(struct device *dev)
{
	struct device_domain_info *info;

	info = dev->archdata.iommu;
	if (!info)
		return NULL;

	return info->pasid_table;
}

int intel_pasid_get_dev_max_id(struct device *dev)
{
	struct device_domain_info *info;

	info = dev->archdata.iommu;
	if (!info || !info->pasid_table)
		return 0;

	return info->pasid_table->max_pasid;
}

struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *entries;
	int dir_index, index;

	pasid_table = intel_pasid_get_table(dev);
	if (WARN_ON(!pasid_table || pasid < 0 ||
		    pasid >= intel_pasid_get_dev_max_id(dev)))
		return NULL;

	dir = pasid_table->table;
	info = dev->archdata.iommu;
	dir_index = pasid >> PASID_PDE_SHIFT;
	index = pasid & PASID_PTE_MASK;

	spin_lock(&pasid_lock);
	entries = get_pasid_table_from_pde(&dir[dir_index]);
	if (!entries) {
		entries = alloc_pgtable_page(info->iommu->node);
		if (!entries) {
			spin_unlock(&pasid_lock);
			return NULL;
		}

		WRITE_ONCE(dir[dir_index].val,
			   (u64)virt_to_phys(entries) | PASID_PTE_PRESENT);
	}
	spin_unlock(&pasid_lock);

	return &entries[index];
}

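/*
 * Index math example (illustrative, with PASID_PDE_SHIFT == 6 and
 * PASID_PTE_MASK == 0x3F from intel-pasid.h): pasid 0x1043 lands in
 * directory slot 0x1043 >> 6 = 0x41 and entry 0x1043 & 0x3f = 0x03 of
 * the leaf page that slot points at.
 */
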
/*
 * Interfaces for PASID table entry manipulation:
 */
static inline void pasid_clear_entry(struct pasid_entry *pe)
{
	WRITE_ONCE(pe->val[0], 0);
	WRITE_ONCE(pe->val[1], 0);
	WRITE_ONCE(pe->val[2], 0);
	WRITE_ONCE(pe->val[3], 0);
	WRITE_ONCE(pe->val[4], 0);
	WRITE_ONCE(pe->val[5], 0);
	WRITE_ONCE(pe->val[6], 0);
	WRITE_ONCE(pe->val[7], 0);
}

static void intel_pasid_clear_entry(struct device *dev, int pasid)
{
	struct pasid_entry *pe;

	pe = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pe))
		return;

	pasid_clear_entry(pe);
}

static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
{
	u64 old;

	old = READ_ONCE(*ptr);
	WRITE_ONCE(*ptr, (old & ~mask) | bits);
}

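/*
 * Example (illustrative): pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6),
 * 2 << 6) read-modify-writes only bits 8:6 of the first qword, leaving
 * every other bit intact; all of the field setters below are thin
 * wrappers around this helper.
 */
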
/*
 * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode
 * PASID entry.
 */
static inline void
pasid_set_domain_id(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value);
}

/*
 * Get domain ID value of a scalable mode PASID entry.
 */
static inline u16
pasid_get_domain_id(struct pasid_entry *pe)
{
	return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0));
}

/*
 * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_slptr(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value);
}

/*
 * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID
 * entry.
 */
static inline void
pasid_set_address_width(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2);
}

/*
 * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_translation_type(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6);
}

/*
 * Enable fault processing by clearing the FPD(Fault Processing
 * Disable) field (Bit 1) of a scalable mode PASID entry.
 */
static inline void pasid_set_fault_enable(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 1, 0);
}

/*
 * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a
 * scalable mode PASID entry.
 */
static inline void pasid_set_sre(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[2], 1 << 0, 1);
}

/*
 * Setup the P(Present) field (Bit 0) of a scalable mode PASID
 * entry.
 */
static inline void pasid_set_present(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 0, 1);
}

/*
 * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID
 * entry.
 */
static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
{
	pasid_set_bits(&pe->val[1], 1 << 23, value << 23);
}

/*
 * Setup the First Level Page table Pointer field (Bit 140~191)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_flptr(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[2], VTD_PAGE_MASK, value);
}

/*
 * Setup the First Level Paging Mode field (Bit 130~131) of a
 * scalable mode PASID entry.
 */
static inline void
pasid_set_flpm(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2);
}

static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
				    u16 did, int pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid);
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(&desc, iommu);
}

static void
iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) |
			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(&desc, iommu);
}

static void
devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
			       struct device *dev, int pasid)
{
	struct device_domain_info *info;
	u16 sid, qdep, pfsid;

	info = dev->archdata.iommu;
	if (!info || !info->ats_enabled)
		return;

	sid = info->bus << 8 | info->devfn;
	qdep = info->ats_qdep;
	pfsid = info->pfsid;

	qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
}

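/*
 * Illustrative example: a function at 3a:02.0 has bus 0x3a and devfn
 * PCI_DEVFN(2, 0) = 0x10, so sid = 0x3a << 8 | 0x10 = 0x3a10; the
 * 64 - VTD_PAGE_SHIFT mask requests invalidation of the device's
 * entire address range.
 */
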
void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
				 struct device *dev, int pasid)
{
	struct pasid_entry *pte;
	u16 did;

	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte))
		return;

	did = pasid_get_domain_id(pte);
	intel_pasid_clear_entry(dev, pasid);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
	iotlb_invalidation_with_pasid(iommu, did, pasid);

	/* Device IOTLB doesn't need to be flushed in caching mode. */
	if (!cap_caching_mode(iommu->cap))
		devtlb_invalidation_with_pasid(iommu, dev, pasid);
}

/*
 * Set up the scalable mode pasid table entry for first only
 * translation type.
 */
int intel_pasid_setup_first_level(struct intel_iommu *iommu,
				  struct device *dev, pgd_t *pgd,
				  int pasid, u16 did, int flags)
{
	struct pasid_entry *pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte))
		return -EINVAL;

	pasid_clear_entry(pte);

	/* Setup the first level page table pointer: */
	pasid_set_flptr(pte, (u64)__pa(pgd));
	if (flags & PASID_FLAG_SUPERVISOR_MODE) {
		if (!ecap_srs(iommu->ecap)) {
			pr_err("No supervisor request support on %s\n",
			       iommu->name);
			return -EINVAL;
		}
		pasid_set_sre(pte);
	}

#ifdef CONFIG_X86
	/* Both CPU and IOMMU paging mode need to match */
	if (cpu_feature_enabled(X86_FEATURE_LA57)) {
		if (cap_5lp_support(iommu->cap)) {
			pasid_set_flpm(pte, 1);
		} else {
			pr_err("VT-d has no 5-level paging support for CPU\n");
			pasid_clear_entry(pte);
			return -EINVAL;
		}
	}
#endif /* CONFIG_X86 */

	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/* Setup Present and PASID Granular Transfer Type: */
	pasid_set_translation_type(pte, 1);
	pasid_set_present(pte);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		iotlb_invalidation_with_pasid(iommu, did, pasid);
	} else {
		iommu_flush_write_buffer(iommu);
	}

	return 0;
}

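/*
 * Illustrative call (sketch): an SVA-style caller binding a process
 * address space would hand over the mm's page table root, e.g.
 *
 *	ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, pasid,
 *					    FLPT_DEFAULT_DID, 0);
 *
 * with PASID_FLAG_SUPERVISOR_MODE instead of 0 when binding init_mm.
 */
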
/*
 * Set up the scalable mode pasid entry for second only translation type.
 */
int intel_pasid_setup_second_level(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, int pasid)
{
	struct pasid_entry *pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	int agaw;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	/*
	 * Skip top levels of page tables for iommu which has less agaw
	 * than default. Unnecessary for PT mode.
	 */
	pgd = domain->pgd;
	for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
		pgd = phys_to_virt(dma_pte_addr(pgd));
		if (!dma_pte_present(pgd)) {
			dev_err(dev, "Invalid domain page table\n");
			return -EINVAL;
		}
	}

	pgd_val = virt_to_phys(pgd);
	did = domain->iommu_did[iommu->seq_id];

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_slptr(pte, pgd_val);
	pasid_set_address_width(pte, agaw);
	pasid_set_translation_type(pte, 2);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/*
	 * Since it is a second level only translation setup, we should
	 * set SRE bit as well (addresses are expected to be GPAs).
	 */
	pasid_set_sre(pte);
	pasid_set_present(pte);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		iotlb_invalidation_with_pasid(iommu, did, pasid);
	} else {
		iommu_flush_write_buffer(iommu);
	}

	return 0;
}

/*
 * Set up the scalable mode pasid entry for passthrough translation type.
 */
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, int pasid)
{
	u16 did = FLPT_DEFAULT_DID;
	struct pasid_entry *pte;

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_translation_type(pte, 4);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/*
	 * We should set SRE bit as well since the addresses are expected
	 * to be GPAs.
	 */
	pasid_set_sre(pte);
	pasid_set_present(pte);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		iotlb_invalidation_with_pasid(iommu, did, pasid);
	} else {
		iommu_flush_write_buffer(iommu);
	}

	return 0;
}

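/*
 * Usage sketch (illustrative): a caller that attached @dev to @domain in
 * scalable mode would typically program the RID-based entry with
 *
 *	ret = intel_pasid_setup_second_level(iommu, domain, dev,
 *					     PASID_RID2PASID);
 *
 * (or intel_pasid_setup_pass_through() for identity mapping) and later
 * undo it with intel_pasid_tear_down_entry(iommu, dev, PASID_RID2PASID).
 */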