// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2006-2014 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>,
 *          Ashok Raj <ashok.raj@intel.com>,
 *          Shaohua Li <shaohua.li@intel.com>,
 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
 *          Fenghua Yu <fenghua.yu@intel.com>
 *          Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)     "DMAR: " fmt
#define dev_fmt(fmt)    pr_fmt(fmt)
#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/cpu.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
#include <linux/dma-direct.h>
#include <linux/crash_dump.h>
#include <linux/numa.h>
#include <linux/swiotlb.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#include <trace/events/intel_iommu.h>

#include "irq_remapping.h"
#include "intel-pasid.h"
#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57

#define MAX_AGAW_WIDTH 64
#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

/* IO virtual address start page frame number */
#define IOVA_START_PFN		(1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/*
 * This bitmap is used to advertise the page sizes our hardware supports
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * Traditionally the IOMMU core just handed us the mappings directly,
 * after making sure the size is an order of a 4KiB page and that the
 * mapping has natural alignment.
 *
 * To retain this behavior, we currently advertise that we support
 * all page sizes that are an order of 4KiB.
 *
 * If at some point we'd like to utilize the IOMMU core's new behavior,
 * we could change this to advertise the real page sizes we support.
 */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)
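
/*
 * Editor's note (illustration, not part of the original source): ~0xFFFUL
 * leaves bits 0-11 clear and sets every bit from 12 upwards, so the bitmap
 * above advertises every power-of-two page size from 4KiB (bit 12) up,
 * including 2MiB (bit 21) and 1GiB (bit 30), matching the "all page sizes
 * that are an order of 4KiB" behaviour described in the comment.
 */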
static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
}

static inline int width_to_agaw(int width)
{
	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
}

/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}

static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}
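
/*
 * Editor's note (worked example, not in the original source): for a 48-bit
 * address width, width_to_agaw(48) = DIV_ROUND_UP(48 - 30, 9) = 2 and
 * agaw_to_level(2) = 4, i.e. a 4-level page table. Each level decodes
 * LEVEL_STRIDE = 9 bits of the DMA PFN, so pfn_level_offset(pfn, 4)
 * extracts bits 35:27 of the PFN.
 */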
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * set to 1 to panic kernel if can't successfully enable VT-d
 * (used when kernel is launched w/ TXT)
 */
static int force_on = 0;
int intel_iommu_tboot_noforce;
static int no_platform_optin;
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))

/*
 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
 */
static phys_addr_t root_entry_lctp(struct root_entry *re)
{
	return re->lo & VTD_PAGE_MASK;
}

/*
 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
 */
static phys_addr_t root_entry_uctp(struct root_entry *re)
{
	return re->hi & VTD_PAGE_MASK;
}
static inline void context_clear_pasid_enable(struct context_entry *context)
{
	context->lo &= ~(1ULL << 11);
}

static inline bool context_pasid_enabled(struct context_entry *context)
{
	return !!(context->lo & (1ULL << 11));
}

static inline void context_set_copied(struct context_entry *context)
{
	context->hi |= (1ull << 3);
}

static inline bool context_copied(struct context_entry *context)
{
	return !!(context->hi & (1ULL << 3));
}

static inline bool __context_present(struct context_entry *context)
{
	return (context->lo & 1);
}

bool context_present(struct context_entry *context)
{
	return context_pasid_enabled(context) ?
	     __context_present(context) :
	     __context_present(context) && !context_copied(context);
}

static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo &= ~VTD_PAGE_MASK;
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline int context_domain_id(struct context_entry *c)
{
	return((c->hi >> 8) & 0xffff);
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}
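
/*
 * Editor's sketch (illustration only, not from the original file): building
 * a legacy-mode context entry for domain id 5 whose second-level page table
 * sits at physical address pgd_pa would use the helpers above roughly as:
 *
 *	context_set_domain_id(ce, 5);			// hi[23:8]
 *	context_set_address_root(ce, pgd_pa);		// lo[63:12]
 *	context_set_address_width(ce, agaw);		// hi[2:0]
 *	context_set_translation_type(ce, CONTEXT_TT_MULTI_LEVEL); // lo[3:2]
 *	context_set_fault_enable(ce);			// clear lo[1]
 *	context_set_present(ce);			// lo[0]
 */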
/*
 * This domain is a statically identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY		BIT(0)

/*
 * This is a DMA domain allocated through the iommu domain allocation
 * interface. But one or more devices belonging to this domain have
 * been chosen to use a private domain. We should avoid using the
 * map/unmap/iova_to_phys APIs on it.
 */
#define DOMAIN_FLAG_LOSE_CHILDREN		BIT(1)

/*
 * When VT-d works in the scalable mode, it allows DMA translation to
 * happen through either first level or second level page table. This
 * bit marks that the DMA translation for the domain goes through the
 * first level page table, otherwise, it goes through the second level.
 */
#define DOMAIN_FLAG_USE_FIRST_LEVEL		BIT(2)

/*
 * Domain represents a virtual machine which demands iommu nested
 * translation mode support.
 */
#define DOMAIN_FLAG_NESTING_MODE		BIT(3)

#define for_each_domain_iommu(idx, domain)			\
	for (idx = 0; idx < g_num_of_iommus; idx++)		\
		if (domain->iommu_refcnt[idx])
struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units	*/
	struct acpi_dmar_header *hdr;	/* ACPI header		*/
	u64	base_address;		/* reserved base address*/
	u64	end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int	devices_cnt;		/* target device count */
};

struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};

static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);

#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
static void domain_context_clear(struct intel_iommu *iommu,
				 struct device *dev);
static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu);
static bool device_is_rmrr_locked(struct device *dev);
static int intel_iommu_attach_device(struct iommu_domain *domain,
				     struct device *dev);
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
					    dma_addr_t iova);

#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */

#ifdef CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
int intel_iommu_sm = 1;
#else
int intel_iommu_sm;
#endif /* CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */

int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int iommu_identity_mapping;
static int intel_no_bounce;

#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) &&	\
				to_pci_dev(d)->untrusted)
/*
 * Iterate over elements in device_domain_list and call the specified
 * callback @fn against each element.
 */
int for_each_device_domain(int (*fn)(struct device_domain_info *info,
				     void *data), void *data)
{
	int ret = 0;
	unsigned long flags;
	struct device_domain_info *info;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &device_domain_list, global) {
		ret = fn(info, data);
		if (ret) {
			spin_unlock_irqrestore(&device_domain_lock, flags);
			return ret;
		}
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}
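
/*
 * Editor's sketch (usage illustration, not part of the original file): a
 * caller can walk every tracked device with a small callback, e.g. to count
 * them:
 *
 *	static int count_cb(struct device_domain_info *info, void *data)
 *	{
 *		(*(int *)data)++;
 *		return 0;	// keep iterating
 *	}
 *
 *	int n = 0;
 *	for_each_device_domain(count_cb, &n);
 */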
const struct iommu_ops intel_iommu_ops;

static bool translation_pre_enabled(struct intel_iommu *iommu)
{
	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
}

static void clear_translation_pre_enabled(struct intel_iommu *iommu)
{
	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
}

static void init_translation_status(struct intel_iommu *iommu)
{
	u32 gsts;

	gsts = readl(iommu->reg + DMAR_GSTS_REG);
	if (gsts & DMA_GSTS_TES)
		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
}

/* Convert generic 'struct iommu_domain' to private 'struct dmar_domain' */
static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct dmar_domain, domain);
}

static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			pr_info("IOMMU enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			no_platform_optin = 1;
			pr_info("IOMMU disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			pr_info("Disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			pr_info("Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			pr_info("Disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		} else if (!strncmp(str, "sp_off", 6)) {
			pr_info("Disable supported super page\n");
			intel_iommu_superpage = 0;
		} else if (!strncmp(str, "sm_on", 5)) {
			pr_info("Intel-IOMMU: scalable mode supported\n");
			intel_iommu_sm = 1;
		} else if (!strncmp(str, "tboot_noforce", 13)) {
			printk(KERN_INFO
				"Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
			intel_iommu_tboot_noforce = 1;
		} else if (!strncmp(str, "nobounce", 8)) {
			pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
			intel_no_bounce = 1;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);
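
/*
 * Editor's note (illustration, not part of the original source): the options
 * parsed above are comma-separated values of the "intel_iommu=" kernel
 * parameter, e.g. intel_iommu=on,sm_on,strict enables translation, opts in
 * to scalable mode and disables batched IOTLB flushing.
 */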
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;

static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	domains = iommu->domains[idx];
	if (!domains)
		return NULL;

	return domains[did & 0xff];
}

static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
			     struct dmar_domain *domain)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	if (!iommu->domains[idx]) {
		size_t size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
	}

	domains = iommu->domains[idx];
	if (WARN_ON(!domains))
		return;
	else
		domains[did & 0xff] = domain;
}
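
/*
 * Editor's note (illustration, not in the original file): domain pointers
 * are kept in a two-level array of 256-entry chunks, so domain id 0x1234
 * lives at iommu->domains[0x12][0x34] -- the high byte of the DID selects
 * the chunk and the low byte indexes into it.
 */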
void *alloc_pgtable_page(int node)
{
	struct page *page;
	void *vaddr = NULL;

	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (page)
		vaddr = page_address(page);
	return vaddr;
}

void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void * alloc_devinfo_mem(void)
{
	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

static inline int domain_type_is_si(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}

static inline bool domain_use_first_level(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
}

static inline int domain_pfn_supported(struct dmar_domain *domain,
				       unsigned long pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;

	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
}
static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(max_gaw);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}

/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * calculate agaw for each iommu.
 * "SAGAW" may be different across iommus, use a default agaw, and
 * get a supported less agaw for iommus that don't support the default agaw.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}
/* This function only returns single iommu in a domain */
struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	/* si_domain and vm domain should not get here. */
	if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
		return NULL;

	for_each_domain_iommu(iommu_id, domain)
		break;

	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}
static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
{
	return sm_supported(iommu) ?
			ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
}
643 static void domain_update_iommu_coherency(struct dmar_domain
*domain
)
645 struct dmar_drhd_unit
*drhd
;
646 struct intel_iommu
*iommu
;
650 domain
->iommu_coherency
= 1;
652 for_each_domain_iommu(i
, domain
) {
654 if (!iommu_paging_structure_coherency(g_iommus
[i
])) {
655 domain
->iommu_coherency
= 0;
662 /* No hardware attached; use lowest common denominator */
664 for_each_active_iommu(iommu
, drhd
) {
665 if (!iommu_paging_structure_coherency(iommu
)) {
666 domain
->iommu_coherency
= 0;
673 static int domain_update_iommu_snooping(struct intel_iommu
*skip
)
675 struct dmar_drhd_unit
*drhd
;
676 struct intel_iommu
*iommu
;
680 for_each_active_iommu(iommu
, drhd
) {
682 if (!ecap_sc_support(iommu
->ecap
)) {
693 static int domain_update_iommu_superpage(struct dmar_domain
*domain
,
694 struct intel_iommu
*skip
)
696 struct dmar_drhd_unit
*drhd
;
697 struct intel_iommu
*iommu
;
700 if (!intel_iommu_superpage
) {
704 /* set iommu_superpage to the smallest common denominator */
706 for_each_active_iommu(iommu
, drhd
) {
708 if (domain
&& domain_use_first_level(domain
)) {
709 if (!cap_fl1gp_support(iommu
->cap
))
712 mask
&= cap_super_page_val(iommu
->cap
);
724 /* Some capabilities may be different across iommus */
725 static void domain_update_iommu_cap(struct dmar_domain
*domain
)
727 domain_update_iommu_coherency(domain
);
728 domain
->iommu_snooping
= domain_update_iommu_snooping(NULL
);
729 domain
->iommu_superpage
= domain_update_iommu_superpage(domain
, NULL
);
732 struct context_entry
*iommu_context_addr(struct intel_iommu
*iommu
, u8 bus
,
735 struct root_entry
*root
= &iommu
->root_entry
[bus
];
736 struct context_entry
*context
;
740 if (sm_supported(iommu
)) {
748 context
= phys_to_virt(*entry
& VTD_PAGE_MASK
);
750 unsigned long phy_addr
;
754 context
= alloc_pgtable_page(iommu
->node
);
758 __iommu_flush_cache(iommu
, (void *)context
, CONTEXT_SIZE
);
759 phy_addr
= virt_to_phys((void *)context
);
760 *entry
= phy_addr
| 1;
761 __iommu_flush_cache(iommu
, entry
, sizeof(*entry
));
763 return &context
[devfn
];
766 static int iommu_dummy(struct device
*dev
)
768 return dev
->archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
;
771 static bool attach_deferred(struct device
*dev
)
773 return dev
->archdata
.iommu
== DEFER_DEVICE_DOMAIN_INFO
;
777 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
778 * sub-hierarchy of a candidate PCI-PCI bridge
779 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
780 * @bridge: the candidate PCI-PCI bridge
782 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
785 is_downstream_to_pci_bridge(struct device
*dev
, struct device
*bridge
)
787 struct pci_dev
*pdev
, *pbridge
;
789 if (!dev_is_pci(dev
) || !dev_is_pci(bridge
))
792 pdev
= to_pci_dev(dev
);
793 pbridge
= to_pci_dev(bridge
);
795 if (pbridge
->subordinate
&&
796 pbridge
->subordinate
->number
<= pdev
->bus
->number
&&
797 pbridge
->subordinate
->busn_res
.end
>= pdev
->bus
->number
)
803 static struct intel_iommu
*device_to_iommu(struct device
*dev
, u8
*bus
, u8
*devfn
)
805 struct dmar_drhd_unit
*drhd
= NULL
;
806 struct intel_iommu
*iommu
;
808 struct pci_dev
*pdev
= NULL
;
812 if (iommu_dummy(dev
))
815 if (dev_is_pci(dev
)) {
816 struct pci_dev
*pf_pdev
;
818 pdev
= pci_real_dma_dev(to_pci_dev(dev
));
820 /* VFs aren't listed in scope tables; we need to look up
821 * the PF instead to find the IOMMU. */
822 pf_pdev
= pci_physfn(pdev
);
824 segment
= pci_domain_nr(pdev
->bus
);
825 } else if (has_acpi_companion(dev
))
826 dev
= &ACPI_COMPANION(dev
)->dev
;
829 for_each_active_iommu(iommu
, drhd
) {
830 if (pdev
&& segment
!= drhd
->segment
)
833 for_each_active_dev_scope(drhd
->devices
,
834 drhd
->devices_cnt
, i
, tmp
) {
836 /* For a VF use its original BDF# not that of the PF
837 * which we used for the IOMMU lookup. Strictly speaking
838 * we could do this for all PCI devices; we only need to
839 * get the BDF# from the scope table for ACPI matches. */
840 if (pdev
&& pdev
->is_virtfn
)
843 *bus
= drhd
->devices
[i
].bus
;
844 *devfn
= drhd
->devices
[i
].devfn
;
848 if (is_downstream_to_pci_bridge(dev
, tmp
))
852 if (pdev
&& drhd
->include_all
) {
854 *bus
= pdev
->bus
->number
;
855 *devfn
= pdev
->devfn
;
866 static void domain_flush_cache(struct dmar_domain
*domain
,
867 void *addr
, int size
)
869 if (!domain
->iommu_coherency
)
870 clflush_cache_range(addr
, size
);
873 static int device_context_mapped(struct intel_iommu
*iommu
, u8 bus
, u8 devfn
)
875 struct context_entry
*context
;
879 spin_lock_irqsave(&iommu
->lock
, flags
);
880 context
= iommu_context_addr(iommu
, bus
, devfn
, 0);
882 ret
= context_present(context
);
883 spin_unlock_irqrestore(&iommu
->lock
, flags
);
887 static void free_context_table(struct intel_iommu
*iommu
)
891 struct context_entry
*context
;
893 spin_lock_irqsave(&iommu
->lock
, flags
);
894 if (!iommu
->root_entry
) {
897 for (i
= 0; i
< ROOT_ENTRY_NR
; i
++) {
898 context
= iommu_context_addr(iommu
, i
, 0, 0);
900 free_pgtable_page(context
);
902 if (!sm_supported(iommu
))
905 context
= iommu_context_addr(iommu
, i
, 0x80, 0);
907 free_pgtable_page(context
);
910 free_pgtable_page(iommu
->root_entry
);
911 iommu
->root_entry
= NULL
;
913 spin_unlock_irqrestore(&iommu
->lock
, flags
);
916 static struct dma_pte
*pfn_to_dma_pte(struct dmar_domain
*domain
,
917 unsigned long pfn
, int *target_level
)
919 struct dma_pte
*parent
, *pte
;
920 int level
= agaw_to_level(domain
->agaw
);
923 BUG_ON(!domain
->pgd
);
925 if (!domain_pfn_supported(domain
, pfn
))
926 /* Address beyond IOMMU's addressing capabilities. */
929 parent
= domain
->pgd
;
934 offset
= pfn_level_offset(pfn
, level
);
935 pte
= &parent
[offset
];
936 if (!*target_level
&& (dma_pte_superpage(pte
) || !dma_pte_present(pte
)))
938 if (level
== *target_level
)
941 if (!dma_pte_present(pte
)) {
944 tmp_page
= alloc_pgtable_page(domain
->nid
);
949 domain_flush_cache(domain
, tmp_page
, VTD_PAGE_SIZE
);
950 pteval
= ((uint64_t)virt_to_dma_pfn(tmp_page
) << VTD_PAGE_SHIFT
) | DMA_PTE_READ
| DMA_PTE_WRITE
;
951 if (domain_use_first_level(domain
))
952 pteval
|= DMA_FL_PTE_XD
| DMA_FL_PTE_US
;
953 if (cmpxchg64(&pte
->val
, 0ULL, pteval
))
954 /* Someone else set it while we were thinking; use theirs. */
955 free_pgtable_page(tmp_page
);
957 domain_flush_cache(domain
, pte
, sizeof(*pte
));
962 parent
= phys_to_virt(dma_pte_addr(pte
));
967 *target_level
= level
;
972 /* return address's pte at specific level */
973 static struct dma_pte
*dma_pfn_level_pte(struct dmar_domain
*domain
,
975 int level
, int *large_page
)
977 struct dma_pte
*parent
, *pte
;
978 int total
= agaw_to_level(domain
->agaw
);
981 parent
= domain
->pgd
;
982 while (level
<= total
) {
983 offset
= pfn_level_offset(pfn
, total
);
984 pte
= &parent
[offset
];
988 if (!dma_pte_present(pte
)) {
993 if (dma_pte_superpage(pte
)) {
998 parent
= phys_to_virt(dma_pte_addr(pte
));
1004 /* clear last level pte, a tlb flush should be followed */
1005 static void dma_pte_clear_range(struct dmar_domain
*domain
,
1006 unsigned long start_pfn
,
1007 unsigned long last_pfn
)
1009 unsigned int large_page
;
1010 struct dma_pte
*first_pte
, *pte
;
1012 BUG_ON(!domain_pfn_supported(domain
, start_pfn
));
1013 BUG_ON(!domain_pfn_supported(domain
, last_pfn
));
1014 BUG_ON(start_pfn
> last_pfn
);
1016 /* we don't need lock here; nobody else touches the iova range */
1019 first_pte
= pte
= dma_pfn_level_pte(domain
, start_pfn
, 1, &large_page
);
1021 start_pfn
= align_to_level(start_pfn
+ 1, large_page
+ 1);
1026 start_pfn
+= lvl_to_nr_pages(large_page
);
1028 } while (start_pfn
<= last_pfn
&& !first_pte_in_page(pte
));
1030 domain_flush_cache(domain
, first_pte
,
1031 (void *)pte
- (void *)first_pte
);
1033 } while (start_pfn
&& start_pfn
<= last_pfn
);
1036 static void dma_pte_free_level(struct dmar_domain
*domain
, int level
,
1037 int retain_level
, struct dma_pte
*pte
,
1038 unsigned long pfn
, unsigned long start_pfn
,
1039 unsigned long last_pfn
)
1041 pfn
= max(start_pfn
, pfn
);
1042 pte
= &pte
[pfn_level_offset(pfn
, level
)];
1045 unsigned long level_pfn
;
1046 struct dma_pte
*level_pte
;
1048 if (!dma_pte_present(pte
) || dma_pte_superpage(pte
))
1051 level_pfn
= pfn
& level_mask(level
);
1052 level_pte
= phys_to_virt(dma_pte_addr(pte
));
1055 dma_pte_free_level(domain
, level
- 1, retain_level
,
1056 level_pte
, level_pfn
, start_pfn
,
1061 * Free the page table if we're below the level we want to
1062 * retain and the range covers the entire table.
1064 if (level
< retain_level
&& !(start_pfn
> level_pfn
||
1065 last_pfn
< level_pfn
+ level_size(level
) - 1)) {
1067 domain_flush_cache(domain
, pte
, sizeof(*pte
));
1068 free_pgtable_page(level_pte
);
1071 pfn
+= level_size(level
);
1072 } while (!first_pte_in_page(++pte
) && pfn
<= last_pfn
);
1076 * clear last level (leaf) ptes and free page table pages below the
1077 * level we wish to keep intact.
1079 static void dma_pte_free_pagetable(struct dmar_domain
*domain
,
1080 unsigned long start_pfn
,
1081 unsigned long last_pfn
,
1084 BUG_ON(!domain_pfn_supported(domain
, start_pfn
));
1085 BUG_ON(!domain_pfn_supported(domain
, last_pfn
));
1086 BUG_ON(start_pfn
> last_pfn
);
1088 dma_pte_clear_range(domain
, start_pfn
, last_pfn
);
1090 /* We don't need lock here; nobody else touches the iova range */
1091 dma_pte_free_level(domain
, agaw_to_level(domain
->agaw
), retain_level
,
1092 domain
->pgd
, 0, start_pfn
, last_pfn
);
1095 if (start_pfn
== 0 && last_pfn
== DOMAIN_MAX_PFN(domain
->gaw
)) {
1096 free_pgtable_page(domain
->pgd
);
/* When a page at a given level is being unlinked from its parent, we don't
   need to *modify* it at all. All we need to do is make a list of all the
   pages which can be freed just as soon as we've flushed the IOTLB and we
   know the hardware page-walk will no longer touch them.
   The 'pte' argument is the *parent* PTE, pointing to the page that is to
   be freed. */
1107 static struct page
*dma_pte_list_pagetables(struct dmar_domain
*domain
,
1108 int level
, struct dma_pte
*pte
,
1109 struct page
*freelist
)
1113 pg
= pfn_to_page(dma_pte_addr(pte
) >> PAGE_SHIFT
);
1114 pg
->freelist
= freelist
;
1120 pte
= page_address(pg
);
1122 if (dma_pte_present(pte
) && !dma_pte_superpage(pte
))
1123 freelist
= dma_pte_list_pagetables(domain
, level
- 1,
1126 } while (!first_pte_in_page(pte
));
1131 static struct page
*dma_pte_clear_level(struct dmar_domain
*domain
, int level
,
1132 struct dma_pte
*pte
, unsigned long pfn
,
1133 unsigned long start_pfn
,
1134 unsigned long last_pfn
,
1135 struct page
*freelist
)
1137 struct dma_pte
*first_pte
= NULL
, *last_pte
= NULL
;
1139 pfn
= max(start_pfn
, pfn
);
1140 pte
= &pte
[pfn_level_offset(pfn
, level
)];
1143 unsigned long level_pfn
;
1145 if (!dma_pte_present(pte
))
1148 level_pfn
= pfn
& level_mask(level
);
1150 /* If range covers entire pagetable, free it */
1151 if (start_pfn
<= level_pfn
&&
1152 last_pfn
>= level_pfn
+ level_size(level
) - 1) {
/* These subordinate page tables are going away entirely. Don't
   bother to clear them; we're just going to *free* them. */
1155 if (level
> 1 && !dma_pte_superpage(pte
))
1156 freelist
= dma_pte_list_pagetables(domain
, level
- 1, pte
, freelist
);
1162 } else if (level
> 1) {
1163 /* Recurse down into a level that isn't *entirely* obsolete */
1164 freelist
= dma_pte_clear_level(domain
, level
- 1,
1165 phys_to_virt(dma_pte_addr(pte
)),
1166 level_pfn
, start_pfn
, last_pfn
,
1170 pfn
+= level_size(level
);
1171 } while (!first_pte_in_page(++pte
) && pfn
<= last_pfn
);
1174 domain_flush_cache(domain
, first_pte
,
1175 (void *)++last_pte
- (void *)first_pte
);
1180 /* We can't just free the pages because the IOMMU may still be walking
1181 the page tables, and may have cached the intermediate levels. The
1182 pages can only be freed after the IOTLB flush has been done. */
1183 static struct page
*domain_unmap(struct dmar_domain
*domain
,
1184 unsigned long start_pfn
,
1185 unsigned long last_pfn
)
1187 struct page
*freelist
;
1189 BUG_ON(!domain_pfn_supported(domain
, start_pfn
));
1190 BUG_ON(!domain_pfn_supported(domain
, last_pfn
));
1191 BUG_ON(start_pfn
> last_pfn
);
1193 /* we don't need lock here; nobody else touches the iova range */
1194 freelist
= dma_pte_clear_level(domain
, agaw_to_level(domain
->agaw
),
1195 domain
->pgd
, 0, start_pfn
, last_pfn
, NULL
);
1198 if (start_pfn
== 0 && last_pfn
== DOMAIN_MAX_PFN(domain
->gaw
)) {
1199 struct page
*pgd_page
= virt_to_page(domain
->pgd
);
1200 pgd_page
->freelist
= freelist
;
1201 freelist
= pgd_page
;
static void dma_free_pagelist(struct page *freelist)
{
	struct page *pg;

	while ((pg = freelist)) {
		freelist = pg->freelist;
		free_pgtable_page(page_address(pg));
	}
}

static void iova_entry_free(unsigned long data)
{
	struct page *freelist = (struct page *)data;

	dma_free_pagelist(freelist);
}
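
/*
 * Editor's note (not in the original file): iova_entry_free() is used as the
 * flush-queue callback (see the init_iova_flush_queue() call in domain_init()
 * further down), so the page-table pages chained through page->freelist are
 * only returned to the allocator after the deferred IOTLB flush has run.
 */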
1226 /* iommu handling */
1227 static int iommu_alloc_root_entry(struct intel_iommu
*iommu
)
1229 struct root_entry
*root
;
1230 unsigned long flags
;
1232 root
= (struct root_entry
*)alloc_pgtable_page(iommu
->node
);
1234 pr_err("Allocating root entry for %s failed\n",
1239 __iommu_flush_cache(iommu
, root
, ROOT_SIZE
);
1241 spin_lock_irqsave(&iommu
->lock
, flags
);
1242 iommu
->root_entry
= root
;
1243 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1248 static void iommu_set_root_entry(struct intel_iommu
*iommu
)
1254 addr
= virt_to_phys(iommu
->root_entry
);
1255 if (sm_supported(iommu
))
1256 addr
|= DMA_RTADDR_SMT
;
1258 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
1259 dmar_writeq(iommu
->reg
+ DMAR_RTADDR_REG
, addr
);
1261 writel(iommu
->gcmd
| DMA_GCMD_SRTP
, iommu
->reg
+ DMAR_GCMD_REG
);
1263 /* Make sure hardware complete it */
1264 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
1265 readl
, (sts
& DMA_GSTS_RTPS
), sts
);
1267 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1270 void iommu_flush_write_buffer(struct intel_iommu
*iommu
)
1275 if (!rwbf_quirk
&& !cap_rwbf(iommu
->cap
))
1278 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
1279 writel(iommu
->gcmd
| DMA_GCMD_WBF
, iommu
->reg
+ DMAR_GCMD_REG
);
1281 /* Make sure hardware complete it */
1282 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
1283 readl
, (!(val
& DMA_GSTS_WBFS
)), val
);
1285 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1288 /* return value determine if we need a write buffer flush */
1289 static void __iommu_flush_context(struct intel_iommu
*iommu
,
1290 u16 did
, u16 source_id
, u8 function_mask
,
1297 case DMA_CCMD_GLOBAL_INVL
:
1298 val
= DMA_CCMD_GLOBAL_INVL
;
1300 case DMA_CCMD_DOMAIN_INVL
:
1301 val
= DMA_CCMD_DOMAIN_INVL
|DMA_CCMD_DID(did
);
1303 case DMA_CCMD_DEVICE_INVL
:
1304 val
= DMA_CCMD_DEVICE_INVL
|DMA_CCMD_DID(did
)
1305 | DMA_CCMD_SID(source_id
) | DMA_CCMD_FM(function_mask
);
1310 val
|= DMA_CCMD_ICC
;
1312 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
1313 dmar_writeq(iommu
->reg
+ DMAR_CCMD_REG
, val
);
1315 /* Make sure hardware complete it */
1316 IOMMU_WAIT_OP(iommu
, DMAR_CCMD_REG
,
1317 dmar_readq
, (!(val
& DMA_CCMD_ICC
)), val
);
1319 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1322 /* return value determine if we need a write buffer flush */
1323 static void __iommu_flush_iotlb(struct intel_iommu
*iommu
, u16 did
,
1324 u64 addr
, unsigned int size_order
, u64 type
)
1326 int tlb_offset
= ecap_iotlb_offset(iommu
->ecap
);
1327 u64 val
= 0, val_iva
= 0;
1331 case DMA_TLB_GLOBAL_FLUSH
:
1332 /* global flush doesn't need set IVA_REG */
1333 val
= DMA_TLB_GLOBAL_FLUSH
|DMA_TLB_IVT
;
1335 case DMA_TLB_DSI_FLUSH
:
1336 val
= DMA_TLB_DSI_FLUSH
|DMA_TLB_IVT
|DMA_TLB_DID(did
);
1338 case DMA_TLB_PSI_FLUSH
:
1339 val
= DMA_TLB_PSI_FLUSH
|DMA_TLB_IVT
|DMA_TLB_DID(did
);
1340 /* IH bit is passed in as part of address */
1341 val_iva
= size_order
| addr
;
1346 /* Note: set drain read/write */
1349 * This is probably to be super secure.. Looks like we can
1350 * ignore it without any impact.
1352 if (cap_read_drain(iommu
->cap
))
1353 val
|= DMA_TLB_READ_DRAIN
;
1355 if (cap_write_drain(iommu
->cap
))
1356 val
|= DMA_TLB_WRITE_DRAIN
;
1358 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
1359 /* Note: Only uses first TLB reg currently */
1361 dmar_writeq(iommu
->reg
+ tlb_offset
, val_iva
);
1362 dmar_writeq(iommu
->reg
+ tlb_offset
+ 8, val
);
1364 /* Make sure hardware complete it */
1365 IOMMU_WAIT_OP(iommu
, tlb_offset
+ 8,
1366 dmar_readq
, (!(val
& DMA_TLB_IVT
)), val
);
1368 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1370 /* check IOTLB invalidation granularity */
1371 if (DMA_TLB_IAIG(val
) == 0)
1372 pr_err("Flush IOTLB failed\n");
1373 if (DMA_TLB_IAIG(val
) != DMA_TLB_IIRG(type
))
1374 pr_debug("TLB flush request %Lx, actual %Lx\n",
1375 (unsigned long long)DMA_TLB_IIRG(type
),
1376 (unsigned long long)DMA_TLB_IAIG(val
));
1379 static struct device_domain_info
*
1380 iommu_support_dev_iotlb (struct dmar_domain
*domain
, struct intel_iommu
*iommu
,
1383 struct device_domain_info
*info
;
1385 assert_spin_locked(&device_domain_lock
);
1390 list_for_each_entry(info
, &domain
->devices
, link
)
1391 if (info
->iommu
== iommu
&& info
->bus
== bus
&&
1392 info
->devfn
== devfn
) {
1393 if (info
->ats_supported
&& info
->dev
)
1401 static void domain_update_iotlb(struct dmar_domain
*domain
)
1403 struct device_domain_info
*info
;
1404 bool has_iotlb_device
= false;
1406 assert_spin_locked(&device_domain_lock
);
1408 list_for_each_entry(info
, &domain
->devices
, link
) {
1409 struct pci_dev
*pdev
;
1411 if (!info
->dev
|| !dev_is_pci(info
->dev
))
1414 pdev
= to_pci_dev(info
->dev
);
1415 if (pdev
->ats_enabled
) {
1416 has_iotlb_device
= true;
1421 domain
->has_iotlb_device
= has_iotlb_device
;
1424 static void iommu_enable_dev_iotlb(struct device_domain_info
*info
)
1426 struct pci_dev
*pdev
;
1428 assert_spin_locked(&device_domain_lock
);
1430 if (!info
|| !dev_is_pci(info
->dev
))
1433 pdev
= to_pci_dev(info
->dev
);
1434 /* For IOMMU that supports device IOTLB throttling (DIT), we assign
1435 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1436 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1437 * reserved, which should be set to 0.
1439 if (!ecap_dit(info
->iommu
->ecap
))
1442 struct pci_dev
*pf_pdev
;
1444 /* pdev will be returned if device is not a vf */
1445 pf_pdev
= pci_physfn(pdev
);
1446 info
->pfsid
= pci_dev_id(pf_pdev
);
1449 #ifdef CONFIG_INTEL_IOMMU_SVM
1450 /* The PCIe spec, in its wisdom, declares that the behaviour of
1451 the device if you enable PASID support after ATS support is
1452 undefined. So always enable PASID support on devices which
1453 have it, even if we can't yet know if we're ever going to
1455 if (info
->pasid_supported
&& !pci_enable_pasid(pdev
, info
->pasid_supported
& ~1))
1456 info
->pasid_enabled
= 1;
1458 if (info
->pri_supported
&&
1459 (info
->pasid_enabled
? pci_prg_resp_pasid_required(pdev
) : 1) &&
1460 !pci_reset_pri(pdev
) && !pci_enable_pri(pdev
, 32))
1461 info
->pri_enabled
= 1;
1463 if (!pdev
->untrusted
&& info
->ats_supported
&&
1464 pci_ats_page_aligned(pdev
) &&
1465 !pci_enable_ats(pdev
, VTD_PAGE_SHIFT
)) {
1466 info
->ats_enabled
= 1;
1467 domain_update_iotlb(info
->domain
);
1468 info
->ats_qdep
= pci_ats_queue_depth(pdev
);
1472 static void iommu_disable_dev_iotlb(struct device_domain_info
*info
)
1474 struct pci_dev
*pdev
;
1476 assert_spin_locked(&device_domain_lock
);
1478 if (!dev_is_pci(info
->dev
))
1481 pdev
= to_pci_dev(info
->dev
);
1483 if (info
->ats_enabled
) {
1484 pci_disable_ats(pdev
);
1485 info
->ats_enabled
= 0;
1486 domain_update_iotlb(info
->domain
);
1488 #ifdef CONFIG_INTEL_IOMMU_SVM
1489 if (info
->pri_enabled
) {
1490 pci_disable_pri(pdev
);
1491 info
->pri_enabled
= 0;
1493 if (info
->pasid_enabled
) {
1494 pci_disable_pasid(pdev
);
1495 info
->pasid_enabled
= 0;
1500 static void iommu_flush_dev_iotlb(struct dmar_domain
*domain
,
1501 u64 addr
, unsigned mask
)
1504 unsigned long flags
;
1505 struct device_domain_info
*info
;
1507 if (!domain
->has_iotlb_device
)
1510 spin_lock_irqsave(&device_domain_lock
, flags
);
1511 list_for_each_entry(info
, &domain
->devices
, link
) {
1512 if (!info
->ats_enabled
)
1515 sid
= info
->bus
<< 8 | info
->devfn
;
1516 qdep
= info
->ats_qdep
;
1517 qi_flush_dev_iotlb(info
->iommu
, sid
, info
->pfsid
,
1520 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1523 static void domain_flush_piotlb(struct intel_iommu
*iommu
,
1524 struct dmar_domain
*domain
,
1525 u64 addr
, unsigned long npages
, bool ih
)
1527 u16 did
= domain
->iommu_did
[iommu
->seq_id
];
1529 if (domain
->default_pasid
)
1530 qi_flush_piotlb(iommu
, did
, domain
->default_pasid
,
1533 if (!list_empty(&domain
->devices
))
1534 qi_flush_piotlb(iommu
, did
, PASID_RID2PASID
, addr
, npages
, ih
);
1537 static void iommu_flush_iotlb_psi(struct intel_iommu
*iommu
,
1538 struct dmar_domain
*domain
,
1539 unsigned long pfn
, unsigned int pages
,
1542 unsigned int mask
= ilog2(__roundup_pow_of_two(pages
));
1543 uint64_t addr
= (uint64_t)pfn
<< VTD_PAGE_SHIFT
;
1544 u16 did
= domain
->iommu_did
[iommu
->seq_id
];
1551 if (domain_use_first_level(domain
)) {
1552 domain_flush_piotlb(iommu
, domain
, addr
, pages
, ih
);
1555 * Fallback to domain selective flush if no PSI support or
1556 * the size is too big. PSI requires page size to be 2 ^ x,
1557 * and the base address is naturally aligned to the size.
1559 if (!cap_pgsel_inv(iommu
->cap
) ||
1560 mask
> cap_max_amask_val(iommu
->cap
))
1561 iommu
->flush
.flush_iotlb(iommu
, did
, 0, 0,
1564 iommu
->flush
.flush_iotlb(iommu
, did
, addr
| ih
, mask
,
1569 * In caching mode, changes of pages from non-present to present require
1570 * flush. However, device IOTLB doesn't need to be flushed in this case.
1572 if (!cap_caching_mode(iommu
->cap
) || !map
)
1573 iommu_flush_dev_iotlb(domain
, addr
, mask
);
1576 /* Notification for newly created mappings */
1577 static inline void __mapping_notify_one(struct intel_iommu
*iommu
,
1578 struct dmar_domain
*domain
,
1579 unsigned long pfn
, unsigned int pages
)
1582 * It's a non-present to present mapping. Only flush if caching mode
1585 if (cap_caching_mode(iommu
->cap
) && !domain_use_first_level(domain
))
1586 iommu_flush_iotlb_psi(iommu
, domain
, pfn
, pages
, 0, 1);
1588 iommu_flush_write_buffer(iommu
);
1591 static void iommu_flush_iova(struct iova_domain
*iovad
)
1593 struct dmar_domain
*domain
;
1596 domain
= container_of(iovad
, struct dmar_domain
, iovad
);
1598 for_each_domain_iommu(idx
, domain
) {
1599 struct intel_iommu
*iommu
= g_iommus
[idx
];
1600 u16 did
= domain
->iommu_did
[iommu
->seq_id
];
1602 if (domain_use_first_level(domain
))
1603 domain_flush_piotlb(iommu
, domain
, 0, -1, 0);
1605 iommu
->flush
.flush_iotlb(iommu
, did
, 0, 0,
1608 if (!cap_caching_mode(iommu
->cap
))
1609 iommu_flush_dev_iotlb(get_iommu_domain(iommu
, did
),
1610 0, MAX_AGAW_PFN_WIDTH
);
1614 static void iommu_disable_protect_mem_regions(struct intel_iommu
*iommu
)
1617 unsigned long flags
;
1619 if (!cap_plmr(iommu
->cap
) && !cap_phmr(iommu
->cap
))
1622 raw_spin_lock_irqsave(&iommu
->register_lock
, flags
);
1623 pmen
= readl(iommu
->reg
+ DMAR_PMEN_REG
);
1624 pmen
&= ~DMA_PMEN_EPM
;
1625 writel(pmen
, iommu
->reg
+ DMAR_PMEN_REG
);
1627 /* wait for the protected region status bit to clear */
1628 IOMMU_WAIT_OP(iommu
, DMAR_PMEN_REG
,
1629 readl
, !(pmen
& DMA_PMEN_PRS
), pmen
);
1631 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flags
);
1634 static void iommu_enable_translation(struct intel_iommu
*iommu
)
1637 unsigned long flags
;
1639 raw_spin_lock_irqsave(&iommu
->register_lock
, flags
);
1640 iommu
->gcmd
|= DMA_GCMD_TE
;
1641 writel(iommu
->gcmd
, iommu
->reg
+ DMAR_GCMD_REG
);
1643 /* Make sure hardware complete it */
1644 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
1645 readl
, (sts
& DMA_GSTS_TES
), sts
);
1647 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flags
);
1650 static void iommu_disable_translation(struct intel_iommu
*iommu
)
1655 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
1656 iommu
->gcmd
&= ~DMA_GCMD_TE
;
1657 writel(iommu
->gcmd
, iommu
->reg
+ DMAR_GCMD_REG
);
1659 /* Make sure hardware complete it */
1660 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
1661 readl
, (!(sts
& DMA_GSTS_TES
)), sts
);
1663 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1666 static int iommu_init_domains(struct intel_iommu
*iommu
)
1668 u32 ndomains
, nlongs
;
1671 ndomains
= cap_ndoms(iommu
->cap
);
1672 pr_debug("%s: Number of Domains supported <%d>\n",
1673 iommu
->name
, ndomains
);
1674 nlongs
= BITS_TO_LONGS(ndomains
);
1676 spin_lock_init(&iommu
->lock
);
1678 iommu
->domain_ids
= kcalloc(nlongs
, sizeof(unsigned long), GFP_KERNEL
);
1679 if (!iommu
->domain_ids
) {
1680 pr_err("%s: Allocating domain id array failed\n",
1685 size
= (ALIGN(ndomains
, 256) >> 8) * sizeof(struct dmar_domain
**);
1686 iommu
->domains
= kzalloc(size
, GFP_KERNEL
);
1688 if (iommu
->domains
) {
1689 size
= 256 * sizeof(struct dmar_domain
*);
1690 iommu
->domains
[0] = kzalloc(size
, GFP_KERNEL
);
1693 if (!iommu
->domains
|| !iommu
->domains
[0]) {
1694 pr_err("%s: Allocating domain array failed\n",
1696 kfree(iommu
->domain_ids
);
1697 kfree(iommu
->domains
);
1698 iommu
->domain_ids
= NULL
;
1699 iommu
->domains
= NULL
;
1704 * If Caching mode is set, then invalid translations are tagged
1705 * with domain-id 0, hence we need to pre-allocate it. We also
1706 * use domain-id 0 as a marker for non-allocated domain-id, so
1707 * make sure it is not used for a real domain.
1709 set_bit(0, iommu
->domain_ids
);
1712 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1713 * entry for first-level or pass-through translation modes should
1714 * be programmed with a domain id different from those used for
1715 * second-level or nested translation. We reserve a domain id for
1718 if (sm_supported(iommu
))
1719 set_bit(FLPT_DEFAULT_DID
, iommu
->domain_ids
);
1724 static void disable_dmar_iommu(struct intel_iommu
*iommu
)
1726 struct device_domain_info
*info
, *tmp
;
1727 unsigned long flags
;
1729 if (!iommu
->domains
|| !iommu
->domain_ids
)
1732 spin_lock_irqsave(&device_domain_lock
, flags
);
1733 list_for_each_entry_safe(info
, tmp
, &device_domain_list
, global
) {
1734 if (info
->iommu
!= iommu
)
1737 if (!info
->dev
|| !info
->domain
)
1740 __dmar_remove_one_dev_info(info
);
1742 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1744 if (iommu
->gcmd
& DMA_GCMD_TE
)
1745 iommu_disable_translation(iommu
);
1748 static void free_dmar_iommu(struct intel_iommu
*iommu
)
1750 if ((iommu
->domains
) && (iommu
->domain_ids
)) {
1751 int elems
= ALIGN(cap_ndoms(iommu
->cap
), 256) >> 8;
1754 for (i
= 0; i
< elems
; i
++)
1755 kfree(iommu
->domains
[i
]);
1756 kfree(iommu
->domains
);
1757 kfree(iommu
->domain_ids
);
1758 iommu
->domains
= NULL
;
1759 iommu
->domain_ids
= NULL
;
1762 g_iommus
[iommu
->seq_id
] = NULL
;
1764 /* free context mapping */
1765 free_context_table(iommu
);
1767 #ifdef CONFIG_INTEL_IOMMU_SVM
1768 if (pasid_supported(iommu
)) {
1769 if (ecap_prs(iommu
->ecap
))
1770 intel_svm_finish_prq(iommu
);
1776 * Check and return whether first level is used by default for
1779 static bool first_level_by_default(void)
1781 struct dmar_drhd_unit
*drhd
;
1782 struct intel_iommu
*iommu
;
1783 static int first_level_support
= -1;
1785 if (likely(first_level_support
!= -1))
1786 return first_level_support
;
1788 first_level_support
= 1;
1791 for_each_active_iommu(iommu
, drhd
) {
1792 if (!sm_supported(iommu
) || !ecap_flts(iommu
->ecap
)) {
1793 first_level_support
= 0;
1799 return first_level_support
;
1802 static struct dmar_domain
*alloc_domain(int flags
)
1804 struct dmar_domain
*domain
;
1806 domain
= alloc_domain_mem();
1810 memset(domain
, 0, sizeof(*domain
));
1811 domain
->nid
= NUMA_NO_NODE
;
1812 domain
->flags
= flags
;
1813 if (first_level_by_default())
1814 domain
->flags
|= DOMAIN_FLAG_USE_FIRST_LEVEL
;
1815 domain
->has_iotlb_device
= false;
1816 INIT_LIST_HEAD(&domain
->devices
);
1821 /* Must be called with iommu->lock */
1822 static int domain_attach_iommu(struct dmar_domain
*domain
,
1823 struct intel_iommu
*iommu
)
1825 unsigned long ndomains
;
1828 assert_spin_locked(&device_domain_lock
);
1829 assert_spin_locked(&iommu
->lock
);
1831 domain
->iommu_refcnt
[iommu
->seq_id
] += 1;
1832 domain
->iommu_count
+= 1;
1833 if (domain
->iommu_refcnt
[iommu
->seq_id
] == 1) {
1834 ndomains
= cap_ndoms(iommu
->cap
);
1835 num
= find_first_zero_bit(iommu
->domain_ids
, ndomains
);
1837 if (num
>= ndomains
) {
1838 pr_err("%s: No free domain ids\n", iommu
->name
);
1839 domain
->iommu_refcnt
[iommu
->seq_id
] -= 1;
1840 domain
->iommu_count
-= 1;
1844 set_bit(num
, iommu
->domain_ids
);
1845 set_iommu_domain(iommu
, num
, domain
);
1847 domain
->iommu_did
[iommu
->seq_id
] = num
;
1848 domain
->nid
= iommu
->node
;
1850 domain_update_iommu_cap(domain
);
1856 static int domain_detach_iommu(struct dmar_domain
*domain
,
1857 struct intel_iommu
*iommu
)
1861 assert_spin_locked(&device_domain_lock
);
1862 assert_spin_locked(&iommu
->lock
);
1864 domain
->iommu_refcnt
[iommu
->seq_id
] -= 1;
1865 count
= --domain
->iommu_count
;
1866 if (domain
->iommu_refcnt
[iommu
->seq_id
] == 0) {
1867 num
= domain
->iommu_did
[iommu
->seq_id
];
1868 clear_bit(num
, iommu
->domain_ids
);
1869 set_iommu_domain(iommu
, num
, NULL
);
1871 domain_update_iommu_cap(domain
);
1872 domain
->iommu_did
[iommu
->seq_id
] = 0;
1878 static struct iova_domain reserved_iova_list
;
1879 static struct lock_class_key reserved_rbtree_key
;
1881 static int dmar_init_reserved_ranges(void)
1883 struct pci_dev
*pdev
= NULL
;
1887 init_iova_domain(&reserved_iova_list
, VTD_PAGE_SIZE
, IOVA_START_PFN
);
1889 lockdep_set_class(&reserved_iova_list
.iova_rbtree_lock
,
1890 &reserved_rbtree_key
);
1892 /* IOAPIC ranges shouldn't be accessed by DMA */
1893 iova
= reserve_iova(&reserved_iova_list
, IOVA_PFN(IOAPIC_RANGE_START
),
1894 IOVA_PFN(IOAPIC_RANGE_END
));
1896 pr_err("Reserve IOAPIC range failed\n");
1900 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1901 for_each_pci_dev(pdev
) {
1904 for (i
= 0; i
< PCI_NUM_RESOURCES
; i
++) {
1905 r
= &pdev
->resource
[i
];
1906 if (!r
->flags
|| !(r
->flags
& IORESOURCE_MEM
))
1908 iova
= reserve_iova(&reserved_iova_list
,
1912 pci_err(pdev
, "Reserve iova for %pR failed\n", r
);
1920 static void domain_reserve_special_ranges(struct dmar_domain
*domain
)
1922 copy_reserved_iova(&reserved_iova_list
, &domain
->iovad
);
1925 static inline int guestwidth_to_adjustwidth(int gaw
)
1928 int r
= (gaw
- 12) % 9;
1939 static int domain_init(struct dmar_domain
*domain
, struct intel_iommu
*iommu
,
1942 int adjust_width
, agaw
;
1943 unsigned long sagaw
;
1946 init_iova_domain(&domain
->iovad
, VTD_PAGE_SIZE
, IOVA_START_PFN
);
1948 if (!intel_iommu_strict
) {
1949 ret
= init_iova_flush_queue(&domain
->iovad
,
1950 iommu_flush_iova
, iova_entry_free
);
1952 pr_info("iova flush queue initialization failed\n");
1955 domain_reserve_special_ranges(domain
);
1957 /* calculate AGAW */
1958 if (guest_width
> cap_mgaw(iommu
->cap
))
1959 guest_width
= cap_mgaw(iommu
->cap
);
1960 domain
->gaw
= guest_width
;
1961 adjust_width
= guestwidth_to_adjustwidth(guest_width
);
1962 agaw
= width_to_agaw(adjust_width
);
1963 sagaw
= cap_sagaw(iommu
->cap
);
1964 if (!test_bit(agaw
, &sagaw
)) {
1965 /* hardware doesn't support it, choose a bigger one */
1966 pr_debug("Hardware doesn't support agaw %d\n", agaw
);
1967 agaw
= find_next_bit(&sagaw
, 5, agaw
);
1971 domain
->agaw
= agaw
;
1973 if (ecap_coherent(iommu
->ecap
))
1974 domain
->iommu_coherency
= 1;
1976 domain
->iommu_coherency
= 0;
1978 if (ecap_sc_support(iommu
->ecap
))
1979 domain
->iommu_snooping
= 1;
1981 domain
->iommu_snooping
= 0;
1983 if (intel_iommu_superpage
)
1984 domain
->iommu_superpage
= fls(cap_super_page_val(iommu
->cap
));
1986 domain
->iommu_superpage
= 0;
1988 domain
->nid
= iommu
->node
;
1990 /* always allocate the top pgd */
1991 domain
->pgd
= (struct dma_pte
*)alloc_pgtable_page(domain
->nid
);
1994 __iommu_flush_cache(iommu
, domain
->pgd
, PAGE_SIZE
);
1998 static void domain_exit(struct dmar_domain
*domain
)
2001 /* Remove associated devices and clear attached or cached domains */
2002 domain_remove_dev_info(domain
);
2005 put_iova_domain(&domain
->iovad
);
2008 struct page
*freelist
;
2010 freelist
= domain_unmap(domain
, 0, DOMAIN_MAX_PFN(domain
->gaw
));
2011 dma_free_pagelist(freelist
);
2014 free_domain_mem(domain
);
/*
 * Get the PASID directory size for scalable mode context entry.
 * Value of X in the PDTS field of a scalable mode context entry
 * indicates PASID directory with 2^(X + 7) entries.
 */
static inline unsigned long context_get_sm_pds(struct pasid_table *table)
{
	unsigned long pds, max_pde;

	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
	pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
	if (pds < 7)
		return 0;

	return pds - 7;
}

/*
 * Set the RID_PASID field of a scalable mode context entry. The
 * IOMMU hardware will use the PASID value set in this field for
 * DMA translations of DMA requests without PASID.
 */
static inline void
context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
{
	context->hi |= pasid & ((1 << 20) - 1);
}

/*
 * Set the DTE(Device-TLB Enable) field of a scalable mode context
 * entry.
 */
static inline void context_set_sm_dte(struct context_entry *context)
{
	context->lo |= (1 << 2);
}

/*
 * Set the PRE(Page Request Enable) field of a scalable mode context
 * entry.
 */
static inline void context_set_sm_pre(struct context_entry *context)
{
	context->lo |= (1 << 4);
}

/* Convert value to context PASID directory size field coding. */
#define context_pdts(pds)	(((pds) & 0x7) << 9)
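
/*
 * Editor's note (worked example, not in the original source): a PDTS
 * encoding of 2, placed in bits 11:9 of the low context qword by the macro
 * above, describes a PASID directory with 2^(2 + 7) = 512 entries, per the
 * comment above context_get_sm_pds().
 */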
2066 static int domain_context_mapping_one(struct dmar_domain
*domain
,
2067 struct intel_iommu
*iommu
,
2068 struct pasid_table
*table
,
2071 u16 did
= domain
->iommu_did
[iommu
->seq_id
];
2072 int translation
= CONTEXT_TT_MULTI_LEVEL
;
2073 struct device_domain_info
*info
= NULL
;
2074 struct context_entry
*context
;
2075 unsigned long flags
;
2080 if (hw_pass_through
&& domain_type_is_si(domain
))
2081 translation
= CONTEXT_TT_PASS_THROUGH
;
2083 pr_debug("Set context mapping for %02x:%02x.%d\n",
2084 bus
, PCI_SLOT(devfn
), PCI_FUNC(devfn
));
2086 BUG_ON(!domain
->pgd
);
2088 spin_lock_irqsave(&device_domain_lock
, flags
);
2089 spin_lock(&iommu
->lock
);
2092 context
= iommu_context_addr(iommu
, bus
, devfn
, 1);
2097 if (context_present(context
))
2101 * For kdump cases, old valid entries may be cached due to the
2102 * in-flight DMA and copied pgtable, but there is no unmapping
2103 * behaviour for them, thus we need an explicit cache flush for
2104 * the newly-mapped device. For kdump, at this point, the device
2105 * is supposed to finish reset at its driver probe stage, so no
2106 * in-flight DMA will exist, and we don't need to worry anymore
2109 if (context_copied(context
)) {
2110 u16 did_old
= context_domain_id(context
);
2112 if (did_old
< cap_ndoms(iommu
->cap
)) {
2113 iommu
->flush
.flush_context(iommu
, did_old
,
2114 (((u16
)bus
) << 8) | devfn
,
2115 DMA_CCMD_MASK_NOBIT
,
2116 DMA_CCMD_DEVICE_INVL
);
2117 iommu
->flush
.flush_iotlb(iommu
, did_old
, 0, 0,
2122 context_clear_entry(context
);
2124 if (sm_supported(iommu
)) {
2129 /* Setup the PASID DIR pointer: */
2130 pds
= context_get_sm_pds(table
);
2131 context
->lo
= (u64
)virt_to_phys(table
->table
) |
2134 /* Setup the RID_PASID field: */
2135 context_set_sm_rid2pasid(context
, PASID_RID2PASID
);
2138 * Setup the Device-TLB enable bit and Page request
2141 info
= iommu_support_dev_iotlb(domain
, iommu
, bus
, devfn
);
2142 if (info
&& info
->ats_supported
)
2143 context_set_sm_dte(context
);
2144 if (info
&& info
->pri_supported
)
2145 context_set_sm_pre(context
);
2147 struct dma_pte
*pgd
= domain
->pgd
;
2150 context_set_domain_id(context
, did
);
2152 if (translation
!= CONTEXT_TT_PASS_THROUGH
) {
2154 * Skip top levels of page tables for iommu which has
2155 * less agaw than default. Unnecessary for PT mode.
2157 for (agaw
= domain
->agaw
; agaw
> iommu
->agaw
; agaw
--) {
2159 pgd
= phys_to_virt(dma_pte_addr(pgd
));
2160 if (!dma_pte_present(pgd
))
2164 info
= iommu_support_dev_iotlb(domain
, iommu
, bus
, devfn
);
2165 if (info
&& info
->ats_supported
)
2166 translation
= CONTEXT_TT_DEV_IOTLB
;
2168 translation
= CONTEXT_TT_MULTI_LEVEL
;
2170 context_set_address_root(context
, virt_to_phys(pgd
));
2171 context_set_address_width(context
, agaw
);
2174 * In pass through mode, AW must be programmed to
2175 * indicate the largest AGAW value supported by
2176 * hardware. And ASR is ignored by hardware.
2178 context_set_address_width(context
, iommu
->msagaw
);
2181 context_set_translation_type(context
, translation
);
2184 context_set_fault_enable(context
);
2185 context_set_present(context
);
2186 if (!ecap_coherent(iommu
->ecap
))
2187 clflush_cache_range(context
, sizeof(*context
));
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entries we only need to flush the write-buffer. If it
	 * _does_ cache non-present entries, then it does so in the special
	 * domain #0, which we have to flush:
2195 if (cap_caching_mode(iommu
->cap
)) {
2196 iommu
->flush
.flush_context(iommu
, 0,
2197 (((u16
)bus
) << 8) | devfn
,
2198 DMA_CCMD_MASK_NOBIT
,
2199 DMA_CCMD_DEVICE_INVL
);
2200 iommu
->flush
.flush_iotlb(iommu
, did
, 0, 0, DMA_TLB_DSI_FLUSH
);
2202 iommu_flush_write_buffer(iommu
);
2204 iommu_enable_dev_iotlb(info
);
2209 spin_unlock(&iommu
->lock
);
2210 spin_unlock_irqrestore(&device_domain_lock
, flags
);
2215 struct domain_context_mapping_data
{
2216 struct dmar_domain
*domain
;
2217 struct intel_iommu
*iommu
;
2218 struct pasid_table
*table
;
2221 static int domain_context_mapping_cb(struct pci_dev
*pdev
,
2222 u16 alias
, void *opaque
)
2224 struct domain_context_mapping_data
*data
= opaque
;
2226 return domain_context_mapping_one(data
->domain
, data
->iommu
,
2227 data
->table
, PCI_BUS_NUM(alias
),
2232 domain_context_mapping(struct dmar_domain
*domain
, struct device
*dev
)
2234 struct domain_context_mapping_data data
;
2235 struct pasid_table
*table
;
2236 struct intel_iommu
*iommu
;
2239 iommu
= device_to_iommu(dev
, &bus
, &devfn
);
2243 table
= intel_pasid_get_table(dev
);
2245 if (!dev_is_pci(dev
))
2246 return domain_context_mapping_one(domain
, iommu
, table
,
2249 data
.domain
= domain
;
2253 return pci_for_each_dma_alias(to_pci_dev(dev
),
2254 &domain_context_mapping_cb
, &data
);
2257 static int domain_context_mapped_cb(struct pci_dev
*pdev
,
2258 u16 alias
, void *opaque
)
2260 struct intel_iommu
*iommu
= opaque
;
2262 return !device_context_mapped(iommu
, PCI_BUS_NUM(alias
), alias
& 0xff);
2265 static int domain_context_mapped(struct device
*dev
)
2267 struct intel_iommu
*iommu
;
2270 iommu
= device_to_iommu(dev
, &bus
, &devfn
);
2274 if (!dev_is_pci(dev
))
2275 return device_context_mapped(iommu
, bus
, devfn
);
2277 return !pci_for_each_dma_alias(to_pci_dev(dev
),
2278 domain_context_mapped_cb
, iommu
);
/* Returns a number of VTD pages, but aligned to MM page size */
static inline unsigned long aligned_nrpages(unsigned long host_addr,
					    size_t size)
{
	host_addr &= ~PAGE_MASK;
	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
}
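
/*
 * Editor's note (worked example, not in the original source): with 4KiB
 * pages, aligned_nrpages(0x1003, 0x2000) first keeps only the in-page
 * offset (0x3), then rounds 0x3 + 0x2000 up to 0x3000 and returns
 * 0x3000 >> 12 = 3 VT-d pages.
 */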
/* Return largest possible superpage level for a given mapping */
static inline int hardware_largepage_caps(struct dmar_domain *domain,
					  unsigned long iov_pfn,
					  unsigned long phy_pfn,
					  unsigned long pages)
{
	int support, level = 1;
	unsigned long pfnmerge;

	support = domain->iommu_superpage;

	/* To use a large page, the virtual *and* physical addresses
	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
	   of them will mean we have to use smaller pages. So just
	   merge them and check both at once. */
	pfnmerge = iov_pfn | phy_pfn;

	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
		pages >>= VTD_STRIDE_SHIFT;
		if (!pages)
			break;
		pfnmerge >>= VTD_STRIDE_SHIFT;
		level++;
		support--;
	}
	return level;
}
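/*
 * Illustrative sketch, not part of the upstream driver: the superpage
 * decision above only depends on the low bits that the IOVA and physical
 * PFNs have in common, which is why the two PFNs are OR-ed together before
 * the alignment test. The PFN values below are hypothetical.
 */
static inline void __maybe_unused superpage_alignment_example(void)
{
	unsigned long iov_pfn = 0x200;	/* 2MiB-aligned IOVA PFN */
	unsigned long phy_pfn = 0x400;	/* 2MiB-aligned physical PFN */

	/* both have their low 9 bits clear, so a 2MiB page is permissible */
	WARN_ON(((iov_pfn | phy_pfn) & ((1UL << VTD_STRIDE_SHIFT) - 1)) != 0);
}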
2317 static int __domain_mapping(struct dmar_domain
*domain
, unsigned long iov_pfn
,
2318 struct scatterlist
*sg
, unsigned long phys_pfn
,
2319 unsigned long nr_pages
, int prot
)
2321 struct dma_pte
*first_pte
= NULL
, *pte
= NULL
;
2322 phys_addr_t
uninitialized_var(pteval
);
2323 unsigned long sg_res
= 0;
2324 unsigned int largepage_lvl
= 0;
2325 unsigned long lvl_pages
= 0;
2328 BUG_ON(!domain_pfn_supported(domain
, iov_pfn
+ nr_pages
- 1));
2330 if ((prot
& (DMA_PTE_READ
|DMA_PTE_WRITE
)) == 0)
2333 attr
= prot
& (DMA_PTE_READ
| DMA_PTE_WRITE
| DMA_PTE_SNP
);
2334 if (domain_use_first_level(domain
))
2335 attr
|= DMA_FL_PTE_PRESENT
| DMA_FL_PTE_XD
| DMA_FL_PTE_US
;
2339 pteval
= ((phys_addr_t
)phys_pfn
<< VTD_PAGE_SHIFT
) | attr
;
2342 while (nr_pages
> 0) {
2346 unsigned int pgoff
= sg
->offset
& ~PAGE_MASK
;
2348 sg_res
= aligned_nrpages(sg
->offset
, sg
->length
);
2349 sg
->dma_address
= ((dma_addr_t
)iov_pfn
<< VTD_PAGE_SHIFT
) + pgoff
;
2350 sg
->dma_length
= sg
->length
;
2351 pteval
= (sg_phys(sg
) - pgoff
) | attr
;
2352 phys_pfn
= pteval
>> VTD_PAGE_SHIFT
;
2356 largepage_lvl
= hardware_largepage_caps(domain
, iov_pfn
, phys_pfn
, sg_res
);
2358 first_pte
= pte
= pfn_to_dma_pte(domain
, iov_pfn
, &largepage_lvl
);
2361 /* It is large page*/
2362 if (largepage_lvl
> 1) {
2363 unsigned long nr_superpages
, end_pfn
;
2365 pteval
|= DMA_PTE_LARGE_PAGE
;
2366 lvl_pages
= lvl_to_nr_pages(largepage_lvl
);
2368 nr_superpages
= sg_res
/ lvl_pages
;
2369 end_pfn
= iov_pfn
+ nr_superpages
* lvl_pages
- 1;
2372 * Ensure that old small page tables are
2373 * removed to make room for superpage(s).
2374 * We're adding new large pages, so make sure
2375 * we don't remove their parent tables.
2377 dma_pte_free_pagetable(domain
, iov_pfn
, end_pfn
,
2380 pteval
&= ~(uint64_t)DMA_PTE_LARGE_PAGE
;
2384 /* We don't need lock here, nobody else
2385 * touches the iova range
2387 tmp
= cmpxchg64_local(&pte
->val
, 0ULL, pteval
);
2389 static int dumps
= 5;
2390 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2391 iov_pfn
, tmp
, (unsigned long long)pteval
);
2394 debug_dma_dump_mappings(NULL
);
2399 lvl_pages
= lvl_to_nr_pages(largepage_lvl
);
2401 BUG_ON(nr_pages
< lvl_pages
);
2402 BUG_ON(sg_res
< lvl_pages
);
2404 nr_pages
-= lvl_pages
;
2405 iov_pfn
+= lvl_pages
;
2406 phys_pfn
+= lvl_pages
;
2407 pteval
+= lvl_pages
* VTD_PAGE_SIZE
;
2408 sg_res
-= lvl_pages
;
2410 /* If the next PTE would be the first in a new page, then we
2411 need to flush the cache on the entries we've just written.
2412 And then we'll need to recalculate 'pte', so clear it and
2413 let it get set again in the if (!pte) block above.
2415 If we're done (!nr_pages) we need to flush the cache too.
2417 Also if we've been setting superpages, we may need to
2418 recalculate 'pte' and switch back to smaller pages for the
2419 end of the mapping, if the trailing size is not enough to
2420 use another superpage (i.e. sg_res < lvl_pages). */
2422 if (!nr_pages
|| first_pte_in_page(pte
) ||
2423 (largepage_lvl
> 1 && sg_res
< lvl_pages
)) {
2424 domain_flush_cache(domain
, first_pte
,
2425 (void *)pte
- (void *)first_pte
);
2429 if (!sg_res
&& nr_pages
)
static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
			  struct scatterlist *sg, unsigned long phys_pfn,
			  unsigned long nr_pages, int prot)
{
	int iommu_id, ret;
	struct intel_iommu *iommu;

	/* Do the real mapping first */
	ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
	if (ret)
		return ret;

	for_each_domain_iommu(iommu_id, domain) {
		iommu = g_iommus[iommu_id];
		__mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
	}

	return 0;
}

static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				    struct scatterlist *sg, unsigned long nr_pages,
				    int prot)
{
	return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
}

static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				     unsigned long phys_pfn, unsigned long nr_pages,
				     int prot)
{
	return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
}
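/*
 * Illustrative sketch, not part of the upstream driver: both wrappers above
 * funnel into domain_mapping(). A scatterlist caller passes sg with
 * phys_pfn == 0, while a contiguous caller passes a NULL sg and a real
 * phys_pfn. The argument values below are placeholders for illustration.
 */
static inline int __maybe_unused domain_pfn_mapping_example(struct dmar_domain *domain)
{
	/* map 16 VT-d pages of IOVA PFN 0x100 onto physical PFN 0x200, RW */
	return domain_pfn_mapping(domain, 0x100, 0x200, 16,
				  DMA_PTE_READ | DMA_PTE_WRITE);
}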
static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	unsigned long flags;
	struct context_entry *context;
	u16 did_old;

	if (!iommu)
		return;

	spin_lock_irqsave(&iommu->lock, flags);
	context = iommu_context_addr(iommu, bus, devfn, 0);
	if (!context) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		return;
	}
	did_old = context_domain_id(context);
	context_clear_entry(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));
	spin_unlock_irqrestore(&iommu->lock, flags);
	iommu->flush.flush_context(iommu,
				   did_old,
				   (((u16)bus) << 8) | devfn,
				   DMA_CCMD_MASK_NOBIT,
				   DMA_CCMD_DEVICE_INVL);
	iommu->flush.flush_iotlb(iommu,
				 did_old,
				 0,
				 0,
				 DMA_TLB_DSI_FLUSH);
}
static inline void unlink_domain_info(struct device_domain_info *info)
{
	assert_spin_locked(&device_domain_lock);
	list_del(&info->link);
	list_del(&info->global);
	if (info->dev)
		info->dev->archdata.iommu = NULL;
}

static void domain_remove_dev_info(struct dmar_domain *domain)
{
	struct device_domain_info *info, *tmp;
	unsigned long flags;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry_safe(info, tmp, &domain->devices, link)
		__dmar_remove_one_dev_info(info);
	spin_unlock_irqrestore(&device_domain_lock, flags);
}

struct dmar_domain *find_domain(struct device *dev)
{
	struct device_domain_info *info;

	if (unlikely(attach_deferred(dev) || iommu_dummy(dev)))
		return NULL;

	/* No lock here, assumes no domain exit in normal case */
	info = dev->archdata.iommu;
	if (likely(info))
		return info->domain;

	return NULL;
}
static void do_deferred_attach(struct device *dev)
{
	struct iommu_domain *domain;

	dev->archdata.iommu = NULL;
	domain = iommu_get_domain_for_dev(dev);
	if (domain)
		intel_iommu_attach_device(domain, dev);
}

static inline struct device_domain_info *
dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
{
	struct device_domain_info *info;

	list_for_each_entry(info, &device_domain_list, global)
		if (info->segment == segment && info->bus == bus &&
		    info->devfn == devfn)
			return info;

	return NULL;
}
static int domain_setup_first_level(struct intel_iommu *iommu,
				    struct dmar_domain *domain,
				    struct device *dev,
				    int pasid)
{
	int flags = PASID_FLAG_SUPERVISOR_MODE;
	struct dma_pte *pgd = domain->pgd;
	int agaw, level;

	/*
	 * Skip top levels of page tables for iommu which has
	 * less agaw than default. Unnecessary for PT mode.
	 */
	for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
		pgd = phys_to_virt(dma_pte_addr(pgd));
		if (!dma_pte_present(pgd))
			return -ENOMEM;
	}

	level = agaw_to_level(agaw);
	if (level != 4 && level != 5)
		return -EINVAL;

	flags |= (level == 5) ? PASID_FLAG_FL5LP : 0;

	return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
					     domain->iommu_did[iommu->seq_id],
					     flags);
}

static bool dev_is_real_dma_subdevice(struct device *dev)
{
	return dev && dev_is_pci(dev) &&
	       pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
}
2594 static struct dmar_domain
*dmar_insert_one_dev_info(struct intel_iommu
*iommu
,
2597 struct dmar_domain
*domain
)
2599 struct dmar_domain
*found
= NULL
;
2600 struct device_domain_info
*info
;
2601 unsigned long flags
;
2604 info
= alloc_devinfo_mem();
2608 if (!dev_is_real_dma_subdevice(dev
)) {
2610 info
->devfn
= devfn
;
2611 info
->segment
= iommu
->segment
;
2613 struct pci_dev
*pdev
= to_pci_dev(dev
);
2615 info
->bus
= pdev
->bus
->number
;
2616 info
->devfn
= pdev
->devfn
;
2617 info
->segment
= pci_domain_nr(pdev
->bus
);
2620 info
->ats_supported
= info
->pasid_supported
= info
->pri_supported
= 0;
2621 info
->ats_enabled
= info
->pasid_enabled
= info
->pri_enabled
= 0;
2624 info
->domain
= domain
;
2625 info
->iommu
= iommu
;
2626 info
->pasid_table
= NULL
;
2627 info
->auxd_enabled
= 0;
2628 INIT_LIST_HEAD(&info
->auxiliary_domains
);
2630 if (dev
&& dev_is_pci(dev
)) {
2631 struct pci_dev
*pdev
= to_pci_dev(info
->dev
);
2633 if (!pdev
->untrusted
&&
2634 !pci_ats_disabled() &&
2635 ecap_dev_iotlb_support(iommu
->ecap
) &&
2636 pci_find_ext_capability(pdev
, PCI_EXT_CAP_ID_ATS
) &&
2637 dmar_find_matched_atsr_unit(pdev
))
2638 info
->ats_supported
= 1;
2640 if (sm_supported(iommu
)) {
2641 if (pasid_supported(iommu
)) {
2642 int features
= pci_pasid_features(pdev
);
2644 info
->pasid_supported
= features
| 1;
2647 if (info
->ats_supported
&& ecap_prs(iommu
->ecap
) &&
2648 pci_find_ext_capability(pdev
, PCI_EXT_CAP_ID_PRI
))
2649 info
->pri_supported
= 1;
2653 spin_lock_irqsave(&device_domain_lock
, flags
);
2655 found
= find_domain(dev
);
2658 struct device_domain_info
*info2
;
2659 info2
= dmar_search_domain_by_dev_info(info
->segment
, info
->bus
,
2662 found
= info2
->domain
;
2668 spin_unlock_irqrestore(&device_domain_lock
, flags
);
2669 free_devinfo_mem(info
);
2670 /* Caller must free the original domain */
2674 spin_lock(&iommu
->lock
);
2675 ret
= domain_attach_iommu(domain
, iommu
);
2676 spin_unlock(&iommu
->lock
);
2679 spin_unlock_irqrestore(&device_domain_lock
, flags
);
2680 free_devinfo_mem(info
);
2684 list_add(&info
->link
, &domain
->devices
);
2685 list_add(&info
->global
, &device_domain_list
);
2687 dev
->archdata
.iommu
= info
;
2688 spin_unlock_irqrestore(&device_domain_lock
, flags
);
2690 /* PASID table is mandatory for a PCI device in scalable mode. */
2691 if (dev
&& dev_is_pci(dev
) && sm_supported(iommu
)) {
2692 ret
= intel_pasid_alloc_table(dev
);
2694 dev_err(dev
, "PASID table allocation failed\n");
2695 dmar_remove_one_dev_info(dev
);
2699 /* Setup the PASID entry for requests without PASID: */
2700 spin_lock(&iommu
->lock
);
2701 if (hw_pass_through
&& domain_type_is_si(domain
))
2702 ret
= intel_pasid_setup_pass_through(iommu
, domain
,
2703 dev
, PASID_RID2PASID
);
2704 else if (domain_use_first_level(domain
))
2705 ret
= domain_setup_first_level(iommu
, domain
, dev
,
2708 ret
= intel_pasid_setup_second_level(iommu
, domain
,
2709 dev
, PASID_RID2PASID
);
2710 spin_unlock(&iommu
->lock
);
2712 dev_err(dev
, "Setup RID2PASID failed\n");
2713 dmar_remove_one_dev_info(dev
);
2718 if (dev
&& domain_context_mapping(domain
, dev
)) {
2719 dev_err(dev
, "Domain context map failed\n");
2720 dmar_remove_one_dev_info(dev
);
2727 static int get_last_alias(struct pci_dev
*pdev
, u16 alias
, void *opaque
)
2729 *(u16
*)opaque
= alias
;
2733 static struct dmar_domain
*find_or_alloc_domain(struct device
*dev
, int gaw
)
2735 struct device_domain_info
*info
;
2736 struct dmar_domain
*domain
= NULL
;
2737 struct intel_iommu
*iommu
;
2739 unsigned long flags
;
2742 iommu
= device_to_iommu(dev
, &bus
, &devfn
);
2746 if (dev_is_pci(dev
)) {
2747 struct pci_dev
*pdev
= to_pci_dev(dev
);
2749 pci_for_each_dma_alias(pdev
, get_last_alias
, &dma_alias
);
2751 spin_lock_irqsave(&device_domain_lock
, flags
);
2752 info
= dmar_search_domain_by_dev_info(pci_domain_nr(pdev
->bus
),
2753 PCI_BUS_NUM(dma_alias
),
2756 iommu
= info
->iommu
;
2757 domain
= info
->domain
;
2759 spin_unlock_irqrestore(&device_domain_lock
, flags
);
2761 /* DMA alias already has a domain, use it */
2766 /* Allocate and initialize new domain for the device */
2767 domain
= alloc_domain(0);
2770 if (domain_init(domain
, iommu
, gaw
)) {
2771 domain_exit(domain
);
2779 static struct dmar_domain
*set_domain_for_dev(struct device
*dev
,
2780 struct dmar_domain
*domain
)
2782 struct intel_iommu
*iommu
;
2783 struct dmar_domain
*tmp
;
2784 u16 req_id
, dma_alias
;
2787 iommu
= device_to_iommu(dev
, &bus
, &devfn
);
2791 req_id
= ((u16
)bus
<< 8) | devfn
;
2793 if (dev_is_pci(dev
)) {
2794 struct pci_dev
*pdev
= to_pci_dev(dev
);
2796 pci_for_each_dma_alias(pdev
, get_last_alias
, &dma_alias
);
2798 /* register PCI DMA alias device */
2799 if (req_id
!= dma_alias
) {
2800 tmp
= dmar_insert_one_dev_info(iommu
, PCI_BUS_NUM(dma_alias
),
2801 dma_alias
& 0xff, NULL
, domain
);
2803 if (!tmp
|| tmp
!= domain
)
2808 tmp
= dmar_insert_one_dev_info(iommu
, bus
, devfn
, dev
, domain
);
2809 if (!tmp
|| tmp
!= domain
)
static int iommu_domain_identity_map(struct dmar_domain *domain,
				     unsigned long long start,
				     unsigned long long end)
{
	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;

	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
			  dma_to_mm_pfn(last_vpfn))) {
		pr_err("Reserving iova failed\n");
		return -ENOMEM;
	}

	pr_debug("Mapping reserved region %llx-%llx\n", start, end);
	/*
	 * RMRR range might have overlap with physical memory range,
	 * clear it first
	 */
	dma_pte_clear_range(domain, first_vpfn, last_vpfn);

	return __domain_mapping(domain, first_vpfn, NULL,
				first_vpfn, last_vpfn - first_vpfn + 1,
				DMA_PTE_READ|DMA_PTE_WRITE);
}
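/*
 * Illustrative sketch, not part of the upstream driver: an identity mapping
 * simply makes the IOVA equal to the physical address, which is why
 * first_vpfn is passed both as the IOVA start and as the physical start
 * above. The range below is hypothetical.
 */
static inline void __maybe_unused identity_range_example(void)
{
	unsigned long long start = 0x100000ULL;	/* hypothetical RMRR base */
	unsigned long long end   = 0x1fffffULL;	/* hypothetical RMRR end  */

	/* PFNs 0x100..0x1ff -> 256 identity-mapped VT-d pages */
	WARN_ON((end >> VTD_PAGE_SHIFT) - (start >> VTD_PAGE_SHIFT) + 1 != 256);
}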
2840 static int domain_prepare_identity_map(struct device
*dev
,
2841 struct dmar_domain
*domain
,
2842 unsigned long long start
,
2843 unsigned long long end
)
2845 /* For _hardware_ passthrough, don't bother. But for software
2846 passthrough, we do it anyway -- it may indicate a memory
2847 range which is reserved in E820, so which didn't get set
2848 up to start with in si_domain */
2849 if (domain
== si_domain
&& hw_pass_through
) {
2850 dev_warn(dev
, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
2855 dev_info(dev
, "Setting identity map [0x%Lx - 0x%Lx]\n", start
, end
);
2858 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2859 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2860 dmi_get_system_info(DMI_BIOS_VENDOR
),
2861 dmi_get_system_info(DMI_BIOS_VERSION
),
2862 dmi_get_system_info(DMI_PRODUCT_VERSION
));
2866 if (end
>> agaw_to_width(domain
->agaw
)) {
2867 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2868 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2869 agaw_to_width(domain
->agaw
),
2870 dmi_get_system_info(DMI_BIOS_VENDOR
),
2871 dmi_get_system_info(DMI_BIOS_VERSION
),
2872 dmi_get_system_info(DMI_PRODUCT_VERSION
));
2876 return iommu_domain_identity_map(domain
, start
, end
);
2879 static int md_domain_init(struct dmar_domain
*domain
, int guest_width
);
2881 static int __init
si_domain_init(int hw
)
2883 struct dmar_rmrr_unit
*rmrr
;
2887 si_domain
= alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY
);
2891 if (md_domain_init(si_domain
, DEFAULT_DOMAIN_ADDRESS_WIDTH
)) {
2892 domain_exit(si_domain
);
2899 for_each_online_node(nid
) {
2900 unsigned long start_pfn
, end_pfn
;
2903 for_each_mem_pfn_range(i
, nid
, &start_pfn
, &end_pfn
, NULL
) {
2904 ret
= iommu_domain_identity_map(si_domain
,
2905 PFN_PHYS(start_pfn
), PFN_PHYS(end_pfn
));
2912 * Identity map the RMRRs so that devices with RMRRs could also use
2915 for_each_rmrr_units(rmrr
) {
2916 for_each_active_dev_scope(rmrr
->devices
, rmrr
->devices_cnt
,
2918 unsigned long long start
= rmrr
->base_address
;
2919 unsigned long long end
= rmrr
->end_address
;
2921 if (WARN_ON(end
< start
||
2922 end
>> agaw_to_width(si_domain
->agaw
)))
2925 ret
= iommu_domain_identity_map(si_domain
, start
, end
);
2934 static int identity_mapping(struct device
*dev
)
2936 struct device_domain_info
*info
;
2938 info
= dev
->archdata
.iommu
;
2940 return (info
->domain
== si_domain
);
2945 static int domain_add_dev_info(struct dmar_domain
*domain
, struct device
*dev
)
2947 struct dmar_domain
*ndomain
;
2948 struct intel_iommu
*iommu
;
2951 iommu
= device_to_iommu(dev
, &bus
, &devfn
);
2955 ndomain
= dmar_insert_one_dev_info(iommu
, bus
, devfn
, dev
, domain
);
2956 if (ndomain
!= domain
)
2962 static bool device_has_rmrr(struct device
*dev
)
2964 struct dmar_rmrr_unit
*rmrr
;
2969 for_each_rmrr_units(rmrr
) {
2971 * Return TRUE if this RMRR contains the device that
2974 for_each_active_dev_scope(rmrr
->devices
,
2975 rmrr
->devices_cnt
, i
, tmp
)
2977 is_downstream_to_pci_bridge(dev
, tmp
)) {
2987 * device_rmrr_is_relaxable - Test whether the RMRR of this device
2988 * is relaxable (ie. is allowed to be not enforced under some conditions)
2989 * @dev: device handle
2991 * We assume that PCI USB devices with RMRRs have them largely
2992 * for historical reasons and that the RMRR space is not actively used post
2993 * boot. This exclusion may change if vendors begin to abuse it.
2995 * The same exception is made for graphics devices, with the requirement that
2996 * any use of the RMRR regions will be torn down before assigning the device
2999 * Return: true if the RMRR is relaxable, false otherwise
3001 static bool device_rmrr_is_relaxable(struct device
*dev
)
3003 struct pci_dev
*pdev
;
3005 if (!dev_is_pci(dev
))
3008 pdev
= to_pci_dev(dev
);
3009 if (IS_USB_DEVICE(pdev
) || IS_GFX_DEVICE(pdev
))
3016 * There are a couple cases where we need to restrict the functionality of
3017 * devices associated with RMRRs. The first is when evaluating a device for
3018 * identity mapping because problems exist when devices are moved in and out
3019 * of domains and their respective RMRR information is lost. This means that
3020 * a device with associated RMRRs will never be in a "passthrough" domain.
3021 * The second is use of the device through the IOMMU API. This interface
3022 * expects to have full control of the IOVA space for the device. We cannot
3023 * satisfy both the requirement that RMRR access is maintained and have an
3024 * unencumbered IOVA space. We also have no ability to quiesce the device's
3025 * use of the RMRR space or even inform the IOMMU API user of the restriction.
3026 * We therefore prevent devices associated with an RMRR from participating in
3027 * the IOMMU API, which eliminates them from device assignment.
3029 * In both cases, devices which have relaxable RMRRs are not concerned by this
3030 * restriction. See device_rmrr_is_relaxable comment.
3032 static bool device_is_rmrr_locked(struct device
*dev
)
3034 if (!device_has_rmrr(dev
))
3037 if (device_rmrr_is_relaxable(dev
))
3044 * Return the required default domain type for a specific device.
3046 * @dev: the device in query
3047 * @startup: true if this is during early boot
3050 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
3051 * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain
3052 * - 0: both identity and dynamic domains work for this device
3054 static int device_def_domain_type(struct device
*dev
)
3056 if (dev_is_pci(dev
)) {
3057 struct pci_dev
*pdev
= to_pci_dev(dev
);
3060 * Prevent any device marked as untrusted from getting
3061 * placed into the statically identity mapping domain.
3063 if (pdev
->untrusted
)
3064 return IOMMU_DOMAIN_DMA
;
3066 if ((iommu_identity_mapping
& IDENTMAP_AZALIA
) && IS_AZALIA(pdev
))
3067 return IOMMU_DOMAIN_IDENTITY
;
3069 if ((iommu_identity_mapping
& IDENTMAP_GFX
) && IS_GFX_DEVICE(pdev
))
3070 return IOMMU_DOMAIN_IDENTITY
;
3073 * We want to start off with all devices in the 1:1 domain, and
3074 * take them out later if we find they can't access all of memory.
3076 * However, we can't do this for PCI devices behind bridges,
3077 * because all PCI devices behind the same bridge will end up
3078 * with the same source-id on their transactions.
3080 * Practically speaking, we can't change things around for these
3081 * devices at run-time, because we can't be sure there'll be no
3082 * DMA transactions in flight for any of their siblings.
3084 * So PCI devices (unless they're on the root bus) as well as
3085 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
3086 * the 1:1 domain, just in _case_ one of their siblings turns out
3087 * not to be able to map all of memory.
3089 if (!pci_is_pcie(pdev
)) {
3090 if (!pci_is_root_bus(pdev
->bus
))
3091 return IOMMU_DOMAIN_DMA
;
3092 if (pdev
->class >> 8 == PCI_CLASS_BRIDGE_PCI
)
3093 return IOMMU_DOMAIN_DMA
;
3094 } else if (pci_pcie_type(pdev
) == PCI_EXP_TYPE_PCI_BRIDGE
)
3095 return IOMMU_DOMAIN_DMA
;
3101 static void intel_iommu_init_qi(struct intel_iommu
*iommu
)
3104 * Start from the sane iommu hardware state.
3105 * If the queued invalidation is already initialized by us
3106 * (for example, while enabling interrupt-remapping) then
3107 * we got the things already rolling from a sane state.
3111 * Clear any previous faults.
3113 dmar_fault(-1, iommu
);
3115 * Disable queued invalidation if supported and already enabled
3116 * before OS handover.
3118 dmar_disable_qi(iommu
);
3121 if (dmar_enable_qi(iommu
)) {
3123 * Queued Invalidate not enabled, use Register Based Invalidate
3125 iommu
->flush
.flush_context
= __iommu_flush_context
;
3126 iommu
->flush
.flush_iotlb
= __iommu_flush_iotlb
;
3127 pr_info("%s: Using Register based invalidation\n",
3130 iommu
->flush
.flush_context
= qi_flush_context
;
3131 iommu
->flush
.flush_iotlb
= qi_flush_iotlb
;
3132 pr_info("%s: Using Queued invalidation\n", iommu
->name
);
3136 static int copy_context_table(struct intel_iommu
*iommu
,
3137 struct root_entry
*old_re
,
3138 struct context_entry
**tbl
,
3141 int tbl_idx
, pos
= 0, idx
, devfn
, ret
= 0, did
;
3142 struct context_entry
*new_ce
= NULL
, ce
;
3143 struct context_entry
*old_ce
= NULL
;
3144 struct root_entry re
;
3145 phys_addr_t old_ce_phys
;
3147 tbl_idx
= ext
? bus
* 2 : bus
;
3148 memcpy(&re
, old_re
, sizeof(re
));
3150 for (devfn
= 0; devfn
< 256; devfn
++) {
3151 /* First calculate the correct index */
3152 idx
= (ext
? devfn
* 2 : devfn
) % 256;
3155 /* First save what we may have and clean up */
3157 tbl
[tbl_idx
] = new_ce
;
3158 __iommu_flush_cache(iommu
, new_ce
,
3168 old_ce_phys
= root_entry_lctp(&re
);
3170 old_ce_phys
= root_entry_uctp(&re
);
3173 if (ext
&& devfn
== 0) {
3174 /* No LCTP, try UCTP */
3183 old_ce
= memremap(old_ce_phys
, PAGE_SIZE
,
3188 new_ce
= alloc_pgtable_page(iommu
->node
);
3195 /* Now copy the context entry */
3196 memcpy(&ce
, old_ce
+ idx
, sizeof(ce
));
3198 if (!__context_present(&ce
))
3201 did
= context_domain_id(&ce
);
3202 if (did
>= 0 && did
< cap_ndoms(iommu
->cap
))
3203 set_bit(did
, iommu
->domain_ids
);
3206 * We need a marker for copied context entries. This
3207 * marker needs to work for the old format as well as
3208 * for extended context entries.
3210 * Bit 67 of the context entry is used. In the old
3211 * format this bit is available to software, in the
3212 * extended format it is the PGE bit, but PGE is ignored
3213 * by HW if PASIDs are disabled (and thus still
3216 * So disable PASIDs first and then mark the entry
3217 * copied. This means that we don't copy PASID
3218 * translations from the old kernel, but this is fine as
3219 * faults there are not fatal.
3221 context_clear_pasid_enable(&ce
);
3222 context_set_copied(&ce
);
3227 tbl
[tbl_idx
+ pos
] = new_ce
;
3229 __iommu_flush_cache(iommu
, new_ce
, VTD_PAGE_SIZE
);
3238 static int copy_translation_tables(struct intel_iommu
*iommu
)
3240 struct context_entry
**ctxt_tbls
;
3241 struct root_entry
*old_rt
;
3242 phys_addr_t old_rt_phys
;
3243 int ctxt_table_entries
;
3244 unsigned long flags
;
3249 rtaddr_reg
= dmar_readq(iommu
->reg
+ DMAR_RTADDR_REG
);
3250 ext
= !!(rtaddr_reg
& DMA_RTADDR_RTT
);
3251 new_ext
= !!ecap_ecs(iommu
->ecap
);
3254 * The RTT bit can only be changed when translation is disabled,
3255 * but disabling translation means to open a window for data
3256 * corruption. So bail out and don't copy anything if we would
3257 * have to change the bit.
3262 old_rt_phys
= rtaddr_reg
& VTD_PAGE_MASK
;
3266 old_rt
= memremap(old_rt_phys
, PAGE_SIZE
, MEMREMAP_WB
);
3270 /* This is too big for the stack - allocate it from slab */
3271 ctxt_table_entries
= ext
? 512 : 256;
3273 ctxt_tbls
= kcalloc(ctxt_table_entries
, sizeof(void *), GFP_KERNEL
);
3277 for (bus
= 0; bus
< 256; bus
++) {
3278 ret
= copy_context_table(iommu
, &old_rt
[bus
],
3279 ctxt_tbls
, bus
, ext
);
3281 pr_err("%s: Failed to copy context table for bus %d\n",
3287 spin_lock_irqsave(&iommu
->lock
, flags
);
3289 /* Context tables are copied, now write them to the root_entry table */
3290 for (bus
= 0; bus
< 256; bus
++) {
3291 int idx
= ext
? bus
* 2 : bus
;
3294 if (ctxt_tbls
[idx
]) {
3295 val
= virt_to_phys(ctxt_tbls
[idx
]) | 1;
3296 iommu
->root_entry
[bus
].lo
= val
;
3299 if (!ext
|| !ctxt_tbls
[idx
+ 1])
3302 val
= virt_to_phys(ctxt_tbls
[idx
+ 1]) | 1;
3303 iommu
->root_entry
[bus
].hi
= val
;
3306 spin_unlock_irqrestore(&iommu
->lock
, flags
);
3310 __iommu_flush_cache(iommu
, iommu
->root_entry
, PAGE_SIZE
);
3320 static int __init
init_dmars(void)
3322 struct dmar_drhd_unit
*drhd
;
3323 struct intel_iommu
*iommu
;
3329 * initialize and program root entry to not present
3332 for_each_drhd_unit(drhd
) {
3334 * lock not needed as this is only incremented in the single
3335 * threaded kernel __init code path all other access are read
3338 if (g_num_of_iommus
< DMAR_UNITS_SUPPORTED
) {
3342 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED
);
3345 /* Preallocate enough resources for IOMMU hot-addition */
3346 if (g_num_of_iommus
< DMAR_UNITS_SUPPORTED
)
3347 g_num_of_iommus
= DMAR_UNITS_SUPPORTED
;
3349 g_iommus
= kcalloc(g_num_of_iommus
, sizeof(struct intel_iommu
*),
3352 pr_err("Allocating global iommu array failed\n");
3357 for_each_iommu(iommu
, drhd
) {
3358 if (drhd
->ignored
) {
3359 iommu_disable_translation(iommu
);
3364 * Find the max pasid size of all IOMMU's in the system.
3365 * We need to ensure the system pasid table is no bigger
3366 * than the smallest supported.
3368 if (pasid_supported(iommu
)) {
3369 u32 temp
= 2 << ecap_pss(iommu
->ecap
);
3371 intel_pasid_max_id
= min_t(u32
, temp
,
3372 intel_pasid_max_id
);
3375 g_iommus
[iommu
->seq_id
] = iommu
;
3377 intel_iommu_init_qi(iommu
);
3379 ret
= iommu_init_domains(iommu
);
3383 init_translation_status(iommu
);
3385 if (translation_pre_enabled(iommu
) && !is_kdump_kernel()) {
3386 iommu_disable_translation(iommu
);
3387 clear_translation_pre_enabled(iommu
);
3388 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3394 * we could share the same root & context tables
3395 * among all IOMMU's. Need to Split it later.
3397 ret
= iommu_alloc_root_entry(iommu
);
3401 if (translation_pre_enabled(iommu
)) {
3402 pr_info("Translation already enabled - trying to copy translation structures\n");
3404 ret
= copy_translation_tables(iommu
);
3407 * We found the IOMMU with translation
3408 * enabled - but failed to copy over the
3409 * old root-entry table. Try to proceed
3410 * by disabling translation now and
3411 * allocating a clean root-entry table.
3412 * This might cause DMAR faults, but
3413 * probably the dump will still succeed.
3415 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3417 iommu_disable_translation(iommu
);
3418 clear_translation_pre_enabled(iommu
);
3420 pr_info("Copied translation tables from previous kernel for %s\n",
3425 if (!ecap_pass_through(iommu
->ecap
))
3426 hw_pass_through
= 0;
3427 intel_svm_check(iommu
);
3431 * Now that qi is enabled on all iommus, set the root entry and flush
3432 * caches. This is required on some Intel X58 chipsets, otherwise the
3433 * flush_context function will loop forever and the boot hangs.
3435 for_each_active_iommu(iommu
, drhd
) {
3436 iommu_flush_write_buffer(iommu
);
3437 iommu_set_root_entry(iommu
);
3438 iommu
->flush
.flush_context(iommu
, 0, 0, 0, DMA_CCMD_GLOBAL_INVL
);
3439 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH
);
3442 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3447 iommu_identity_mapping
|= IDENTMAP_GFX
;
3449 check_tylersburg_isoch();
3451 ret
= si_domain_init(hw_pass_through
);
3458 * global invalidate context cache
3459 * global invalidate iotlb
3460 * enable translation
3462 for_each_iommu(iommu
, drhd
) {
3463 if (drhd
->ignored
) {
3465 * we always have to disable PMRs or DMA may fail on
3469 iommu_disable_protect_mem_regions(iommu
);
3473 iommu_flush_write_buffer(iommu
);
3475 #ifdef CONFIG_INTEL_IOMMU_SVM
3476 if (pasid_supported(iommu
) && ecap_prs(iommu
->ecap
)) {
3478 * Call dmar_alloc_hwirq() with dmar_global_lock held,
3479 * could cause possible lock race condition.
3481 up_write(&dmar_global_lock
);
3482 ret
= intel_svm_enable_prq(iommu
);
3483 down_write(&dmar_global_lock
);
3488 ret
= dmar_set_interrupt(iommu
);
3496 for_each_active_iommu(iommu
, drhd
) {
3497 disable_dmar_iommu(iommu
);
3498 free_dmar_iommu(iommu
);
/* This takes a number of _MM_ pages, not VTD pages */
static unsigned long intel_alloc_iova(struct device *dev,
				      struct dmar_domain *domain,
				      unsigned long nrpages, uint64_t dma_mask)
{
	unsigned long iova_pfn;

	/*
	 * Restrict dma_mask to the width that the iommu can handle.
	 * First-level translation restricts the input-address to a
	 * canonical address (i.e., address bits 63:N have the same
	 * value as address bit [N-1], where N is 48-bits with 4-level
	 * paging and 57-bits with 5-level paging). Hence, skip bit
	 * [N-1].
	 */
	if (domain_use_first_level(domain))
		dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw - 1),
				 dma_mask);
	else
		dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw),
				 dma_mask);

	/* Ensure we reserve the whole size-aligned region */
	nrpages = __roundup_pow_of_two(nrpages);

	if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
		/*
		 * First try to allocate an io virtual address in
		 * DMA_BIT_MASK(32) and if that fails then try allocating
		 * from higher range
		 */
		iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
					   IOVA_PFN(DMA_BIT_MASK(32)), false);
		if (iova_pfn)
			return iova_pfn;
	}
	iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
				   IOVA_PFN(dma_mask), true);
	if (unlikely(!iova_pfn)) {
		dev_err_once(dev, "Allocating %ld-page iova failed\n",
			     nrpages);
		return 0;
	}

	return iova_pfn;
}
3554 static struct dmar_domain
*get_private_domain_for_dev(struct device
*dev
)
3556 struct dmar_domain
*domain
, *tmp
;
3557 struct dmar_rmrr_unit
*rmrr
;
3558 struct device
*i_dev
;
3561 /* Device shouldn't be attached by any domains. */
3562 domain
= find_domain(dev
);
3566 domain
= find_or_alloc_domain(dev
, DEFAULT_DOMAIN_ADDRESS_WIDTH
);
3570 /* We have a new domain - setup possible RMRRs for the device */
3572 for_each_rmrr_units(rmrr
) {
3573 for_each_active_dev_scope(rmrr
->devices
, rmrr
->devices_cnt
,
3578 ret
= domain_prepare_identity_map(dev
, domain
,
3582 dev_err(dev
, "Mapping reserved region failed\n");
3587 tmp
= set_domain_for_dev(dev
, domain
);
3588 if (!tmp
|| domain
!= tmp
) {
3589 domain_exit(domain
);
3595 dev_err(dev
, "Allocating domain failed\n");
3597 domain
->domain
.type
= IOMMU_DOMAIN_DMA
;
3602 /* Check if the dev needs to go through non-identity map and unmap process.*/
3603 static bool iommu_need_mapping(struct device
*dev
)
3607 if (iommu_dummy(dev
))
3610 if (unlikely(attach_deferred(dev
)))
3611 do_deferred_attach(dev
);
3613 ret
= identity_mapping(dev
);
3615 u64 dma_mask
= *dev
->dma_mask
;
3617 if (dev
->coherent_dma_mask
&& dev
->coherent_dma_mask
< dma_mask
)
3618 dma_mask
= dev
->coherent_dma_mask
;
3620 if (dma_mask
>= dma_direct_get_required_mask(dev
))
3624 * 32 bit DMA is removed from si_domain and fall back to
3625 * non-identity mapping.
3627 dmar_remove_one_dev_info(dev
);
3628 ret
= iommu_request_dma_domain_for_dev(dev
);
3630 struct iommu_domain
*domain
;
3631 struct dmar_domain
*dmar_domain
;
3633 domain
= iommu_get_domain_for_dev(dev
);
3635 dmar_domain
= to_dmar_domain(domain
);
3636 dmar_domain
->flags
|= DOMAIN_FLAG_LOSE_CHILDREN
;
3638 dmar_remove_one_dev_info(dev
);
3639 get_private_domain_for_dev(dev
);
3642 dev_info(dev
, "32bit DMA uses non-identity mapping\n");
3648 static dma_addr_t
__intel_map_single(struct device
*dev
, phys_addr_t paddr
,
3649 size_t size
, int dir
, u64 dma_mask
)
3651 struct dmar_domain
*domain
;
3652 phys_addr_t start_paddr
;
3653 unsigned long iova_pfn
;
3656 struct intel_iommu
*iommu
;
3657 unsigned long paddr_pfn
= paddr
>> PAGE_SHIFT
;
3659 BUG_ON(dir
== DMA_NONE
);
3661 domain
= find_domain(dev
);
3663 return DMA_MAPPING_ERROR
;
3665 iommu
= domain_get_iommu(domain
);
3666 size
= aligned_nrpages(paddr
, size
);
3668 iova_pfn
= intel_alloc_iova(dev
, domain
, dma_to_mm_pfn(size
), dma_mask
);
3673 * Check if DMAR supports zero-length reads on write only
3676 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
3677 !cap_zlr(iommu
->cap
))
3678 prot
|= DMA_PTE_READ
;
3679 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
3680 prot
|= DMA_PTE_WRITE
;
3682 * paddr - (paddr + size) might be partial page, we should map the whole
3683 * page. Note: if two part of one page are separately mapped, we
3684 * might have two guest_addr mapping to the same host paddr, but this
3685 * is not a big problem
3687 ret
= domain_pfn_mapping(domain
, mm_to_dma_pfn(iova_pfn
),
3688 mm_to_dma_pfn(paddr_pfn
), size
, prot
);
3692 start_paddr
= (phys_addr_t
)iova_pfn
<< PAGE_SHIFT
;
3693 start_paddr
+= paddr
& ~PAGE_MASK
;
3695 trace_map_single(dev
, start_paddr
, paddr
, size
<< VTD_PAGE_SHIFT
);
3701 free_iova_fast(&domain
->iovad
, iova_pfn
, dma_to_mm_pfn(size
));
3702 dev_err(dev
, "Device request: %zx@%llx dir %d --- failed\n",
3703 size
, (unsigned long long)paddr
, dir
);
3704 return DMA_MAPPING_ERROR
;
3707 static dma_addr_t
intel_map_page(struct device
*dev
, struct page
*page
,
3708 unsigned long offset
, size_t size
,
3709 enum dma_data_direction dir
,
3710 unsigned long attrs
)
3712 if (iommu_need_mapping(dev
))
3713 return __intel_map_single(dev
, page_to_phys(page
) + offset
,
3714 size
, dir
, *dev
->dma_mask
);
3715 return dma_direct_map_page(dev
, page
, offset
, size
, dir
, attrs
);
3718 static dma_addr_t
intel_map_resource(struct device
*dev
, phys_addr_t phys_addr
,
3719 size_t size
, enum dma_data_direction dir
,
3720 unsigned long attrs
)
3722 if (iommu_need_mapping(dev
))
3723 return __intel_map_single(dev
, phys_addr
, size
, dir
,
3725 return dma_direct_map_resource(dev
, phys_addr
, size
, dir
, attrs
);
3728 static void intel_unmap(struct device
*dev
, dma_addr_t dev_addr
, size_t size
)
3730 struct dmar_domain
*domain
;
3731 unsigned long start_pfn
, last_pfn
;
3732 unsigned long nrpages
;
3733 unsigned long iova_pfn
;
3734 struct intel_iommu
*iommu
;
3735 struct page
*freelist
;
3736 struct pci_dev
*pdev
= NULL
;
3738 domain
= find_domain(dev
);
3741 iommu
= domain_get_iommu(domain
);
3743 iova_pfn
= IOVA_PFN(dev_addr
);
3745 nrpages
= aligned_nrpages(dev_addr
, size
);
3746 start_pfn
= mm_to_dma_pfn(iova_pfn
);
3747 last_pfn
= start_pfn
+ nrpages
- 1;
3749 if (dev_is_pci(dev
))
3750 pdev
= to_pci_dev(dev
);
3752 freelist
= domain_unmap(domain
, start_pfn
, last_pfn
);
3753 if (intel_iommu_strict
|| (pdev
&& pdev
->untrusted
) ||
3754 !has_iova_flush_queue(&domain
->iovad
)) {
3755 iommu_flush_iotlb_psi(iommu
, domain
, start_pfn
,
3756 nrpages
, !freelist
, 0);
3758 free_iova_fast(&domain
->iovad
, iova_pfn
, dma_to_mm_pfn(nrpages
));
3759 dma_free_pagelist(freelist
);
3761 queue_iova(&domain
->iovad
, iova_pfn
, nrpages
,
3762 (unsigned long)freelist
);
3764 * queue up the release of the unmap to save the 1/6th of the
3765 * cpu used up by the iotlb flush operation...
3769 trace_unmap_single(dev
, dev_addr
, size
);
3772 static void intel_unmap_page(struct device
*dev
, dma_addr_t dev_addr
,
3773 size_t size
, enum dma_data_direction dir
,
3774 unsigned long attrs
)
3776 if (iommu_need_mapping(dev
))
3777 intel_unmap(dev
, dev_addr
, size
);
3779 dma_direct_unmap_page(dev
, dev_addr
, size
, dir
, attrs
);
3782 static void intel_unmap_resource(struct device
*dev
, dma_addr_t dev_addr
,
3783 size_t size
, enum dma_data_direction dir
, unsigned long attrs
)
3785 if (iommu_need_mapping(dev
))
3786 intel_unmap(dev
, dev_addr
, size
);
3789 static void *intel_alloc_coherent(struct device
*dev
, size_t size
,
3790 dma_addr_t
*dma_handle
, gfp_t flags
,
3791 unsigned long attrs
)
3793 struct page
*page
= NULL
;
3796 if (!iommu_need_mapping(dev
))
3797 return dma_direct_alloc(dev
, size
, dma_handle
, flags
, attrs
);
3799 size
= PAGE_ALIGN(size
);
3800 order
= get_order(size
);
3802 if (gfpflags_allow_blocking(flags
)) {
3803 unsigned int count
= size
>> PAGE_SHIFT
;
3805 page
= dma_alloc_from_contiguous(dev
, count
, order
,
3806 flags
& __GFP_NOWARN
);
3810 page
= alloc_pages(flags
, order
);
3813 memset(page_address(page
), 0, size
);
3815 *dma_handle
= __intel_map_single(dev
, page_to_phys(page
), size
,
3817 dev
->coherent_dma_mask
);
3818 if (*dma_handle
!= DMA_MAPPING_ERROR
)
3819 return page_address(page
);
3820 if (!dma_release_from_contiguous(dev
, page
, size
>> PAGE_SHIFT
))
3821 __free_pages(page
, order
);
3826 static void intel_free_coherent(struct device
*dev
, size_t size
, void *vaddr
,
3827 dma_addr_t dma_handle
, unsigned long attrs
)
3830 struct page
*page
= virt_to_page(vaddr
);
3832 if (!iommu_need_mapping(dev
))
3833 return dma_direct_free(dev
, size
, vaddr
, dma_handle
, attrs
);
3835 size
= PAGE_ALIGN(size
);
3836 order
= get_order(size
);
3838 intel_unmap(dev
, dma_handle
, size
);
3839 if (!dma_release_from_contiguous(dev
, page
, size
>> PAGE_SHIFT
))
3840 __free_pages(page
, order
);
3843 static void intel_unmap_sg(struct device
*dev
, struct scatterlist
*sglist
,
3844 int nelems
, enum dma_data_direction dir
,
3845 unsigned long attrs
)
3847 dma_addr_t startaddr
= sg_dma_address(sglist
) & PAGE_MASK
;
3848 unsigned long nrpages
= 0;
3849 struct scatterlist
*sg
;
3852 if (!iommu_need_mapping(dev
))
3853 return dma_direct_unmap_sg(dev
, sglist
, nelems
, dir
, attrs
);
3855 for_each_sg(sglist
, sg
, nelems
, i
) {
3856 nrpages
+= aligned_nrpages(sg_dma_address(sg
), sg_dma_len(sg
));
3859 intel_unmap(dev
, startaddr
, nrpages
<< VTD_PAGE_SHIFT
);
3861 trace_unmap_sg(dev
, startaddr
, nrpages
<< VTD_PAGE_SHIFT
);
3864 static int intel_map_sg(struct device
*dev
, struct scatterlist
*sglist
, int nelems
,
3865 enum dma_data_direction dir
, unsigned long attrs
)
3868 struct dmar_domain
*domain
;
3871 unsigned long iova_pfn
;
3873 struct scatterlist
*sg
;
3874 unsigned long start_vpfn
;
3875 struct intel_iommu
*iommu
;
3877 BUG_ON(dir
== DMA_NONE
);
3878 if (!iommu_need_mapping(dev
))
3879 return dma_direct_map_sg(dev
, sglist
, nelems
, dir
, attrs
);
3881 domain
= find_domain(dev
);
3885 iommu
= domain_get_iommu(domain
);
3887 for_each_sg(sglist
, sg
, nelems
, i
)
3888 size
+= aligned_nrpages(sg
->offset
, sg
->length
);
3890 iova_pfn
= intel_alloc_iova(dev
, domain
, dma_to_mm_pfn(size
),
3893 sglist
->dma_length
= 0;
3898 * Check if DMAR supports zero-length reads on write only
3901 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
3902 !cap_zlr(iommu
->cap
))
3903 prot
|= DMA_PTE_READ
;
3904 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
3905 prot
|= DMA_PTE_WRITE
;
3907 start_vpfn
= mm_to_dma_pfn(iova_pfn
);
3909 ret
= domain_sg_mapping(domain
, start_vpfn
, sglist
, size
, prot
);
3910 if (unlikely(ret
)) {
3911 dma_pte_free_pagetable(domain
, start_vpfn
,
3912 start_vpfn
+ size
- 1,
3913 agaw_to_level(domain
->agaw
) + 1);
3914 free_iova_fast(&domain
->iovad
, iova_pfn
, dma_to_mm_pfn(size
));
3918 for_each_sg(sglist
, sg
, nelems
, i
)
3919 trace_map_sg(dev
, i
+ 1, nelems
, sg
);
static u64 intel_get_required_mask(struct device *dev)
{
	if (!iommu_need_mapping(dev))
		return dma_direct_get_required_mask(dev);
	return DMA_BIT_MASK(32);
}

static const struct dma_map_ops intel_dma_ops = {
	.alloc = intel_alloc_coherent,
	.free = intel_free_coherent,
	.map_sg = intel_map_sg,
	.unmap_sg = intel_unmap_sg,
	.map_page = intel_map_page,
	.unmap_page = intel_unmap_page,
	.map_resource = intel_map_resource,
	.unmap_resource = intel_unmap_resource,
	.dma_supported = dma_direct_supported,
	.mmap = dma_common_mmap,
	.get_sgtable = dma_common_get_sgtable,
	.get_required_mask = intel_get_required_mask,
};
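/*
 * Illustrative sketch, not part of the upstream driver: device drivers
 * never call intel_map_page() and friends directly; they use the generic
 * DMA API, which dispatches through the dma_map_ops installed for the
 * device. The helper below is a hypothetical driver-side usage example.
 */
static inline dma_addr_t __maybe_unused intel_dma_ops_usage_example(struct device *dev,
								    void *buf, size_t len)
{
	/* ends up in intel_map_page() when intel_dma_ops is in effect */
	return dma_map_single(dev, buf, len, DMA_TO_DEVICE);
}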
3947 bounce_sync_single(struct device
*dev
, dma_addr_t addr
, size_t size
,
3948 enum dma_data_direction dir
, enum dma_sync_target target
)
3950 struct dmar_domain
*domain
;
3951 phys_addr_t tlb_addr
;
3953 domain
= find_domain(dev
);
3954 if (WARN_ON(!domain
))
3957 tlb_addr
= intel_iommu_iova_to_phys(&domain
->domain
, addr
);
3958 if (is_swiotlb_buffer(tlb_addr
))
3959 swiotlb_tbl_sync_single(dev
, tlb_addr
, size
, dir
, target
);
3963 bounce_map_single(struct device
*dev
, phys_addr_t paddr
, size_t size
,
3964 enum dma_data_direction dir
, unsigned long attrs
,
3967 size_t aligned_size
= ALIGN(size
, VTD_PAGE_SIZE
);
3968 struct dmar_domain
*domain
;
3969 struct intel_iommu
*iommu
;
3970 unsigned long iova_pfn
;
3971 unsigned long nrpages
;
3972 phys_addr_t tlb_addr
;
3976 if (unlikely(attach_deferred(dev
)))
3977 do_deferred_attach(dev
);
3979 domain
= find_domain(dev
);
3981 if (WARN_ON(dir
== DMA_NONE
|| !domain
))
3982 return DMA_MAPPING_ERROR
;
3984 iommu
= domain_get_iommu(domain
);
3985 if (WARN_ON(!iommu
))
3986 return DMA_MAPPING_ERROR
;
3988 nrpages
= aligned_nrpages(0, size
);
3989 iova_pfn
= intel_alloc_iova(dev
, domain
,
3990 dma_to_mm_pfn(nrpages
), dma_mask
);
3992 return DMA_MAPPING_ERROR
;
3995 * Check if DMAR supports zero-length reads on write only
3998 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
||
3999 !cap_zlr(iommu
->cap
))
4000 prot
|= DMA_PTE_READ
;
4001 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
4002 prot
|= DMA_PTE_WRITE
;
4005 * If both the physical buffer start address and size are
4006 * page aligned, we don't need to use a bounce page.
4008 if (!IS_ALIGNED(paddr
| size
, VTD_PAGE_SIZE
)) {
4009 tlb_addr
= swiotlb_tbl_map_single(dev
,
4010 __phys_to_dma(dev
, io_tlb_start
),
4011 paddr
, size
, aligned_size
, dir
, attrs
);
4012 if (tlb_addr
== DMA_MAPPING_ERROR
) {
4015 /* Cleanup the padding area. */
4016 void *padding_start
= phys_to_virt(tlb_addr
);
4017 size_t padding_size
= aligned_size
;
4019 if (!(attrs
& DMA_ATTR_SKIP_CPU_SYNC
) &&
4020 (dir
== DMA_TO_DEVICE
||
4021 dir
== DMA_BIDIRECTIONAL
)) {
4022 padding_start
+= size
;
4023 padding_size
-= size
;
4026 memset(padding_start
, 0, padding_size
);
4032 ret
= domain_pfn_mapping(domain
, mm_to_dma_pfn(iova_pfn
),
4033 tlb_addr
>> VTD_PAGE_SHIFT
, nrpages
, prot
);
4037 trace_bounce_map_single(dev
, iova_pfn
<< PAGE_SHIFT
, paddr
, size
);
4039 return (phys_addr_t
)iova_pfn
<< PAGE_SHIFT
;
4042 if (is_swiotlb_buffer(tlb_addr
))
4043 swiotlb_tbl_unmap_single(dev
, tlb_addr
, size
,
4044 aligned_size
, dir
, attrs
);
4046 free_iova_fast(&domain
->iovad
, iova_pfn
, dma_to_mm_pfn(nrpages
));
4047 dev_err(dev
, "Device bounce map: %zx@%llx dir %d --- failed\n",
4048 size
, (unsigned long long)paddr
, dir
);
4050 return DMA_MAPPING_ERROR
;
4054 bounce_unmap_single(struct device
*dev
, dma_addr_t dev_addr
, size_t size
,
4055 enum dma_data_direction dir
, unsigned long attrs
)
4057 size_t aligned_size
= ALIGN(size
, VTD_PAGE_SIZE
);
4058 struct dmar_domain
*domain
;
4059 phys_addr_t tlb_addr
;
4061 domain
= find_domain(dev
);
4062 if (WARN_ON(!domain
))
4065 tlb_addr
= intel_iommu_iova_to_phys(&domain
->domain
, dev_addr
);
4066 if (WARN_ON(!tlb_addr
))
4069 intel_unmap(dev
, dev_addr
, size
);
4070 if (is_swiotlb_buffer(tlb_addr
))
4071 swiotlb_tbl_unmap_single(dev
, tlb_addr
, size
,
4072 aligned_size
, dir
, attrs
);
4074 trace_bounce_unmap_single(dev
, dev_addr
, size
);
4078 bounce_map_page(struct device
*dev
, struct page
*page
, unsigned long offset
,
4079 size_t size
, enum dma_data_direction dir
, unsigned long attrs
)
4081 return bounce_map_single(dev
, page_to_phys(page
) + offset
,
4082 size
, dir
, attrs
, *dev
->dma_mask
);
4086 bounce_map_resource(struct device
*dev
, phys_addr_t phys_addr
, size_t size
,
4087 enum dma_data_direction dir
, unsigned long attrs
)
4089 return bounce_map_single(dev
, phys_addr
, size
,
4090 dir
, attrs
, *dev
->dma_mask
);
4094 bounce_unmap_page(struct device
*dev
, dma_addr_t dev_addr
, size_t size
,
4095 enum dma_data_direction dir
, unsigned long attrs
)
4097 bounce_unmap_single(dev
, dev_addr
, size
, dir
, attrs
);
4101 bounce_unmap_resource(struct device
*dev
, dma_addr_t dev_addr
, size_t size
,
4102 enum dma_data_direction dir
, unsigned long attrs
)
4104 bounce_unmap_single(dev
, dev_addr
, size
, dir
, attrs
);
4108 bounce_unmap_sg(struct device
*dev
, struct scatterlist
*sglist
, int nelems
,
4109 enum dma_data_direction dir
, unsigned long attrs
)
4111 struct scatterlist
*sg
;
4114 for_each_sg(sglist
, sg
, nelems
, i
)
4115 bounce_unmap_page(dev
, sg
->dma_address
,
4116 sg_dma_len(sg
), dir
, attrs
);
4120 bounce_map_sg(struct device
*dev
, struct scatterlist
*sglist
, int nelems
,
4121 enum dma_data_direction dir
, unsigned long attrs
)
4124 struct scatterlist
*sg
;
4126 for_each_sg(sglist
, sg
, nelems
, i
) {
4127 sg
->dma_address
= bounce_map_page(dev
, sg_page(sg
),
4128 sg
->offset
, sg
->length
,
4130 if (sg
->dma_address
== DMA_MAPPING_ERROR
)
4132 sg_dma_len(sg
) = sg
->length
;
4135 for_each_sg(sglist
, sg
, nelems
, i
)
4136 trace_bounce_map_sg(dev
, i
+ 1, nelems
, sg
);
4141 bounce_unmap_sg(dev
, sglist
, i
, dir
, attrs
| DMA_ATTR_SKIP_CPU_SYNC
);
4146 bounce_sync_single_for_cpu(struct device
*dev
, dma_addr_t addr
,
4147 size_t size
, enum dma_data_direction dir
)
4149 bounce_sync_single(dev
, addr
, size
, dir
, SYNC_FOR_CPU
);
4153 bounce_sync_single_for_device(struct device
*dev
, dma_addr_t addr
,
4154 size_t size
, enum dma_data_direction dir
)
4156 bounce_sync_single(dev
, addr
, size
, dir
, SYNC_FOR_DEVICE
);
4160 bounce_sync_sg_for_cpu(struct device
*dev
, struct scatterlist
*sglist
,
4161 int nelems
, enum dma_data_direction dir
)
4163 struct scatterlist
*sg
;
4166 for_each_sg(sglist
, sg
, nelems
, i
)
4167 bounce_sync_single(dev
, sg_dma_address(sg
),
4168 sg_dma_len(sg
), dir
, SYNC_FOR_CPU
);
4172 bounce_sync_sg_for_device(struct device
*dev
, struct scatterlist
*sglist
,
4173 int nelems
, enum dma_data_direction dir
)
4175 struct scatterlist
*sg
;
4178 for_each_sg(sglist
, sg
, nelems
, i
)
4179 bounce_sync_single(dev
, sg_dma_address(sg
),
4180 sg_dma_len(sg
), dir
, SYNC_FOR_DEVICE
);
static const struct dma_map_ops bounce_dma_ops = {
	.alloc = intel_alloc_coherent,
	.free = intel_free_coherent,
	.map_sg = bounce_map_sg,
	.unmap_sg = bounce_unmap_sg,
	.map_page = bounce_map_page,
	.unmap_page = bounce_unmap_page,
	.sync_single_for_cpu = bounce_sync_single_for_cpu,
	.sync_single_for_device = bounce_sync_single_for_device,
	.sync_sg_for_cpu = bounce_sync_sg_for_cpu,
	.sync_sg_for_device = bounce_sync_sg_for_device,
	.map_resource = bounce_map_resource,
	.unmap_resource = bounce_unmap_resource,
	.dma_supported = dma_direct_supported,
};
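/*
 * Illustrative sketch, not part of the upstream driver: the bounce ops are
 * installed for untrusted devices so that sub-page buffers are first copied
 * into swiotlb and the device never gains access to the rest of the page.
 * The helper below only restates the alignment test that decides whether a
 * bounce buffer is needed; it is not used anywhere in this file.
 */
static inline bool __maybe_unused bounce_needed_example(phys_addr_t paddr, size_t size)
{
	/* an unaligned start or length forces a swiotlb bounce page */
	return !IS_ALIGNED(paddr | size, VTD_PAGE_SIZE);
}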
static inline int iommu_domain_cache_init(void)
{
	int ret = 0;

	iommu_domain_cache = kmem_cache_create("iommu_domain",
					       sizeof(struct dmar_domain),
					       0,
					       SLAB_HWCACHE_ALIGN,
					       NULL);
	if (!iommu_domain_cache) {
		pr_err("Couldn't create iommu_domain cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static inline int iommu_devinfo_cache_init(void)
{
	int ret = 0;

	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
						sizeof(struct device_domain_info),
						0,
						SLAB_HWCACHE_ALIGN,
						NULL);
	if (!iommu_devinfo_cache) {
		pr_err("Couldn't create devinfo cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static int __init iommu_init_mempool(void)
{
	int ret;

	ret = iova_cache_get();
	if (ret)
		return ret;

	ret = iommu_domain_cache_init();
	if (ret)
		goto domain_error;

	ret = iommu_devinfo_cache_init();
	if (!ret)
		return ret;

	kmem_cache_destroy(iommu_domain_cache);
domain_error:
	iova_cache_put();

	return -ENOMEM;
}

static void __init iommu_exit_mempool(void)
{
	kmem_cache_destroy(iommu_devinfo_cache);
	kmem_cache_destroy(iommu_domain_cache);
	iova_cache_put();
}
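/*
 * Illustrative sketch, not part of the upstream driver: domain and
 * device-info objects are carved out of the slab caches created above. A
 * hypothetical allocation/free pair looks like this.
 */
static inline void __maybe_unused iommu_cache_usage_example(void)
{
	struct dmar_domain *d = kmem_cache_alloc(iommu_domain_cache, GFP_KERNEL);

	if (d)
		kmem_cache_free(iommu_domain_cache, d);
}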
4263 static void quirk_ioat_snb_local_iommu(struct pci_dev
*pdev
)
4265 struct dmar_drhd_unit
*drhd
;
4269 /* We know that this device on this chipset has its own IOMMU.
4270 * If we find it under a different IOMMU, then the BIOS is lying
4271 * to us. Hope that the IOMMU for this device is actually
4272 * disabled, and it needs no translation...
4274 rc
= pci_bus_read_config_dword(pdev
->bus
, PCI_DEVFN(0, 0), 0xb0, &vtbar
);
4276 /* "can't" happen */
4277 dev_info(&pdev
->dev
, "failed to run vt-d quirk\n");
4280 vtbar
&= 0xffff0000;
4282 /* we know that the this iommu should be at offset 0xa000 from vtbar */
4283 drhd
= dmar_find_matched_drhd_unit(pdev
);
4284 if (!drhd
|| drhd
->reg_base_addr
- vtbar
!= 0xa000) {
4285 pr_warn_once(FW_BUG
"BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
4286 add_taint(TAINT_FIRMWARE_WORKAROUND
, LOCKDEP_STILL_OK
);
4287 pdev
->dev
.archdata
.iommu
= DUMMY_DEVICE_DOMAIN_INFO
;
4290 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL
, PCI_DEVICE_ID_INTEL_IOAT_SNB
, quirk_ioat_snb_local_iommu
);
4292 static void __init
init_no_remapping_devices(void)
4294 struct dmar_drhd_unit
*drhd
;
4298 for_each_drhd_unit(drhd
) {
4299 if (!drhd
->include_all
) {
4300 for_each_active_dev_scope(drhd
->devices
,
4301 drhd
->devices_cnt
, i
, dev
)
4303 /* ignore DMAR unit if no devices exist */
4304 if (i
== drhd
->devices_cnt
)
4309 for_each_active_drhd_unit(drhd
) {
4310 if (drhd
->include_all
)
4313 for_each_active_dev_scope(drhd
->devices
,
4314 drhd
->devices_cnt
, i
, dev
)
4315 if (!dev_is_pci(dev
) || !IS_GFX_DEVICE(to_pci_dev(dev
)))
4317 if (i
< drhd
->devices_cnt
)
4320 /* This IOMMU has *only* gfx devices. Either bypass it or
4321 set the gfx_mapped flag, as appropriate */
4322 if (!dmar_map_gfx
) {
4324 for_each_active_dev_scope(drhd
->devices
,
4325 drhd
->devices_cnt
, i
, dev
)
4326 dev
->archdata
.iommu
= DUMMY_DEVICE_DOMAIN_INFO
;
4331 #ifdef CONFIG_SUSPEND
4332 static int init_iommu_hw(void)
4334 struct dmar_drhd_unit
*drhd
;
4335 struct intel_iommu
*iommu
= NULL
;
4337 for_each_active_iommu(iommu
, drhd
)
4339 dmar_reenable_qi(iommu
);
4341 for_each_iommu(iommu
, drhd
) {
4342 if (drhd
->ignored
) {
4344 * we always have to disable PMRs or DMA may fail on
4348 iommu_disable_protect_mem_regions(iommu
);
4352 iommu_flush_write_buffer(iommu
);
4354 iommu_set_root_entry(iommu
);
4356 iommu
->flush
.flush_context(iommu
, 0, 0, 0,
4357 DMA_CCMD_GLOBAL_INVL
);
4358 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH
);
4359 iommu_enable_translation(iommu
);
4360 iommu_disable_protect_mem_regions(iommu
);
4366 static void iommu_flush_all(void)
4368 struct dmar_drhd_unit
*drhd
;
4369 struct intel_iommu
*iommu
;
4371 for_each_active_iommu(iommu
, drhd
) {
4372 iommu
->flush
.flush_context(iommu
, 0, 0, 0,
4373 DMA_CCMD_GLOBAL_INVL
);
4374 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0,
4375 DMA_TLB_GLOBAL_FLUSH
);
4379 static int iommu_suspend(void)
4381 struct dmar_drhd_unit
*drhd
;
4382 struct intel_iommu
*iommu
= NULL
;
4385 for_each_active_iommu(iommu
, drhd
) {
4386 iommu
->iommu_state
= kcalloc(MAX_SR_DMAR_REGS
, sizeof(u32
),
4388 if (!iommu
->iommu_state
)
4394 for_each_active_iommu(iommu
, drhd
) {
4395 iommu_disable_translation(iommu
);
4397 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
4399 iommu
->iommu_state
[SR_DMAR_FECTL_REG
] =
4400 readl(iommu
->reg
+ DMAR_FECTL_REG
);
4401 iommu
->iommu_state
[SR_DMAR_FEDATA_REG
] =
4402 readl(iommu
->reg
+ DMAR_FEDATA_REG
);
4403 iommu
->iommu_state
[SR_DMAR_FEADDR_REG
] =
4404 readl(iommu
->reg
+ DMAR_FEADDR_REG
);
4405 iommu
->iommu_state
[SR_DMAR_FEUADDR_REG
] =
4406 readl(iommu
->reg
+ DMAR_FEUADDR_REG
);
4408 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
4413 for_each_active_iommu(iommu
, drhd
)
4414 kfree(iommu
->iommu_state
);
4419 static void iommu_resume(void)
4421 struct dmar_drhd_unit
*drhd
;
4422 struct intel_iommu
*iommu
= NULL
;
4425 if (init_iommu_hw()) {
4427 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4429 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4433 for_each_active_iommu(iommu
, drhd
) {
4435 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
4437 writel(iommu
->iommu_state
[SR_DMAR_FECTL_REG
],
4438 iommu
->reg
+ DMAR_FECTL_REG
);
4439 writel(iommu
->iommu_state
[SR_DMAR_FEDATA_REG
],
4440 iommu
->reg
+ DMAR_FEDATA_REG
);
4441 writel(iommu
->iommu_state
[SR_DMAR_FEADDR_REG
],
4442 iommu
->reg
+ DMAR_FEADDR_REG
);
4443 writel(iommu
->iommu_state
[SR_DMAR_FEUADDR_REG
],
4444 iommu
->reg
+ DMAR_FEUADDR_REG
);
4446 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
4449 for_each_active_iommu(iommu
, drhd
)
4450 kfree(iommu
->iommu_state
);
static struct syscore_ops iommu_syscore_ops = {
	.resume		= iommu_resume,
	.suspend	= iommu_suspend,
};

static void __init init_iommu_pm_ops(void)
{
	register_syscore_ops(&iommu_syscore_ops);
}

#else
static inline void init_iommu_pm_ops(void) {}
#endif	/* CONFIG_PM */
static int rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
{
	if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
	    !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
	    rmrr->end_address <= rmrr->base_address ||
	    arch_rmrr_sanity_check(rmrr))
		return -EINVAL;

	return 0;
}
4478 int __init
dmar_parse_one_rmrr(struct acpi_dmar_header
*header
, void *arg
)
4480 struct acpi_dmar_reserved_memory
*rmrr
;
4481 struct dmar_rmrr_unit
*rmrru
;
4483 rmrr
= (struct acpi_dmar_reserved_memory
*)header
;
4484 if (rmrr_sanity_check(rmrr
)) {
4486 "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
4487 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4488 rmrr
->base_address
, rmrr
->end_address
,
4489 dmi_get_system_info(DMI_BIOS_VENDOR
),
4490 dmi_get_system_info(DMI_BIOS_VERSION
),
4491 dmi_get_system_info(DMI_PRODUCT_VERSION
));
4492 add_taint(TAINT_FIRMWARE_WORKAROUND
, LOCKDEP_STILL_OK
);
4495 rmrru
= kzalloc(sizeof(*rmrru
), GFP_KERNEL
);
4499 rmrru
->hdr
= header
;
4501 rmrru
->base_address
= rmrr
->base_address
;
4502 rmrru
->end_address
= rmrr
->end_address
;
4504 rmrru
->devices
= dmar_alloc_dev_scope((void *)(rmrr
+ 1),
4505 ((void *)rmrr
) + rmrr
->header
.length
,
4506 &rmrru
->devices_cnt
);
4507 if (rmrru
->devices_cnt
&& rmrru
->devices
== NULL
)
4510 list_add(&rmrru
->list
, &dmar_rmrr_units
);
4519 static struct dmar_atsr_unit
*dmar_find_atsr(struct acpi_dmar_atsr
*atsr
)
4521 struct dmar_atsr_unit
*atsru
;
4522 struct acpi_dmar_atsr
*tmp
;
4524 list_for_each_entry_rcu(atsru
, &dmar_atsr_units
, list
,
4526 tmp
= (struct acpi_dmar_atsr
*)atsru
->hdr
;
4527 if (atsr
->segment
!= tmp
->segment
)
4529 if (atsr
->header
.length
!= tmp
->header
.length
)
4531 if (memcmp(atsr
, tmp
, atsr
->header
.length
) == 0)
4538 int dmar_parse_one_atsr(struct acpi_dmar_header
*hdr
, void *arg
)
4540 struct acpi_dmar_atsr
*atsr
;
4541 struct dmar_atsr_unit
*atsru
;
4543 if (system_state
>= SYSTEM_RUNNING
&& !intel_iommu_enabled
)
4546 atsr
= container_of(hdr
, struct acpi_dmar_atsr
, header
);
4547 atsru
= dmar_find_atsr(atsr
);
4551 atsru
= kzalloc(sizeof(*atsru
) + hdr
->length
, GFP_KERNEL
);
4556 * If memory is allocated from slab by ACPI _DSM method, we need to
4557 * copy the memory content because the memory buffer will be freed
4560 atsru
->hdr
= (void *)(atsru
+ 1);
4561 memcpy(atsru
->hdr
, hdr
, hdr
->length
);
4562 atsru
->include_all
= atsr
->flags
& 0x1;
4563 if (!atsru
->include_all
) {
4564 atsru
->devices
= dmar_alloc_dev_scope((void *)(atsr
+ 1),
4565 (void *)atsr
+ atsr
->header
.length
,
4566 &atsru
->devices_cnt
);
4567 if (atsru
->devices_cnt
&& atsru
->devices
== NULL
) {
4573 list_add_rcu(&atsru
->list
, &dmar_atsr_units
);
4578 static void intel_iommu_free_atsr(struct dmar_atsr_unit
*atsru
)
4580 dmar_free_dev_scope(&atsru
->devices
, &atsru
->devices_cnt
);
4584 int dmar_release_one_atsr(struct acpi_dmar_header
*hdr
, void *arg
)
4586 struct acpi_dmar_atsr
*atsr
;
4587 struct dmar_atsr_unit
*atsru
;
4589 atsr
= container_of(hdr
, struct acpi_dmar_atsr
, header
);
4590 atsru
= dmar_find_atsr(atsr
);
4592 list_del_rcu(&atsru
->list
);
4594 intel_iommu_free_atsr(atsru
);
4600 int dmar_check_one_atsr(struct acpi_dmar_header
*hdr
, void *arg
)
4604 struct acpi_dmar_atsr
*atsr
;
4605 struct dmar_atsr_unit
*atsru
;
4607 atsr
= container_of(hdr
, struct acpi_dmar_atsr
, header
);
4608 atsru
= dmar_find_atsr(atsr
);
4612 if (!atsru
->include_all
&& atsru
->devices
&& atsru
->devices_cnt
) {
4613 for_each_active_dev_scope(atsru
->devices
, atsru
->devices_cnt
,
static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
	struct intel_iommu *iommu = dmaru->iommu;

	if (g_iommus[iommu->seq_id])

	if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
		pr_warn("%s: Doesn't support hardware pass through.\n",

	if (!ecap_sc_support(iommu->ecap) &&
	    domain_update_iommu_snooping(iommu)) {
		pr_warn("%s: Doesn't support snooping.\n",

	sp = domain_update_iommu_superpage(NULL, iommu) - 1;
	if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
		pr_warn("%s: Doesn't support large page.\n",

	/*
	 * Disable translation if already enabled prior to OS handover.
	 */
	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);

	g_iommus[iommu->seq_id] = iommu;
	ret = iommu_init_domains(iommu);

	ret = iommu_alloc_root_entry(iommu);

	intel_svm_check(iommu);

	if (dmaru->ignored) {
		/*
		 * we always have to disable PMRs or DMA may fail on this device
		 */
		iommu_disable_protect_mem_regions(iommu);

	intel_iommu_init_qi(iommu);
	iommu_flush_write_buffer(iommu);

#ifdef CONFIG_INTEL_IOMMU_SVM
	if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
		ret = intel_svm_enable_prq(iommu);

	ret = dmar_set_interrupt(iommu);

	iommu_set_root_entry(iommu);
	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
	iommu_enable_translation(iommu);

	iommu_disable_protect_mem_regions(iommu);

	disable_dmar_iommu(iommu);

	free_dmar_iommu(iommu);

int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
	struct intel_iommu *iommu = dmaru->iommu;

	if (!intel_iommu_enabled)

	ret = intel_iommu_add(dmaru);

	disable_dmar_iommu(iommu);
	free_dmar_iommu(iommu);
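/*
 * Teardown helper: intel_iommu_free_dmars() below releases the cached RMRR
 * and ATSR bookkeeping that was built up while parsing the ACPI DMAR
 * sub-tables above.
 */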
static void intel_iommu_free_dmars(void)
	struct dmar_rmrr_unit *rmrru, *rmrr_n;
	struct dmar_atsr_unit *atsru, *atsr_n;

	list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
		list_del(&rmrru->list);
		dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);

	list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
		list_del(&atsru->list);
		intel_iommu_free_atsr(atsru);
int dmar_find_matched_atsr_unit(struct pci_dev *dev)
	struct pci_bus *bus;
	struct pci_dev *bridge = NULL;
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	dev = pci_physfn(dev);
	for (bus = dev->bus; bus; bus = bus->parent) {
		/* If it's an integrated device, allow ATS */
		/* Connected via non-PCIe: no ATS */
		if (!pci_is_pcie(bridge) ||
		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
		/* If we found the root port, look it up in the ATSR */
		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)

	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
		if (atsr->segment != pci_domain_nr(dev->bus))

		for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
			if (tmp == &bridge->dev)

		if (atsru->include_all)
int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
	struct dmar_rmrr_unit *rmrru;
	struct dmar_atsr_unit *atsru;
	struct acpi_dmar_atsr *atsr;
	struct acpi_dmar_reserved_memory *rmrr;

	if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)

	list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
		rmrr = container_of(rmrru->hdr,
				    struct acpi_dmar_reserved_memory, header);
		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
			ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
					((void *)rmrr) + rmrr->header.length,
					rmrr->segment, rmrru->devices,
					rmrru->devices_cnt);
		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
			dmar_remove_dev_scope(info, rmrr->segment,
					rmrru->devices, rmrru->devices_cnt);

	list_for_each_entry(atsru, &dmar_atsr_units, list) {
		if (atsru->include_all)

		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
			ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
					(void *)atsr + atsr->header.length,
					atsr->segment, atsru->devices,
					atsru->devices_cnt);
		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
			if (dmar_remove_dev_scope(info, atsr->segment,
					atsru->devices, atsru->devices_cnt))
static int intel_iommu_memory_notifier(struct notifier_block *nb,
				       unsigned long val, void *v)
	struct memory_notify *mhp = v;
	unsigned long long start, end;
	unsigned long start_vpfn, last_vpfn;

	case MEM_GOING_ONLINE:
		start = mhp->start_pfn << PAGE_SHIFT;
		end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
		if (iommu_domain_identity_map(si_domain, start, end)) {
			pr_warn("Failed to build identity map for [%llx-%llx]\n",

	case MEM_CANCEL_ONLINE:
		start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
		last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
		while (start_vpfn <= last_vpfn) {
			struct dmar_drhd_unit *drhd;
			struct intel_iommu *iommu;
			struct page *freelist;

			iova = find_iova(&si_domain->iovad, start_vpfn);
				pr_debug("Failed get IOVA for PFN %lx\n",

			iova = split_and_remove_iova(&si_domain->iovad, iova,
						     start_vpfn, last_vpfn);
				pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
					start_vpfn, last_vpfn);

			freelist = domain_unmap(si_domain, iova->pfn_lo,

			for_each_active_iommu(iommu, drhd)
				iommu_flush_iotlb_psi(iommu, si_domain,
						      iova->pfn_lo, iova_size(iova),

			dma_free_pagelist(freelist);

			start_vpfn = iova->pfn_hi + 1;
			free_iova_mem(iova);

static struct notifier_block intel_iommu_memory_nb = {
	.notifier_call = intel_iommu_memory_notifier,
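/*
 * Per-CPU IOVA caches: when a CPU goes offline, any IOVA ranges cached on
 * that CPU for any domain on any IOMMU would otherwise be stranded, so the
 * cpuhp "dead" callback below releases them back to the IOVA allocator.
 */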
static void free_all_cpu_cached_iovas(unsigned int cpu)
	for (i = 0; i < g_num_of_iommus; i++) {
		struct intel_iommu *iommu = g_iommus[i];
		struct dmar_domain *domain;

		for (did = 0; did < cap_ndoms(iommu->cap); did++) {
			domain = get_iommu_domain(iommu, (u16)did);

			free_cpu_cached_iovas(cpu, &domain->iovad);

static int intel_iommu_cpu_dead(unsigned int cpu)
	free_all_cpu_cached_iovas(cpu);

static void intel_disable_iommus(void)
	struct intel_iommu *iommu = NULL;
	struct dmar_drhd_unit *drhd;

	for_each_iommu(iommu, drhd)
		iommu_disable_translation(iommu);

void intel_iommu_shutdown(void)
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;

	if (no_iommu || dmar_disabled)

	down_write(&dmar_global_lock);

	/* Disable PMRs explicitly here. */
	for_each_iommu(iommu, drhd)
		iommu_disable_protect_mem_regions(iommu);

	/* Make sure the IOMMUs are switched off */
	intel_disable_iommus();

	up_write(&dmar_global_lock);
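/*
 * sysfs support: each hardware unit is registered as an iommu device, and
 * the read-only attributes below (version, register address, cap/ecap bits
 * and domain counts) are grouped under an "intel-iommu" directory beneath
 * that device's sysfs node (typically under /sys/class/iommu/).
 */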
static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
	struct iommu_device *iommu_dev = dev_to_iommu_device(dev);

	return container_of(iommu_dev, struct intel_iommu, iommu);

static ssize_t intel_iommu_show_version(struct device *dev,
					struct device_attribute *attr,
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	u32 ver = readl(iommu->reg + DMAR_VER_REG);
	return sprintf(buf, "%d:%d\n",
		       DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);

static ssize_t intel_iommu_show_address(struct device *dev,
					struct device_attribute *attr,
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%llx\n", iommu->reg_phys);
static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);

static ssize_t intel_iommu_show_cap(struct device *dev,
				    struct device_attribute *attr,
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%llx\n", iommu->cap);
static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);

static ssize_t intel_iommu_show_ecap(struct device *dev,
				     struct device_attribute *attr,
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%llx\n", iommu->ecap);
static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);

static ssize_t intel_iommu_show_ndoms(struct device *dev,
				      struct device_attribute *attr,
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);

static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
					   struct device_attribute *attr,
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
						  cap_ndoms(iommu->cap)));
static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);

static struct attribute *intel_iommu_attrs[] = {
	&dev_attr_version.attr,
	&dev_attr_address.attr,
	&dev_attr_ecap.attr,
	&dev_attr_domains_supported.attr,
	&dev_attr_domains_used.attr,

static struct attribute_group intel_iommu_group = {
	.name = "intel-iommu",
	.attrs = intel_iommu_attrs,

const struct attribute_group *intel_iommu_groups[] = {
static inline bool has_untrusted_dev(void)
	struct pci_dev *pdev = NULL;

	for_each_pci_dev(pdev)
		if (pdev->untrusted)

static int __init platform_optin_force_iommu(void)
	if (!dmar_platform_optin() || no_platform_optin || !has_untrusted_dev())

	if (no_iommu || dmar_disabled)
		pr_info("Intel-IOMMU force enabled due to platform opt in\n");

	/*
	 * If Intel-IOMMU is disabled by default, we will apply identity
	 * map for all devices except those marked as being untrusted.
	 */
	iommu_set_default_passthrough(false);
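/*
 * ACPI namespace devices enumerated in the DRHD device scopes have no PCI
 * parent, so the probe below walks each active IOMMU's scope list and hands
 * any such device to the IOMMU core explicitly.
 */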
static int __init probe_acpi_namespace_devices(void)
	struct dmar_drhd_unit *drhd;
	/* To avoid a -Wunused-but-set-variable warning. */
	struct intel_iommu *iommu __maybe_unused;

	for_each_active_iommu(iommu, drhd) {
		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, dev) {
			struct acpi_device_physical_node *pn;
			struct iommu_group *group;
			struct acpi_device *adev;

			if (dev->bus != &acpi_bus_type)

			adev = to_acpi_device(dev);
			mutex_lock(&adev->physical_node_lock);
			list_for_each_entry(pn,
					    &adev->physical_node_list, node) {
				group = iommu_group_get(pn->dev);
					iommu_group_put(group);

				pn->dev->bus->iommu_ops = &intel_iommu_ops;
				ret = iommu_probe_device(pn->dev);

			mutex_unlock(&adev->physical_node_lock);
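/*
 * intel_iommu_init() is the single entry point for DMA remapping setup:
 * parse the DMAR table, initialize each hardware unit, register the
 * sysfs/IOMMU-core hooks and notifiers, and only then enable translation
 * on units that were not already enabled by firmware or a previous kernel.
 */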
int __init intel_iommu_init(void)
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	/*
	 * Intel IOMMU is required for a TXT/tboot launch or platform
	 * opt in, so enforce that.
	 */
	force_on = tboot_force_iommu() || platform_optin_force_iommu();

	if (iommu_init_mempool()) {
			panic("tboot: Failed to initialize iommu memory\n");

	down_write(&dmar_global_lock);
	if (dmar_table_init()) {
			panic("tboot: Failed to initialize DMAR table\n");

	if (dmar_dev_scope_init() < 0) {
			panic("tboot: Failed to initialize DMAR device scope\n");

	up_write(&dmar_global_lock);

	/*
	 * The bus notifier takes the dmar_global_lock, so lockdep will
	 * complain later when we register it under the lock.
	 */
	dmar_register_bus_notifier();

	down_write(&dmar_global_lock);

	intel_iommu_debugfs_init();

	if (no_iommu || dmar_disabled) {
		/*
		 * We exit the function here to ensure IOMMU's remapping and
		 * mempool aren't setup, which means that the IOMMU's PMRs
		 * won't be disabled via the call to init_dmars(). So disable
		 * it explicitly here. The PMRs were setup by tboot prior to
		 * calling SENTER, but the kernel is expected to reset/tear
		 */
		if (intel_iommu_tboot_noforce) {
			for_each_iommu(iommu, drhd)
				iommu_disable_protect_mem_regions(iommu);

		/*
		 * Make sure the IOMMUs are switched off, even when we
		 * boot into a kexec kernel and the previous kernel left
		 */
		intel_disable_iommus();

	if (list_empty(&dmar_rmrr_units))
		pr_info("No RMRR found\n");

	if (list_empty(&dmar_atsr_units))
		pr_info("No ATSR found\n");

	if (dmar_init_reserved_ranges()) {
			panic("tboot: Failed to reserve iommu ranges\n");
		goto out_free_reserved_range;

	intel_iommu_gfx_mapped = 1;

	init_no_remapping_devices();

			panic("tboot: Failed to initialize DMARs\n");
		pr_err("Initialization failed\n");
		goto out_free_reserved_range;

	up_write(&dmar_global_lock);

#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
	/*
	 * If the system has no untrusted device or the user has decided
	 * to disable the bounce page mechanisms, we don't need swiotlb.
	 * Mark this and the pre-allocated bounce pages will be released
	 */
	if (!has_untrusted_dev() || intel_no_bounce)

	dma_ops = &intel_dma_ops;

	init_iommu_pm_ops();

	down_read(&dmar_global_lock);
	for_each_active_iommu(iommu, drhd) {
		iommu_device_sysfs_add(&iommu->iommu, NULL,
		iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
		iommu_device_register(&iommu->iommu);
	up_read(&dmar_global_lock);

	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
	if (si_domain && !hw_pass_through)
		register_memory_notifier(&intel_iommu_memory_nb);
	cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
			  intel_iommu_cpu_dead);

	down_read(&dmar_global_lock);
	if (probe_acpi_namespace_devices())
		pr_warn("ACPI name space devices didn't probe correctly\n");

	/* Finally, we enable the DMA remapping hardware. */
	for_each_iommu(iommu, drhd) {
		if (!drhd->ignored && !translation_pre_enabled(iommu))
			iommu_enable_translation(iommu);

		iommu_disable_protect_mem_regions(iommu);
	up_read(&dmar_global_lock);

	pr_info("Intel(R) Virtualization Technology for Directed I/O\n");

	intel_iommu_enabled = 1;

out_free_reserved_range:
	put_iova_domain(&reserved_iova_list);
	intel_iommu_free_dmars();
	up_write(&dmar_global_lock);
	iommu_exit_mempool();
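/*
 * domain_context_clear_one_cb() is invoked once per DMA alias of a PCI
 * device, so stale context entries are cleared for every alias rather than
 * only for the device's own bus/devfn.
 */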
static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
	struct intel_iommu *iommu = opaque;

	domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);

/*
 * NB - intel-iommu lacks any sort of reference counting for the users of
 * dependent devices. If multiple endpoints have intersecting dependent
 * devices, unbinding the driver from any one of them will possibly leave
 * the others unable to operate.
 */
static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
	if (!iommu || !dev || !dev_is_pci(dev))

	pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
static void __dmar_remove_one_dev_info(struct device_domain_info *info)
	struct dmar_domain *domain;
	struct intel_iommu *iommu;
	unsigned long flags;

	assert_spin_locked(&device_domain_lock);

	iommu = info->iommu;
	domain = info->domain;

	if (dev_is_pci(info->dev) && sm_supported(iommu))
		intel_pasid_tear_down_entry(iommu, info->dev,

	iommu_disable_dev_iotlb(info);
	if (!dev_is_real_dma_subdevice(info->dev))
		domain_context_clear(iommu, info->dev);
	intel_pasid_free_table(info->dev);

	unlink_domain_info(info);

	spin_lock_irqsave(&iommu->lock, flags);
	domain_detach_iommu(domain, iommu);
	spin_unlock_irqrestore(&iommu->lock, flags);

	/* free the private domain */
	if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
	    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
	    list_empty(&domain->devices))
		domain_exit(info->domain);

	free_devinfo_mem(info);

static void dmar_remove_one_dev_info(struct device *dev)
	struct device_domain_info *info;
	unsigned long flags;

	spin_lock_irqsave(&device_domain_lock, flags);
	info = dev->archdata.iommu;
	if (info && info != DEFER_DEVICE_DOMAIN_INFO
	    && info != DUMMY_DEVICE_DOMAIN_INFO)
		__dmar_remove_one_dev_info(info);
	spin_unlock_irqrestore(&device_domain_lock, flags);
static int md_domain_init(struct dmar_domain *domain, int guest_width)
	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	domain->agaw = width_to_agaw(adjust_width);

	domain->iommu_coherency = 0;
	domain->iommu_snooping = 0;
	domain->iommu_superpage = 0;
	domain->max_addr = 0;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);

	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
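/*
 * md_domain_init() above only sets up software state; the hardware width
 * checks and page-table depth adjustments happen later, when the domain is
 * attached to a specific IOMMU (see prepare_domain_attach_device()).
 */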
static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
	struct dmar_domain *dmar_domain;
	struct iommu_domain *domain;

	case IOMMU_DOMAIN_DMA:
	case IOMMU_DOMAIN_UNMANAGED:
		dmar_domain = alloc_domain(0);
			pr_err("Can't allocate dmar_domain\n");
		if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
			pr_err("Domain initialization failed\n");
			domain_exit(dmar_domain);

		if (!intel_iommu_strict && type == IOMMU_DOMAIN_DMA) {
			ret = init_iova_flush_queue(&dmar_domain->iovad,
				pr_info("iova flush queue initialization failed\n");

		domain_update_iommu_cap(dmar_domain);

		domain = &dmar_domain->domain;
		domain->geometry.aperture_start = 0;
		domain->geometry.aperture_end =
				__DOMAIN_MAX_ADDR(dmar_domain->gaw);
		domain->geometry.force_aperture = true;

	case IOMMU_DOMAIN_IDENTITY:
		return &si_domain->domain;

static void intel_iommu_domain_free(struct iommu_domain *domain)
	if (domain != &si_domain->domain)
		domain_exit(to_dmar_domain(domain));
/*
 * Check whether a @domain could be attached to the @dev through the
 * aux-domain attach/detach APIs.
 */
is_aux_domain(struct device *dev, struct iommu_domain *domain)
	struct device_domain_info *info = dev->archdata.iommu;

	return info && info->auxd_enabled &&
			domain->type == IOMMU_DOMAIN_UNMANAGED;

static void auxiliary_link_device(struct dmar_domain *domain,
	struct device_domain_info *info = dev->archdata.iommu;

	assert_spin_locked(&device_domain_lock);

	domain->auxd_refcnt++;
	list_add(&domain->auxd, &info->auxiliary_domains);

static void auxiliary_unlink_device(struct dmar_domain *domain,
	struct device_domain_info *info = dev->archdata.iommu;

	assert_spin_locked(&device_domain_lock);

	list_del(&domain->auxd);
	domain->auxd_refcnt--;

	if (!domain->auxd_refcnt && domain->default_pasid > 0)
		ioasid_free(domain->default_pasid);
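/*
 * Auxiliary domains: a domain attached in aux mode is reached through a
 * PASID rather than the requester ID. aux_domain_add_dev() below allocates
 * a default PASID for the domain (shared by all devices attached to it in
 * aux mode) and installs the corresponding scalable-mode PASID entry.
 */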
static int aux_domain_add_dev(struct dmar_domain *domain,
	unsigned long flags;
	struct intel_iommu *iommu;

	iommu = device_to_iommu(dev, &bus, &devfn);

	if (domain->default_pasid <= 0) {

		/* No private data needed for the default pasid */
		pasid = ioasid_alloc(NULL, PASID_MIN,
				     pci_max_pasids(to_pci_dev(dev)) - 1,
		if (pasid == INVALID_IOASID) {
			pr_err("Can't allocate default pasid\n");

		domain->default_pasid = pasid;

	spin_lock_irqsave(&device_domain_lock, flags);
	/*
	 * iommu->lock must be held to attach domain to iommu and setup the
	 * pasid entry for second level translation.
	 */
	spin_lock(&iommu->lock);
	ret = domain_attach_iommu(domain, iommu);

	/* Setup the PASID entry for mediated devices: */
	if (domain_use_first_level(domain))
		ret = domain_setup_first_level(iommu, domain, dev,
					       domain->default_pasid);
		ret = intel_pasid_setup_second_level(iommu, domain, dev,
						     domain->default_pasid);

	spin_unlock(&iommu->lock);

	auxiliary_link_device(domain, dev);

	spin_unlock_irqrestore(&device_domain_lock, flags);

	domain_detach_iommu(domain, iommu);

	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);
	if (!domain->auxd_refcnt && domain->default_pasid > 0)
		ioasid_free(domain->default_pasid);

static void aux_domain_remove_dev(struct dmar_domain *domain,
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags;

	if (!is_aux_domain(dev, &domain->domain))

	spin_lock_irqsave(&device_domain_lock, flags);
	info = dev->archdata.iommu;
	iommu = info->iommu;

	auxiliary_unlink_device(domain, dev);

	spin_lock(&iommu->lock);
	intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
	domain_detach_iommu(domain, iommu);
	spin_unlock(&iommu->lock);

	spin_unlock_irqrestore(&device_domain_lock, flags);
static int prepare_domain_attach_device(struct iommu_domain *domain,
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct intel_iommu *iommu;

	iommu = device_to_iommu(dev, &bus, &devfn);

	/* check if this iommu agaw is sufficient for max mapped address */
	addr_width = agaw_to_width(iommu->agaw);
	if (addr_width > cap_mgaw(iommu->cap))
		addr_width = cap_mgaw(iommu->cap);

	if (dmar_domain->max_addr > (1LL << addr_width)) {
		dev_err(dev, "%s: iommu width (%d) is not "
			"sufficient for the mapped address (%llx)\n",
			__func__, addr_width, dmar_domain->max_addr);

	dmar_domain->gaw = addr_width;

	/*
	 * Knock out extra levels of page tables if necessary
	 */
	while (iommu->agaw < dmar_domain->agaw) {
		struct dma_pte *pte;

		pte = dmar_domain->pgd;
		if (dma_pte_present(pte)) {
			dmar_domain->pgd = (struct dma_pte *)
				phys_to_virt(dma_pte_addr(pte));
			free_pgtable_page(pte);
		dmar_domain->agaw--;
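/*
 * prepare_domain_attach_device() trims the domain's page-table depth so it
 * never exceeds what the target IOMMU can walk; the attach paths below rely
 * on this before installing context or PASID entries.
 */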
static int intel_iommu_attach_device(struct iommu_domain *domain,
	if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
	    device_is_rmrr_locked(dev)) {
		dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");

	if (is_aux_domain(dev, domain))

	/* normally dev is not mapped */
	if (unlikely(domain_context_mapped(dev))) {
		struct dmar_domain *old_domain;

		old_domain = find_domain(dev);
			dmar_remove_one_dev_info(dev);

	ret = prepare_domain_attach_device(domain, dev);

	return domain_add_dev_info(to_dmar_domain(domain), dev);

static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
	if (!is_aux_domain(dev, domain))

	ret = prepare_domain_attach_device(domain, dev);

	return aux_domain_add_dev(to_dmar_domain(domain), dev);

static void intel_iommu_detach_device(struct iommu_domain *domain,
	dmar_remove_one_dev_info(dev);

static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
	aux_domain_remove_dev(to_dmar_domain(domain), dev);
static int intel_iommu_map(struct iommu_domain *domain,
			   unsigned long iova, phys_addr_t hpa,
			   size_t size, int iommu_prot, gfp_t gfp)
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);

	if (iommu_prot & IOMMU_READ)
		prot |= DMA_PTE_READ;
	if (iommu_prot & IOMMU_WRITE)
		prot |= DMA_PTE_WRITE;
	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
		prot |= DMA_PTE_SNP;

	max_addr = iova + size;
	if (dmar_domain->max_addr < max_addr) {

		/* check if minimum agaw is sufficient for mapped address */
		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
		if (end < max_addr) {
			pr_err("%s: iommu width (%d) is not "
			       "sufficient for the mapped address (%llx)\n",
			       __func__, dmar_domain->gaw, max_addr);

		dmar_domain->max_addr = max_addr;

	/* Round up size to next multiple of PAGE_SIZE, if it and
	   the low bits of hpa would take us onto the next page */
	size = aligned_nrpages(hpa, size);
	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
				 hpa >> VTD_PAGE_SHIFT, size, prot);
static size_t intel_iommu_unmap(struct iommu_domain *domain,
				unsigned long iova, size_t size,
				struct iommu_iotlb_gather *gather)
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct page *freelist = NULL;
	unsigned long start_pfn, last_pfn;
	unsigned int npages;
	int iommu_id, level = 0;

	/* Cope with horrid API which requires us to unmap more than the
	   size argument if it happens to be a large-page mapping. */
	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));

	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
		size = VTD_PAGE_SIZE << level_to_offset_bits(level);

	start_pfn = iova >> VTD_PAGE_SHIFT;
	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;

	freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);

	npages = last_pfn - start_pfn + 1;

	for_each_domain_iommu(iommu_id, dmar_domain)
		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
				      start_pfn, npages, !freelist, 0);

	dma_free_pagelist(freelist);

	if (dmar_domain->max_addr == iova + size)
		dmar_domain->max_addr = iova;
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct dma_pte *pte;

	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
	if (pte && dma_pte_present(pte))
		phys = dma_pte_addr(pte) +
			(iova & (BIT_MASK(level_to_offset_bits(level) +
						VTD_PAGE_SHIFT) - 1));
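/*
 * The helpers below report a feature as supported only if *every* active
 * IOMMU supports it, since a domain may span devices behind different
 * hardware units.
 */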
static inline bool scalable_mode_support(void)
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	for_each_active_iommu(iommu, drhd) {
		if (!sm_supported(iommu)) {

static inline bool iommu_pasid_support(void)
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	for_each_active_iommu(iommu, drhd) {
		if (!pasid_supported(iommu)) {

static inline bool nested_mode_support(void)
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	for_each_active_iommu(iommu, drhd) {
		if (!sm_supported(iommu) || !ecap_nest(iommu->ecap)) {

static bool intel_iommu_capable(enum iommu_cap cap)
	if (cap == IOMMU_CAP_CACHE_COHERENCY)
		return domain_update_iommu_snooping(NULL) == 1;
	if (cap == IOMMU_CAP_INTR_REMAP)
		return irq_remapping_enabled == 1;
static int intel_iommu_add_device(struct device *dev)
	struct dmar_domain *dmar_domain;
	struct iommu_domain *domain;
	struct intel_iommu *iommu;
	struct iommu_group *group;

	iommu = device_to_iommu(dev, &bus, &devfn);

	iommu_device_link(&iommu->iommu, dev);

	if (translation_pre_enabled(iommu))
		dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;

	group = iommu_group_get_for_dev(dev);

	if (IS_ERR(group)) {
		ret = PTR_ERR(group);

	iommu_group_put(group);

	domain = iommu_get_domain_for_dev(dev);
	dmar_domain = to_dmar_domain(domain);
	if (domain->type == IOMMU_DOMAIN_DMA) {
		if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) {
			ret = iommu_request_dm_for_dev(dev);
				dmar_remove_one_dev_info(dev);
				dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
				domain_add_dev_info(si_domain, dev);
					 "Device uses a private identity domain.\n");

		if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) {
			ret = iommu_request_dma_domain_for_dev(dev);
				dmar_remove_one_dev_info(dev);
				dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
				if (!get_private_domain_for_dev(dev)) {
						"Failed to get a private domain.\n");

					 "Device uses a private dma domain.\n");

	if (device_needs_bounce(dev)) {
		dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n");
		set_dma_ops(dev, &bounce_dma_ops);

	iommu_device_unlink(&iommu->iommu, dev);

static void intel_iommu_remove_device(struct device *dev)
	struct intel_iommu *iommu;

	iommu = device_to_iommu(dev, &bus, &devfn);

	dmar_remove_one_dev_info(dev);

	iommu_group_remove_device(dev);

	iommu_device_unlink(&iommu->iommu, dev);

	if (device_needs_bounce(dev))
		set_dma_ops(dev, NULL);
static void intel_iommu_get_resv_regions(struct device *device,
					 struct list_head *head)
	int prot = DMA_PTE_READ | DMA_PTE_WRITE;
	struct iommu_resv_region *reg;
	struct dmar_rmrr_unit *rmrr;
	struct device *i_dev;

	down_read(&dmar_global_lock);
	for_each_rmrr_units(rmrr) {
		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
			struct iommu_resv_region *resv;
			enum iommu_resv_type type;

			if (i_dev != device &&
			    !is_downstream_to_pci_bridge(device, i_dev))

			length = rmrr->end_address - rmrr->base_address + 1;

			type = device_rmrr_is_relaxable(device) ?
				IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;

			resv = iommu_alloc_resv_region(rmrr->base_address,
						       length, prot, type);

			list_add_tail(&resv->list, head);

	up_read(&dmar_global_lock);

#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
	if (dev_is_pci(device)) {
		struct pci_dev *pdev = to_pci_dev(device);

		if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
			reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
						      IOMMU_RESV_DIRECT_RELAXABLE);
				list_add_tail(&reg->list, head);
#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */

	reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
				      IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,

	list_add_tail(&reg->list, head);
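/*
 * intel_iommu_enable_pasid() flips the PASID-enable bit in the device's
 * context entry and flushes the context cache, then turns on the
 * device-side PASID/ATS machinery via iommu_enable_dev_iotlb() if it was
 * not already enabled.
 */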
int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
	struct device_domain_info *info;
	struct context_entry *context;
	struct dmar_domain *domain;
	unsigned long flags;

	domain = find_domain(dev);

	spin_lock_irqsave(&device_domain_lock, flags);
	spin_lock(&iommu->lock);

	info = dev->archdata.iommu;
	if (!info || !info->pasid_supported)

	context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
	if (WARN_ON(!context))

	ctx_lo = context[0].lo;

	if (!(ctx_lo & CONTEXT_PASIDE)) {
		ctx_lo |= CONTEXT_PASIDE;
		context[0].lo = ctx_lo;

		iommu->flush.flush_context(iommu,
					   domain->iommu_did[iommu->seq_id],
					   PCI_DEVID(info->bus, info->devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);

	/* Enable PASID support in the device, if it wasn't already */
	if (!info->pasid_enabled)
		iommu_enable_dev_iotlb(info);

	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);
static void intel_iommu_apply_resv_region(struct device *dev,
					  struct iommu_domain *domain,
					  struct iommu_resv_region *region)
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	unsigned long start, end;

	start = IOVA_PFN(region->start);
	end   = IOVA_PFN(region->start + region->length - 1);

	WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));

static struct iommu_group *intel_iommu_device_group(struct device *dev)
	if (dev_is_pci(dev))
		return pci_device_group(dev);
	return generic_device_group(dev);
#ifdef CONFIG_INTEL_IOMMU_SVM
struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
	struct intel_iommu *iommu;

	if (iommu_dummy(dev)) {
			"No IOMMU translation for device; cannot enable SVM\n");

	iommu = device_to_iommu(dev, &bus, &devfn);
		dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
#endif /* CONFIG_INTEL_IOMMU_SVM */
static int intel_iommu_enable_auxd(struct device *dev)
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu || dmar_disabled)

	if (!sm_supported(iommu) || !pasid_supported(iommu))

	ret = intel_iommu_enable_pasid(iommu, dev);

	spin_lock_irqsave(&device_domain_lock, flags);
	info = dev->archdata.iommu;
	info->auxd_enabled = 1;
	spin_unlock_irqrestore(&device_domain_lock, flags);

static int intel_iommu_disable_auxd(struct device *dev)
	struct device_domain_info *info;
	unsigned long flags;

	spin_lock_irqsave(&device_domain_lock, flags);
	info = dev->archdata.iommu;
	if (!WARN_ON(!info))
		info->auxd_enabled = 0;
	spin_unlock_irqrestore(&device_domain_lock, flags);
/*
 * A PCI express designated vendor specific extended capability is defined
 * in the section 3.7 of Intel scalable I/O virtualization technical spec
 * for system software and tools to detect endpoint devices supporting the
 * Intel scalable IO virtualization without host driver dependency.
 *
 * Returns the address of the matching extended capability structure within
 * the device's PCI configuration space or 0 if the device does not support
 */
static int siov_find_pci_dvsec(struct pci_dev *pdev)
	pos = pci_find_next_ext_capability(pdev, 0, 0x23);
		pci_read_config_word(pdev, pos + 4, &vendor);
		pci_read_config_word(pdev, pos + 8, &id);
		if (vendor == PCI_VENDOR_ID_INTEL && id == 5)

		pos = pci_find_next_ext_capability(pdev, pos, 0x23);
intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
	if (feat == IOMMU_DEV_FEAT_AUX) {
		if (!dev_is_pci(dev) || dmar_disabled ||
		    !scalable_mode_support() || !iommu_pasid_support())

		ret = pci_pasid_features(to_pci_dev(dev));

		return !!siov_find_pci_dvsec(to_pci_dev(dev));

intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
	if (feat == IOMMU_DEV_FEAT_AUX)
		return intel_iommu_enable_auxd(dev);

intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
	if (feat == IOMMU_DEV_FEAT_AUX)
		return intel_iommu_disable_auxd(dev);

intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
	struct device_domain_info *info = dev->archdata.iommu;

	if (feat == IOMMU_DEV_FEAT_AUX)
		return scalable_mode_support() && info && info->auxd_enabled;

intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);

	return dmar_domain->default_pasid > 0 ?
			dmar_domain->default_pasid : -EINVAL;
static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
	return attach_deferred(dev);

intel_iommu_domain_set_attr(struct iommu_domain *domain,
			    enum iommu_attr attr, void *data)
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	unsigned long flags;

	if (domain->type != IOMMU_DOMAIN_UNMANAGED)

	case DOMAIN_ATTR_NESTING:
		spin_lock_irqsave(&device_domain_lock, flags);
		if (nested_mode_support() &&
		    list_empty(&dmar_domain->devices)) {
			dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
			dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
		spin_unlock_irqrestore(&device_domain_lock, flags);
const struct iommu_ops intel_iommu_ops = {
	.capable		= intel_iommu_capable,
	.domain_alloc		= intel_iommu_domain_alloc,
	.domain_free		= intel_iommu_domain_free,
	.domain_set_attr	= intel_iommu_domain_set_attr,
	.attach_dev		= intel_iommu_attach_device,
	.detach_dev		= intel_iommu_detach_device,
	.aux_attach_dev		= intel_iommu_aux_attach_device,
	.aux_detach_dev		= intel_iommu_aux_detach_device,
	.aux_get_pasid		= intel_iommu_aux_get_pasid,
	.map			= intel_iommu_map,
	.unmap			= intel_iommu_unmap,
	.iova_to_phys		= intel_iommu_iova_to_phys,
	.add_device		= intel_iommu_add_device,
	.remove_device		= intel_iommu_remove_device,
	.get_resv_regions	= intel_iommu_get_resv_regions,
	.put_resv_regions	= generic_iommu_put_resv_regions,
	.apply_resv_region	= intel_iommu_apply_resv_region,
	.device_group		= intel_iommu_device_group,
	.dev_has_feat		= intel_iommu_dev_has_feat,
	.dev_feat_enabled	= intel_iommu_dev_feat_enabled,
	.dev_enable_feat	= intel_iommu_dev_enable_feat,
	.dev_disable_feat	= intel_iommu_dev_disable_feat,
	.is_attach_deferred	= intel_iommu_is_attach_deferred,
	.pgsize_bitmap		= INTEL_IOMMU_PGSIZES,
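/*
 * Everything below is quirk handling for chipsets whose integrated
 * graphics or chipset logic interacts badly with VT-d: disabling the IOMMU
 * for the affected GPUs, forcing write-buffer flushing, or falling back to
 * strict IOTLB flushing.
 */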
static void quirk_iommu_igfx(struct pci_dev *dev)
	pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");

/* G4x/GM45 integrated gfx dmar support is totally busted. */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);

/* Broadwell igfx malfunctions with dmar */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
static void quirk_iommu_rwbf(struct pci_dev *dev)
	/*
	 * Mobile 4 Series Chipset neglects to set RWBF capability,
	 * but needs it. Same seems to hold for the desktop versions.
	 */
	pci_info(dev, "Forcing write-buffer flush capability\n");

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
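/*
 * GGC is a graphics control register in the host bridge's PCI config
 * space. Judging by the GGC_MEMORY_VT_ENABLED bit and the message printed
 * below, it reports whether the BIOS set aside memory for a VT-d shadow
 * GTT; when it did not, the quirk disables IOMMU use for graphics on these
 * (Ironlake-era) platforms.
 */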
#define GGC_MEMORY_SIZE_MASK (0xf << 8)
#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
#define GGC_MEMORY_SIZE_1M (0x1 << 8)
#define GGC_MEMORY_SIZE_2M (0x3 << 8)
#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)

static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
	if (pci_read_config_word(dev, GGC, &ggc))

	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
		pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
	} else if (dmar_map_gfx) {
		/* we have to ensure the gfx device is idle before we flush */
		pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
		intel_iommu_strict = 1;

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
/* On Tylersburg chipsets, some BIOSes have been known to enable the
   ISOCH DMAR unit for the Azalia sound device, but not give it any
   TLB entries, which causes it to deadlock. Check for that. We do
   this in a function called from init_dmars(), instead of in a PCI
   quirk, because we don't want to print the obnoxious "BIOS broken"
   message if VT-d is actually disabled. */
static void __init check_tylersburg_isoch(void)
	struct pci_dev *pdev;
	uint32_t vtisochctrl;

	/* If there's no Azalia in the system anyway, forget it. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);

	/* System Management Registers. Might be hidden, in which case
	   we can't do the sanity check. But that's OK, because the
	   known-broken BIOSes _don't_ actually hide it, so far. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);

	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {

	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
	if (vtisochctrl & 1)

	/* Drop all bits other than the number of TLB entries */
	vtisochctrl &= 0x1c;

	/* If we have the recommended number of TLB entries (16), fine. */
	if (vtisochctrl == 0x10)

	/* Zero TLB entries? You get to ride the short bus to school. */

	WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
	     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
	     dmi_get_system_info(DMI_BIOS_VENDOR),
	     dmi_get_system_info(DMI_BIOS_VERSION),
	     dmi_get_system_info(DMI_PRODUCT_VERSION));
	iommu_identity_mapping |= IDENTMAP_AZALIA;

	pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",