1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright © 2006-2014 Intel Corporation.
5 * Authors: David Woodhouse <dwmw2@infradead.org>,
6 * Ashok Raj <ashok.raj@intel.com>,
7 * Shaohua Li <shaohua.li@intel.com>,
8 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
9 * Fenghua Yu <fenghua.yu@intel.com>
10 * Joerg Roedel <jroedel@suse.de>
13 #define pr_fmt(fmt) "DMAR: " fmt
14 #define dev_fmt(fmt) pr_fmt(fmt)
16 #include <linux/init.h>
17 #include <linux/bitmap.h>
18 #include <linux/debugfs.h>
19 #include <linux/export.h>
20 #include <linux/slab.h>
21 #include <linux/irq.h>
22 #include <linux/interrupt.h>
23 #include <linux/spinlock.h>
24 #include <linux/pci.h>
25 #include <linux/dmar.h>
26 #include <linux/dma-mapping.h>
27 #include <linux/mempool.h>
28 #include <linux/memory.h>
29 #include <linux/cpu.h>
30 #include <linux/timer.h>
32 #include <linux/iova.h>
33 #include <linux/iommu.h>
34 #include <linux/intel-iommu.h>
35 #include <linux/syscore_ops.h>
36 #include <linux/tboot.h>
37 #include <linux/dmi.h>
38 #include <linux/pci-ats.h>
39 #include <linux/memblock.h>
40 #include <linux/dma-contiguous.h>
41 #include <linux/dma-direct.h>
42 #include <linux/crash_dump.h>
43 #include <linux/numa.h>
44 #include <linux/swiotlb.h>
45 #include <asm/irq_remapping.h>
46 #include <asm/cacheflush.h>
47 #include <asm/iommu.h>
48 #include <trace/events/intel_iommu.h>
50 #include "irq_remapping.h"
51 #include "intel-pasid.h"
53 #define ROOT_SIZE VTD_PAGE_SIZE
54 #define CONTEXT_SIZE VTD_PAGE_SIZE
/*
 * PCI device classification helpers.
 *
 * Each macro takes a pointer to a PCI device and tests its class code or
 * vendor/device IDs.  The argument is parenthesized at every use so that an
 * arbitrary pointer-valued expression (not just a plain identifier) expands
 * with the intended precedence.
 */
#define IS_GFX_DEVICE(pdev) (((pdev)->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_USB_DEVICE(pdev) (((pdev)->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) (((pdev)->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
61 #define IOAPIC_RANGE_START (0xfee00000)
62 #define IOAPIC_RANGE_END (0xfeefffff)
63 #define IOVA_START_ADDR (0x1000)
65 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
67 #define MAX_AGAW_WIDTH 64
68 #define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
/*
 * Largest page-frame number / largest address representable with a guest
 * address width of @gaw bits.  @gaw is parenthesized so that expression
 * arguments (e.g. "a | b") are shifted as a whole.
 */
#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
73 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
74 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
75 #define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
76 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
77 #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
79 /* IO virtual address start page frame number */
80 #define IOVA_START_PFN (1)
82 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
84 /* page table handling */
85 #define LEVEL_STRIDE (9)
86 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
89 * This bitmap is used to advertise the page sizes our hardware supports
90 * to the IOMMU core, which will then use this information to split
91 * physically contiguous memory regions it is mapping into page sizes
94 * Traditionally the IOMMU core just handed us the mappings directly,
95 * after making sure the size is an order of a 4KiB page and that the
96 * mapping has natural alignment.
98 * To retain this behavior, we currently advertise that we support
99 * all page sizes that are an order of 4KiB.
101 * If at some point we'd like to utilize the IOMMU core's new behavior,
102 * we could change this to advertise the real page sizes we support.
104 #define INTEL_IOMMU_PGSIZES (~0xFFFUL)
106 static inline int agaw_to_level(int agaw
)
111 static inline int agaw_to_width(int agaw
)
113 return min_t(int, 30 + agaw
* LEVEL_STRIDE
, MAX_AGAW_WIDTH
);
116 static inline int width_to_agaw(int width
)
118 return DIV_ROUND_UP(width
- 30, LEVEL_STRIDE
);
121 static inline unsigned int level_to_offset_bits(int level
)
123 return (level
- 1) * LEVEL_STRIDE
;
126 static inline int pfn_level_offset(unsigned long pfn
, int level
)
128 return (pfn
>> level_to_offset_bits(level
)) & LEVEL_MASK
;
/* PFN mask with the bits below level @level's index field cleared. */
static inline unsigned long level_mask(int level)
{
	unsigned int shift = level_to_offset_bits(level);

	return ~0UL << shift;
}
/* Number of PFNs spanned by a single entry at page-table level @level. */
static inline unsigned long level_size(int level)
{
	unsigned int shift = level_to_offset_bits(level);

	return 1UL << shift;
}
/* Round @pfn up to the next multiple of level_size(@level). */
static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	unsigned long round = level_size(level) - 1;

	return (pfn + round) & level_mask(level);
}
146 static inline unsigned long lvl_to_nr_pages(unsigned int lvl
)
148 return 1 << min_t(int, (lvl
- 1) * LEVEL_STRIDE
, MAX_AGAW_PFN_WIDTH
);
151 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
152 are never going to work. */
153 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn
)
155 return dma_pfn
>> (PAGE_SHIFT
- VTD_PAGE_SHIFT
);
158 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn
)
160 return mm_pfn
<< (PAGE_SHIFT
- VTD_PAGE_SHIFT
);
/* VT-d page-frame number of the memory described by struct page @pg. */
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	unsigned long mm_pfn = page_to_pfn(pg);

	return mm_to_dma_pfn(mm_pfn);
}
/* VT-d page-frame number of the page containing kernel virtual address @p. */
static inline unsigned long virt_to_dma_pfn(void *p)
{
	struct page *pg = virt_to_page(p);

	return page_to_dma_pfn(pg);
}
171 /* global iommu list, set NULL for ignored DMAR units */
172 static struct intel_iommu
**g_iommus
;
174 static void __init
check_tylersburg_isoch(void);
175 static int rwbf_quirk
;
178 * set to 1 to panic kernel if can't successfully enable VT-d
179 * (used when kernel is launched w/ TXT)
181 static int force_on
= 0;
182 int intel_iommu_tboot_noforce
;
183 static int no_platform_optin
;
185 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
188 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
191 static phys_addr_t
root_entry_lctp(struct root_entry
*re
)
196 return re
->lo
& VTD_PAGE_MASK
;
200 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
203 static phys_addr_t
root_entry_uctp(struct root_entry
*re
)
208 return re
->hi
& VTD_PAGE_MASK
;
211 static inline void context_clear_pasid_enable(struct context_entry
*context
)
213 context
->lo
&= ~(1ULL << 11);
216 static inline bool context_pasid_enabled(struct context_entry
*context
)
218 return !!(context
->lo
& (1ULL << 11));
221 static inline void context_set_copied(struct context_entry
*context
)
223 context
->hi
|= (1ull << 3);
226 static inline bool context_copied(struct context_entry
*context
)
228 return !!(context
->hi
& (1ULL << 3));
231 static inline bool __context_present(struct context_entry
*context
)
233 return (context
->lo
& 1);
/*
 * Report whether @context should be treated as present.
 *
 * With PASID enabled, the raw present bit alone decides.  Otherwise the
 * entry must have the present bit set and must not carry the "copied" marker.
 */
bool context_present(struct context_entry *context)
{
	bool present = __context_present(context);

	if (context_pasid_enabled(context))
		return present;

	return present && !context_copied(context);
}
243 static inline void context_set_present(struct context_entry
*context
)
248 static inline void context_set_fault_enable(struct context_entry
*context
)
250 context
->lo
&= (((u64
)-1) << 2) | 1;
253 static inline void context_set_translation_type(struct context_entry
*context
,
256 context
->lo
&= (((u64
)-1) << 4) | 3;
257 context
->lo
|= (value
& 3) << 2;
260 static inline void context_set_address_root(struct context_entry
*context
,
263 context
->lo
&= ~VTD_PAGE_MASK
;
264 context
->lo
|= value
& VTD_PAGE_MASK
;
267 static inline void context_set_address_width(struct context_entry
*context
,
270 context
->hi
|= value
& 7;
273 static inline void context_set_domain_id(struct context_entry
*context
,
276 context
->hi
|= (value
& ((1 << 16) - 1)) << 8;
279 static inline int context_domain_id(struct context_entry
*c
)
281 return((c
->hi
>> 8) & 0xffff);
284 static inline void context_clear_entry(struct context_entry
*context
)
291 * This domain is a static identity mapping domain.
292 * 1. This domain creates a static 1:1 mapping to all usable memory.
293 * 2. It maps to each iommu if successful.
294 * 3. Each iommu maps to this domain if successful.
296 static struct dmar_domain
*si_domain
;
297 static int hw_pass_through
= 1;
299 /* si_domain contains multiple devices */
300 #define DOMAIN_FLAG_STATIC_IDENTITY BIT(0)
303 * This is a DMA domain allocated through the iommu domain allocation
304 * interface. But one or more devices belonging to this domain have
305 * been chosen to use a private domain. We should avoid to use the
306 * map/unmap/iova_to_phys APIs on it.
308 #define DOMAIN_FLAG_LOSE_CHILDREN BIT(1)
/*
 * Iterate @idx over every iommu index in [0, g_num_of_iommus) for which
 * @domain holds a non-zero reference count.  The macro ends in a bare "if",
 * so the statement that follows it becomes the loop body.  Arguments are
 * parenthesized so expression arguments expand with the intended precedence.
 */
#define for_each_domain_iommu(idx, domain)			\
	for ((idx) = 0; (idx) < g_num_of_iommus; (idx)++)	\
		if ((domain)->iommu_refcnt[(idx)])
314 struct dmar_rmrr_unit
{
315 struct list_head list
; /* list of rmrr units */
316 struct acpi_dmar_header
*hdr
; /* ACPI header */
317 u64 base_address
; /* reserved base address*/
318 u64 end_address
; /* reserved end address */
319 struct dmar_dev_scope
*devices
; /* target devices */
320 int devices_cnt
; /* target device count */
323 struct dmar_atsr_unit
{
324 struct list_head list
; /* list of ATSR units */
325 struct acpi_dmar_header
*hdr
; /* ACPI header */
326 struct dmar_dev_scope
*devices
; /* target devices */
327 int devices_cnt
; /* target device count */
328 u8 include_all
:1; /* include all ports */
331 static LIST_HEAD(dmar_atsr_units
);
332 static LIST_HEAD(dmar_rmrr_units
);
334 #define for_each_rmrr_units(rmrr) \
335 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
337 /* bitmap for indexing intel_iommus */
338 static int g_num_of_iommus
;
340 static void domain_exit(struct dmar_domain
*domain
);
341 static void domain_remove_dev_info(struct dmar_domain
*domain
);
342 static void dmar_remove_one_dev_info(struct device
*dev
);
343 static void __dmar_remove_one_dev_info(struct device_domain_info
*info
);
344 static void domain_context_clear(struct intel_iommu
*iommu
,
346 static int domain_detach_iommu(struct dmar_domain
*domain
,
347 struct intel_iommu
*iommu
);
348 static bool device_is_rmrr_locked(struct device
*dev
);
349 static int intel_iommu_attach_device(struct iommu_domain
*domain
,
351 static phys_addr_t
intel_iommu_iova_to_phys(struct iommu_domain
*domain
,
354 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
355 int dmar_disabled
= 0;
357 int dmar_disabled
= 1;
358 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
361 int intel_iommu_enabled
= 0;
362 EXPORT_SYMBOL_GPL(intel_iommu_enabled
);
364 static int dmar_map_gfx
= 1;
365 static int dmar_forcedac
;
366 static int intel_iommu_strict
;
367 static int intel_iommu_superpage
= 1;
368 static int iommu_identity_mapping
;
369 static int intel_no_bounce
;
371 #define IDENTMAP_ALL 1
372 #define IDENTMAP_GFX 2
373 #define IDENTMAP_AZALIA 4
375 int intel_iommu_gfx_mapped
;
376 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped
);
378 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
379 #define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
380 static DEFINE_SPINLOCK(device_domain_lock
);
381 static LIST_HEAD(device_domain_list
);
383 #define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) && \
384 to_pci_dev(d)->untrusted)
387 * Iterate over elements in device_domain_list and call the specified
388 * callback @fn against each element.
390 int for_each_device_domain(int (*fn
)(struct device_domain_info
*info
,
391 void *data
), void *data
)
395 struct device_domain_info
*info
;
397 spin_lock_irqsave(&device_domain_lock
, flags
);
398 list_for_each_entry(info
, &device_domain_list
, global
) {
399 ret
= fn(info
, data
);
401 spin_unlock_irqrestore(&device_domain_lock
, flags
);
405 spin_unlock_irqrestore(&device_domain_lock
, flags
);
410 const struct iommu_ops intel_iommu_ops
;
412 static bool translation_pre_enabled(struct intel_iommu
*iommu
)
414 return (iommu
->flags
& VTD_FLAG_TRANS_PRE_ENABLED
);
417 static void clear_translation_pre_enabled(struct intel_iommu
*iommu
)
419 iommu
->flags
&= ~VTD_FLAG_TRANS_PRE_ENABLED
;
422 static void init_translation_status(struct intel_iommu
*iommu
)
426 gsts
= readl(iommu
->reg
+ DMAR_GSTS_REG
);
427 if (gsts
& DMA_GSTS_TES
)
428 iommu
->flags
|= VTD_FLAG_TRANS_PRE_ENABLED
;
431 /* Convert generic 'struct iommu_domain to private struct dmar_domain */
432 static struct dmar_domain
*to_dmar_domain(struct iommu_domain
*dom
)
434 return container_of(dom
, struct dmar_domain
, domain
);
437 static int __init
intel_iommu_setup(char *str
)
442 if (!strncmp(str
, "on", 2)) {
444 pr_info("IOMMU enabled\n");
445 } else if (!strncmp(str
, "off", 3)) {
447 no_platform_optin
= 1;
448 pr_info("IOMMU disabled\n");
449 } else if (!strncmp(str
, "igfx_off", 8)) {
451 pr_info("Disable GFX device mapping\n");
452 } else if (!strncmp(str
, "forcedac", 8)) {
453 pr_info("Forcing DAC for PCI devices\n");
455 } else if (!strncmp(str
, "strict", 6)) {
456 pr_info("Disable batched IOTLB flush\n");
457 intel_iommu_strict
= 1;
458 } else if (!strncmp(str
, "sp_off", 6)) {
459 pr_info("Disable supported super page\n");
460 intel_iommu_superpage
= 0;
461 } else if (!strncmp(str
, "sm_on", 5)) {
462 pr_info("Intel-IOMMU: scalable mode supported\n");
464 } else if (!strncmp(str
, "tboot_noforce", 13)) {
466 "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
467 intel_iommu_tboot_noforce
= 1;
468 } else if (!strncmp(str
, "nobounce", 8)) {
469 pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
473 str
+= strcspn(str
, ",");
479 __setup("intel_iommu=", intel_iommu_setup
);
481 static struct kmem_cache
*iommu_domain_cache
;
482 static struct kmem_cache
*iommu_devinfo_cache
;
484 static struct dmar_domain
* get_iommu_domain(struct intel_iommu
*iommu
, u16 did
)
486 struct dmar_domain
**domains
;
489 domains
= iommu
->domains
[idx
];
493 return domains
[did
& 0xff];
496 static void set_iommu_domain(struct intel_iommu
*iommu
, u16 did
,
497 struct dmar_domain
*domain
)
499 struct dmar_domain
**domains
;
502 if (!iommu
->domains
[idx
]) {
503 size_t size
= 256 * sizeof(struct dmar_domain
*);
504 iommu
->domains
[idx
] = kzalloc(size
, GFP_ATOMIC
);
507 domains
= iommu
->domains
[idx
];
508 if (WARN_ON(!domains
))
511 domains
[did
& 0xff] = domain
;
514 void *alloc_pgtable_page(int node
)
519 page
= alloc_pages_node(node
, GFP_ATOMIC
| __GFP_ZERO
, 0);
521 vaddr
= page_address(page
);
/* Hand the page at kernel virtual address @vaddr to free_page(). */
void free_pgtable_page(void *vaddr)
{
	unsigned long addr = (unsigned long)vaddr;

	free_page(addr);
}
530 static inline void *alloc_domain_mem(void)
532 return kmem_cache_alloc(iommu_domain_cache
, GFP_ATOMIC
);
535 static void free_domain_mem(void *vaddr
)
537 kmem_cache_free(iommu_domain_cache
, vaddr
);
540 static inline void * alloc_devinfo_mem(void)
542 return kmem_cache_alloc(iommu_devinfo_cache
, GFP_ATOMIC
);
545 static inline void free_devinfo_mem(void *vaddr
)
547 kmem_cache_free(iommu_devinfo_cache
, vaddr
);
550 static inline int domain_type_is_si(struct dmar_domain
*domain
)
552 return domain
->flags
& DOMAIN_FLAG_STATIC_IDENTITY
;
555 static inline int domain_pfn_supported(struct dmar_domain
*domain
,
558 int addr_width
= agaw_to_width(domain
->agaw
) - VTD_PAGE_SHIFT
;
560 return !(addr_width
< BITS_PER_LONG
&& pfn
>> addr_width
);
563 static int __iommu_calculate_agaw(struct intel_iommu
*iommu
, int max_gaw
)
568 sagaw
= cap_sagaw(iommu
->cap
);
569 for (agaw
= width_to_agaw(max_gaw
);
571 if (test_bit(agaw
, &sagaw
))
579 * Calculate max SAGAW for each iommu.
581 int iommu_calculate_max_sagaw(struct intel_iommu
*iommu
)
583 return __iommu_calculate_agaw(iommu
, MAX_AGAW_WIDTH
);
587 * calculate agaw for each iommu.
588 * "SAGAW" may be different across iommus, use a default agaw, and
589 * get a supported less agaw for iommus that don't support the default agaw.
591 int iommu_calculate_agaw(struct intel_iommu
*iommu
)
593 return __iommu_calculate_agaw(iommu
, DEFAULT_DOMAIN_ADDRESS_WIDTH
);
596 /* This function only returns a single iommu in a domain */
597 struct intel_iommu
*domain_get_iommu(struct dmar_domain
*domain
)
601 /* si_domain and vm domain should not get here. */
602 if (WARN_ON(domain
->domain
.type
!= IOMMU_DOMAIN_DMA
))
605 for_each_domain_iommu(iommu_id
, domain
)
608 if (iommu_id
< 0 || iommu_id
>= g_num_of_iommus
)
611 return g_iommus
[iommu_id
];
614 static void domain_update_iommu_coherency(struct dmar_domain
*domain
)
616 struct dmar_drhd_unit
*drhd
;
617 struct intel_iommu
*iommu
;
621 domain
->iommu_coherency
= 1;
623 for_each_domain_iommu(i
, domain
) {
625 if (!ecap_coherent(g_iommus
[i
]->ecap
)) {
626 domain
->iommu_coherency
= 0;
633 /* No hardware attached; use lowest common denominator */
635 for_each_active_iommu(iommu
, drhd
) {
636 if (!ecap_coherent(iommu
->ecap
)) {
637 domain
->iommu_coherency
= 0;
644 static int domain_update_iommu_snooping(struct intel_iommu
*skip
)
646 struct dmar_drhd_unit
*drhd
;
647 struct intel_iommu
*iommu
;
651 for_each_active_iommu(iommu
, drhd
) {
653 if (!ecap_sc_support(iommu
->ecap
)) {
664 static int domain_update_iommu_superpage(struct intel_iommu
*skip
)
666 struct dmar_drhd_unit
*drhd
;
667 struct intel_iommu
*iommu
;
670 if (!intel_iommu_superpage
) {
674 /* set iommu_superpage to the smallest common denominator */
676 for_each_active_iommu(iommu
, drhd
) {
678 mask
&= cap_super_page_val(iommu
->cap
);
688 /* Some capabilities may be different across iommus */
689 static void domain_update_iommu_cap(struct dmar_domain
*domain
)
691 domain_update_iommu_coherency(domain
);
692 domain
->iommu_snooping
= domain_update_iommu_snooping(NULL
);
693 domain
->iommu_superpage
= domain_update_iommu_superpage(NULL
);
696 struct context_entry
*iommu_context_addr(struct intel_iommu
*iommu
, u8 bus
,
699 struct root_entry
*root
= &iommu
->root_entry
[bus
];
700 struct context_entry
*context
;
704 if (sm_supported(iommu
)) {
712 context
= phys_to_virt(*entry
& VTD_PAGE_MASK
);
714 unsigned long phy_addr
;
718 context
= alloc_pgtable_page(iommu
->node
);
722 __iommu_flush_cache(iommu
, (void *)context
, CONTEXT_SIZE
);
723 phy_addr
= virt_to_phys((void *)context
);
724 *entry
= phy_addr
| 1;
725 __iommu_flush_cache(iommu
, entry
, sizeof(*entry
));
727 return &context
[devfn
];
730 static int iommu_dummy(struct device
*dev
)
732 return dev
->archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
;
736 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
737 * sub-hierarchy of a candidate PCI-PCI bridge
738 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
739 * @bridge: the candidate PCI-PCI bridge
741 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
744 is_downstream_to_pci_bridge(struct device
*dev
, struct device
*bridge
)
746 struct pci_dev
*pdev
, *pbridge
;
748 if (!dev_is_pci(dev
) || !dev_is_pci(bridge
))
751 pdev
= to_pci_dev(dev
);
752 pbridge
= to_pci_dev(bridge
);
754 if (pbridge
->subordinate
&&
755 pbridge
->subordinate
->number
<= pdev
->bus
->number
&&
756 pbridge
->subordinate
->busn_res
.end
>= pdev
->bus
->number
)
762 static struct intel_iommu
*device_to_iommu(struct device
*dev
, u8
*bus
, u8
*devfn
)
764 struct dmar_drhd_unit
*drhd
= NULL
;
765 struct intel_iommu
*iommu
;
767 struct pci_dev
*pdev
= NULL
;
771 if (iommu_dummy(dev
))
774 if (dev_is_pci(dev
)) {
775 struct pci_dev
*pf_pdev
;
777 pdev
= pci_real_dma_dev(to_pci_dev(dev
));
779 /* VFs aren't listed in scope tables; we need to look up
780 * the PF instead to find the IOMMU. */
781 pf_pdev
= pci_physfn(pdev
);
783 segment
= pci_domain_nr(pdev
->bus
);
784 } else if (has_acpi_companion(dev
))
785 dev
= &ACPI_COMPANION(dev
)->dev
;
788 for_each_active_iommu(iommu
, drhd
) {
789 if (pdev
&& segment
!= drhd
->segment
)
792 for_each_active_dev_scope(drhd
->devices
,
793 drhd
->devices_cnt
, i
, tmp
) {
795 /* For a VF use its original BDF# not that of the PF
796 * which we used for the IOMMU lookup. Strictly speaking
797 * we could do this for all PCI devices; we only need to
798 * get the BDF# from the scope table for ACPI matches. */
799 if (pdev
&& pdev
->is_virtfn
)
802 *bus
= drhd
->devices
[i
].bus
;
803 *devfn
= drhd
->devices
[i
].devfn
;
807 if (is_downstream_to_pci_bridge(dev
, tmp
))
811 if (pdev
&& drhd
->include_all
) {
813 *bus
= pdev
->bus
->number
;
814 *devfn
= pdev
->devfn
;
825 static void domain_flush_cache(struct dmar_domain
*domain
,
826 void *addr
, int size
)
828 if (!domain
->iommu_coherency
)
829 clflush_cache_range(addr
, size
);
832 static int device_context_mapped(struct intel_iommu
*iommu
, u8 bus
, u8 devfn
)
834 struct context_entry
*context
;
838 spin_lock_irqsave(&iommu
->lock
, flags
);
839 context
= iommu_context_addr(iommu
, bus
, devfn
, 0);
841 ret
= context_present(context
);
842 spin_unlock_irqrestore(&iommu
->lock
, flags
);
846 static void free_context_table(struct intel_iommu
*iommu
)
850 struct context_entry
*context
;
852 spin_lock_irqsave(&iommu
->lock
, flags
);
853 if (!iommu
->root_entry
) {
856 for (i
= 0; i
< ROOT_ENTRY_NR
; i
++) {
857 context
= iommu_context_addr(iommu
, i
, 0, 0);
859 free_pgtable_page(context
);
861 if (!sm_supported(iommu
))
864 context
= iommu_context_addr(iommu
, i
, 0x80, 0);
866 free_pgtable_page(context
);
869 free_pgtable_page(iommu
->root_entry
);
870 iommu
->root_entry
= NULL
;
872 spin_unlock_irqrestore(&iommu
->lock
, flags
);
875 static struct dma_pte
*pfn_to_dma_pte(struct dmar_domain
*domain
,
876 unsigned long pfn
, int *target_level
)
878 struct dma_pte
*parent
, *pte
;
879 int level
= agaw_to_level(domain
->agaw
);
882 BUG_ON(!domain
->pgd
);
884 if (!domain_pfn_supported(domain
, pfn
))
885 /* Address beyond IOMMU's addressing capabilities. */
888 parent
= domain
->pgd
;
893 offset
= pfn_level_offset(pfn
, level
);
894 pte
= &parent
[offset
];
895 if (!*target_level
&& (dma_pte_superpage(pte
) || !dma_pte_present(pte
)))
897 if (level
== *target_level
)
900 if (!dma_pte_present(pte
)) {
903 tmp_page
= alloc_pgtable_page(domain
->nid
);
908 domain_flush_cache(domain
, tmp_page
, VTD_PAGE_SIZE
);
909 pteval
= ((uint64_t)virt_to_dma_pfn(tmp_page
) << VTD_PAGE_SHIFT
) | DMA_PTE_READ
| DMA_PTE_WRITE
;
910 if (cmpxchg64(&pte
->val
, 0ULL, pteval
))
911 /* Someone else set it while we were thinking; use theirs. */
912 free_pgtable_page(tmp_page
);
914 domain_flush_cache(domain
, pte
, sizeof(*pte
));
919 parent
= phys_to_virt(dma_pte_addr(pte
));
924 *target_level
= level
;
929 /* return address's pte at specific level */
930 static struct dma_pte
*dma_pfn_level_pte(struct dmar_domain
*domain
,
932 int level
, int *large_page
)
934 struct dma_pte
*parent
, *pte
;
935 int total
= agaw_to_level(domain
->agaw
);
938 parent
= domain
->pgd
;
939 while (level
<= total
) {
940 offset
= pfn_level_offset(pfn
, total
);
941 pte
= &parent
[offset
];
945 if (!dma_pte_present(pte
)) {
950 if (dma_pte_superpage(pte
)) {
955 parent
= phys_to_virt(dma_pte_addr(pte
));
961 /* clear last level pte, a tlb flush should be followed */
962 static void dma_pte_clear_range(struct dmar_domain
*domain
,
963 unsigned long start_pfn
,
964 unsigned long last_pfn
)
966 unsigned int large_page
;
967 struct dma_pte
*first_pte
, *pte
;
969 BUG_ON(!domain_pfn_supported(domain
, start_pfn
));
970 BUG_ON(!domain_pfn_supported(domain
, last_pfn
));
971 BUG_ON(start_pfn
> last_pfn
);
973 /* we don't need lock here; nobody else touches the iova range */
976 first_pte
= pte
= dma_pfn_level_pte(domain
, start_pfn
, 1, &large_page
);
978 start_pfn
= align_to_level(start_pfn
+ 1, large_page
+ 1);
983 start_pfn
+= lvl_to_nr_pages(large_page
);
985 } while (start_pfn
<= last_pfn
&& !first_pte_in_page(pte
));
987 domain_flush_cache(domain
, first_pte
,
988 (void *)pte
- (void *)first_pte
);
990 } while (start_pfn
&& start_pfn
<= last_pfn
);
993 static void dma_pte_free_level(struct dmar_domain
*domain
, int level
,
994 int retain_level
, struct dma_pte
*pte
,
995 unsigned long pfn
, unsigned long start_pfn
,
996 unsigned long last_pfn
)
998 pfn
= max(start_pfn
, pfn
);
999 pte
= &pte
[pfn_level_offset(pfn
, level
)];
1002 unsigned long level_pfn
;
1003 struct dma_pte
*level_pte
;
1005 if (!dma_pte_present(pte
) || dma_pte_superpage(pte
))
1008 level_pfn
= pfn
& level_mask(level
);
1009 level_pte
= phys_to_virt(dma_pte_addr(pte
));
1012 dma_pte_free_level(domain
, level
- 1, retain_level
,
1013 level_pte
, level_pfn
, start_pfn
,
1018 * Free the page table if we're below the level we want to
1019 * retain and the range covers the entire table.
1021 if (level
< retain_level
&& !(start_pfn
> level_pfn
||
1022 last_pfn
< level_pfn
+ level_size(level
) - 1)) {
1024 domain_flush_cache(domain
, pte
, sizeof(*pte
));
1025 free_pgtable_page(level_pte
);
1028 pfn
+= level_size(level
);
1029 } while (!first_pte_in_page(++pte
) && pfn
<= last_pfn
);
1033 * clear last level (leaf) ptes and free page table pages below the
1034 * level we wish to keep intact.
1036 static void dma_pte_free_pagetable(struct dmar_domain
*domain
,
1037 unsigned long start_pfn
,
1038 unsigned long last_pfn
,
1041 BUG_ON(!domain_pfn_supported(domain
, start_pfn
));
1042 BUG_ON(!domain_pfn_supported(domain
, last_pfn
));
1043 BUG_ON(start_pfn
> last_pfn
);
1045 dma_pte_clear_range(domain
, start_pfn
, last_pfn
);
1047 /* We don't need lock here; nobody else touches the iova range */
1048 dma_pte_free_level(domain
, agaw_to_level(domain
->agaw
), retain_level
,
1049 domain
->pgd
, 0, start_pfn
, last_pfn
);
1052 if (start_pfn
== 0 && last_pfn
== DOMAIN_MAX_PFN(domain
->gaw
)) {
1053 free_pgtable_page(domain
->pgd
);
1058 /* When a page at a given level is being unlinked from its parent, we don't
1059 need to *modify* it at all. All we need to do is make a list of all the
1060 pages which can be freed just as soon as we've flushed the IOTLB and we
1061 know the hardware page-walk will no longer touch them.
1062 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1064 static struct page
*dma_pte_list_pagetables(struct dmar_domain
*domain
,
1065 int level
, struct dma_pte
*pte
,
1066 struct page
*freelist
)
1070 pg
= pfn_to_page(dma_pte_addr(pte
) >> PAGE_SHIFT
);
1071 pg
->freelist
= freelist
;
1077 pte
= page_address(pg
);
1079 if (dma_pte_present(pte
) && !dma_pte_superpage(pte
))
1080 freelist
= dma_pte_list_pagetables(domain
, level
- 1,
1083 } while (!first_pte_in_page(pte
));
1088 static struct page
*dma_pte_clear_level(struct dmar_domain
*domain
, int level
,
1089 struct dma_pte
*pte
, unsigned long pfn
,
1090 unsigned long start_pfn
,
1091 unsigned long last_pfn
,
1092 struct page
*freelist
)
1094 struct dma_pte
*first_pte
= NULL
, *last_pte
= NULL
;
1096 pfn
= max(start_pfn
, pfn
);
1097 pte
= &pte
[pfn_level_offset(pfn
, level
)];
1100 unsigned long level_pfn
;
1102 if (!dma_pte_present(pte
))
1105 level_pfn
= pfn
& level_mask(level
);
1107 /* If range covers entire pagetable, free it */
1108 if (start_pfn
<= level_pfn
&&
1109 last_pfn
>= level_pfn
+ level_size(level
) - 1) {
1110 /* These subordinate page tables are going away entirely. Don't
1111 bother to clear them; we're just going to *free* them. */
1112 if (level
> 1 && !dma_pte_superpage(pte
))
1113 freelist
= dma_pte_list_pagetables(domain
, level
- 1, pte
, freelist
);
1119 } else if (level
> 1) {
1120 /* Recurse down into a level that isn't *entirely* obsolete */
1121 freelist
= dma_pte_clear_level(domain
, level
- 1,
1122 phys_to_virt(dma_pte_addr(pte
)),
1123 level_pfn
, start_pfn
, last_pfn
,
1127 pfn
+= level_size(level
);
1128 } while (!first_pte_in_page(++pte
) && pfn
<= last_pfn
);
1131 domain_flush_cache(domain
, first_pte
,
1132 (void *)++last_pte
- (void *)first_pte
);
1137 /* We can't just free the pages because the IOMMU may still be walking
1138 the page tables, and may have cached the intermediate levels. The
1139 pages can only be freed after the IOTLB flush has been done. */
1140 static struct page
*domain_unmap(struct dmar_domain
*domain
,
1141 unsigned long start_pfn
,
1142 unsigned long last_pfn
)
1144 struct page
*freelist
;
1146 BUG_ON(!domain_pfn_supported(domain
, start_pfn
));
1147 BUG_ON(!domain_pfn_supported(domain
, last_pfn
));
1148 BUG_ON(start_pfn
> last_pfn
);
1150 /* we don't need lock here; nobody else touches the iova range */
1151 freelist
= dma_pte_clear_level(domain
, agaw_to_level(domain
->agaw
),
1152 domain
->pgd
, 0, start_pfn
, last_pfn
, NULL
);
1155 if (start_pfn
== 0 && last_pfn
== DOMAIN_MAX_PFN(domain
->gaw
)) {
1156 struct page
*pgd_page
= virt_to_page(domain
->pgd
);
1157 pgd_page
->freelist
= freelist
;
1158 freelist
= pgd_page
;
1166 static void dma_free_pagelist(struct page
*freelist
)
1170 while ((pg
= freelist
)) {
1171 freelist
= pg
->freelist
;
1172 free_pgtable_page(page_address(pg
));
/*
 * Free a page freelist whose head 'struct page' pointer has been passed
 * as an unsigned long in @data.
 */
static void iova_entry_free(unsigned long data)
{
	dma_free_pagelist((struct page *)data);
}
1183 /* iommu handling */
1184 static int iommu_alloc_root_entry(struct intel_iommu
*iommu
)
1186 struct root_entry
*root
;
1187 unsigned long flags
;
1189 root
= (struct root_entry
*)alloc_pgtable_page(iommu
->node
);
1191 pr_err("Allocating root entry for %s failed\n",
1196 __iommu_flush_cache(iommu
, root
, ROOT_SIZE
);
1198 spin_lock_irqsave(&iommu
->lock
, flags
);
1199 iommu
->root_entry
= root
;
1200 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1205 static void iommu_set_root_entry(struct intel_iommu
*iommu
)
1211 addr
= virt_to_phys(iommu
->root_entry
);
1212 if (sm_supported(iommu
))
1213 addr
|= DMA_RTADDR_SMT
;
1215 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
1216 dmar_writeq(iommu
->reg
+ DMAR_RTADDR_REG
, addr
);
1218 writel(iommu
->gcmd
| DMA_GCMD_SRTP
, iommu
->reg
+ DMAR_GCMD_REG
);
1220 /* Make sure hardware complete it */
1221 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
1222 readl
, (sts
& DMA_GSTS_RTPS
), sts
);
1224 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1227 void iommu_flush_write_buffer(struct intel_iommu
*iommu
)
1232 if (!rwbf_quirk
&& !cap_rwbf(iommu
->cap
))
1235 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
1236 writel(iommu
->gcmd
| DMA_GCMD_WBF
, iommu
->reg
+ DMAR_GCMD_REG
);
1238 /* Make sure hardware complete it */
1239 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
1240 readl
, (!(val
& DMA_GSTS_WBFS
)), val
);
1242 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1245 /* return value determine if we need a write buffer flush */
1246 static void __iommu_flush_context(struct intel_iommu
*iommu
,
1247 u16 did
, u16 source_id
, u8 function_mask
,
1254 case DMA_CCMD_GLOBAL_INVL
:
1255 val
= DMA_CCMD_GLOBAL_INVL
;
1257 case DMA_CCMD_DOMAIN_INVL
:
1258 val
= DMA_CCMD_DOMAIN_INVL
|DMA_CCMD_DID(did
);
1260 case DMA_CCMD_DEVICE_INVL
:
1261 val
= DMA_CCMD_DEVICE_INVL
|DMA_CCMD_DID(did
)
1262 | DMA_CCMD_SID(source_id
) | DMA_CCMD_FM(function_mask
);
1267 val
|= DMA_CCMD_ICC
;
1269 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
1270 dmar_writeq(iommu
->reg
+ DMAR_CCMD_REG
, val
);
1272 /* Make sure hardware complete it */
1273 IOMMU_WAIT_OP(iommu
, DMAR_CCMD_REG
,
1274 dmar_readq
, (!(val
& DMA_CCMD_ICC
)), val
);
1276 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1279 /* return value determine if we need a write buffer flush */
1280 static void __iommu_flush_iotlb(struct intel_iommu
*iommu
, u16 did
,
1281 u64 addr
, unsigned int size_order
, u64 type
)
1283 int tlb_offset
= ecap_iotlb_offset(iommu
->ecap
);
1284 u64 val
= 0, val_iva
= 0;
1288 case DMA_TLB_GLOBAL_FLUSH
:
1289 /* global flush doesn't need set IVA_REG */
1290 val
= DMA_TLB_GLOBAL_FLUSH
|DMA_TLB_IVT
;
1292 case DMA_TLB_DSI_FLUSH
:
1293 val
= DMA_TLB_DSI_FLUSH
|DMA_TLB_IVT
|DMA_TLB_DID(did
);
1295 case DMA_TLB_PSI_FLUSH
:
1296 val
= DMA_TLB_PSI_FLUSH
|DMA_TLB_IVT
|DMA_TLB_DID(did
);
1297 /* IH bit is passed in as part of address */
1298 val_iva
= size_order
| addr
;
1303 /* Note: set drain read/write */
1306 * This is probably to be super secure.. Looks like we can
1307 * ignore it without any impact.
1309 if (cap_read_drain(iommu
->cap
))
1310 val
|= DMA_TLB_READ_DRAIN
;
1312 if (cap_write_drain(iommu
->cap
))
1313 val
|= DMA_TLB_WRITE_DRAIN
;
1315 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
1316 /* Note: Only uses first TLB reg currently */
1318 dmar_writeq(iommu
->reg
+ tlb_offset
, val_iva
);
1319 dmar_writeq(iommu
->reg
+ tlb_offset
+ 8, val
);
1321 /* Make sure hardware complete it */
1322 IOMMU_WAIT_OP(iommu
, tlb_offset
+ 8,
1323 dmar_readq
, (!(val
& DMA_TLB_IVT
)), val
);
1325 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1327 /* check IOTLB invalidation granularity */
1328 if (DMA_TLB_IAIG(val
) == 0)
1329 pr_err("Flush IOTLB failed\n");
1330 if (DMA_TLB_IAIG(val
) != DMA_TLB_IIRG(type
))
1331 pr_debug("TLB flush request %Lx, actual %Lx\n",
1332 (unsigned long long)DMA_TLB_IIRG(type
),
1333 (unsigned long long)DMA_TLB_IAIG(val
));
1336 static struct device_domain_info
*
1337 iommu_support_dev_iotlb (struct dmar_domain
*domain
, struct intel_iommu
*iommu
,
1340 struct device_domain_info
*info
;
1342 assert_spin_locked(&device_domain_lock
);
1347 list_for_each_entry(info
, &domain
->devices
, link
)
1348 if (info
->iommu
== iommu
&& info
->bus
== bus
&&
1349 info
->devfn
== devfn
) {
1350 if (info
->ats_supported
&& info
->dev
)
1358 static void domain_update_iotlb(struct dmar_domain
*domain
)
1360 struct device_domain_info
*info
;
1361 bool has_iotlb_device
= false;
1363 assert_spin_locked(&device_domain_lock
);
1365 list_for_each_entry(info
, &domain
->devices
, link
) {
1366 struct pci_dev
*pdev
;
1368 if (!info
->dev
|| !dev_is_pci(info
->dev
))
1371 pdev
= to_pci_dev(info
->dev
);
1372 if (pdev
->ats_enabled
) {
1373 has_iotlb_device
= true;
1378 domain
->has_iotlb_device
= has_iotlb_device
;
1381 static void iommu_enable_dev_iotlb(struct device_domain_info
*info
)
1383 struct pci_dev
*pdev
;
1385 assert_spin_locked(&device_domain_lock
);
1387 if (!info
|| !dev_is_pci(info
->dev
))
1390 pdev
= to_pci_dev(info
->dev
);
1391 /* For IOMMU that supports device IOTLB throttling (DIT), we assign
1392 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1393 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1394 * reserved, which should be set to 0.
1396 if (!ecap_dit(info
->iommu
->ecap
))
1399 struct pci_dev
*pf_pdev
;
1401 /* pdev will be returned if device is not a vf */
1402 pf_pdev
= pci_physfn(pdev
);
1403 info
->pfsid
= pci_dev_id(pf_pdev
);
1406 #ifdef CONFIG_INTEL_IOMMU_SVM
1407 /* The PCIe spec, in its wisdom, declares that the behaviour of
1408 the device if you enable PASID support after ATS support is
1409 undefined. So always enable PASID support on devices which
1410 have it, even if we can't yet know if we're ever going to
1412 if (info
->pasid_supported
&& !pci_enable_pasid(pdev
, info
->pasid_supported
& ~1))
1413 info
->pasid_enabled
= 1;
1415 if (info
->pri_supported
&&
1416 (info
->pasid_enabled
? pci_prg_resp_pasid_required(pdev
) : 1) &&
1417 !pci_reset_pri(pdev
) && !pci_enable_pri(pdev
, 32))
1418 info
->pri_enabled
= 1;
1420 if (!pdev
->untrusted
&& info
->ats_supported
&&
1421 pci_ats_page_aligned(pdev
) &&
1422 !pci_enable_ats(pdev
, VTD_PAGE_SHIFT
)) {
1423 info
->ats_enabled
= 1;
1424 domain_update_iotlb(info
->domain
);
1425 info
->ats_qdep
= pci_ats_queue_depth(pdev
);
1429 static void iommu_disable_dev_iotlb(struct device_domain_info
*info
)
1431 struct pci_dev
*pdev
;
1433 assert_spin_locked(&device_domain_lock
);
1435 if (!dev_is_pci(info
->dev
))
1438 pdev
= to_pci_dev(info
->dev
);
1440 if (info
->ats_enabled
) {
1441 pci_disable_ats(pdev
);
1442 info
->ats_enabled
= 0;
1443 domain_update_iotlb(info
->domain
);
1445 #ifdef CONFIG_INTEL_IOMMU_SVM
1446 if (info
->pri_enabled
) {
1447 pci_disable_pri(pdev
);
1448 info
->pri_enabled
= 0;
1450 if (info
->pasid_enabled
) {
1451 pci_disable_pasid(pdev
);
1452 info
->pasid_enabled
= 0;
1457 static void iommu_flush_dev_iotlb(struct dmar_domain
*domain
,
1458 u64 addr
, unsigned mask
)
1461 unsigned long flags
;
1462 struct device_domain_info
*info
;
1464 if (!domain
->has_iotlb_device
)
1467 spin_lock_irqsave(&device_domain_lock
, flags
);
1468 list_for_each_entry(info
, &domain
->devices
, link
) {
1469 if (!info
->ats_enabled
)
1472 sid
= info
->bus
<< 8 | info
->devfn
;
1473 qdep
= info
->ats_qdep
;
1474 qi_flush_dev_iotlb(info
->iommu
, sid
, info
->pfsid
,
1477 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1480 static void iommu_flush_iotlb_psi(struct intel_iommu
*iommu
,
1481 struct dmar_domain
*domain
,
1482 unsigned long pfn
, unsigned int pages
,
1485 unsigned int mask
= ilog2(__roundup_pow_of_two(pages
));
1486 uint64_t addr
= (uint64_t)pfn
<< VTD_PAGE_SHIFT
;
1487 u16 did
= domain
->iommu_did
[iommu
->seq_id
];
1494 * Fallback to domain selective flush if no PSI support or the size is
1496 * PSI requires page size to be 2 ^ x, and the base address is naturally
1497 * aligned to the size
1499 if (!cap_pgsel_inv(iommu
->cap
) || mask
> cap_max_amask_val(iommu
->cap
))
1500 iommu
->flush
.flush_iotlb(iommu
, did
, 0, 0,
1503 iommu
->flush
.flush_iotlb(iommu
, did
, addr
| ih
, mask
,
1507 * In caching mode, changes of pages from non-present to present require
1508 * flush. However, device IOTLB doesn't need to be flushed in this case.
1510 if (!cap_caching_mode(iommu
->cap
) || !map
)
1511 iommu_flush_dev_iotlb(domain
, addr
, mask
);
1514 /* Notification for newly created mappings */
1515 static inline void __mapping_notify_one(struct intel_iommu
*iommu
,
1516 struct dmar_domain
*domain
,
1517 unsigned long pfn
, unsigned int pages
)
1519 /* It's a non-present to present mapping. Only flush if caching mode */
1520 if (cap_caching_mode(iommu
->cap
))
1521 iommu_flush_iotlb_psi(iommu
, domain
, pfn
, pages
, 0, 1);
1523 iommu_flush_write_buffer(iommu
);
1526 static void iommu_flush_iova(struct iova_domain
*iovad
)
1528 struct dmar_domain
*domain
;
1531 domain
= container_of(iovad
, struct dmar_domain
, iovad
);
1533 for_each_domain_iommu(idx
, domain
) {
1534 struct intel_iommu
*iommu
= g_iommus
[idx
];
1535 u16 did
= domain
->iommu_did
[iommu
->seq_id
];
1537 iommu
->flush
.flush_iotlb(iommu
, did
, 0, 0, DMA_TLB_DSI_FLUSH
);
1539 if (!cap_caching_mode(iommu
->cap
))
1540 iommu_flush_dev_iotlb(get_iommu_domain(iommu
, did
),
1541 0, MAX_AGAW_PFN_WIDTH
);
1545 static void iommu_disable_protect_mem_regions(struct intel_iommu
*iommu
)
1548 unsigned long flags
;
1550 if (!cap_plmr(iommu
->cap
) && !cap_phmr(iommu
->cap
))
1553 raw_spin_lock_irqsave(&iommu
->register_lock
, flags
);
1554 pmen
= readl(iommu
->reg
+ DMAR_PMEN_REG
);
1555 pmen
&= ~DMA_PMEN_EPM
;
1556 writel(pmen
, iommu
->reg
+ DMAR_PMEN_REG
);
1558 /* wait for the protected region status bit to clear */
1559 IOMMU_WAIT_OP(iommu
, DMAR_PMEN_REG
,
1560 readl
, !(pmen
& DMA_PMEN_PRS
), pmen
);
1562 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flags
);
1565 static void iommu_enable_translation(struct intel_iommu
*iommu
)
1568 unsigned long flags
;
1570 raw_spin_lock_irqsave(&iommu
->register_lock
, flags
);
1571 iommu
->gcmd
|= DMA_GCMD_TE
;
1572 writel(iommu
->gcmd
, iommu
->reg
+ DMAR_GCMD_REG
);
1574 /* Make sure hardware complete it */
1575 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
1576 readl
, (sts
& DMA_GSTS_TES
), sts
);
1578 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flags
);
1581 static void iommu_disable_translation(struct intel_iommu
*iommu
)
1586 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
1587 iommu
->gcmd
&= ~DMA_GCMD_TE
;
1588 writel(iommu
->gcmd
, iommu
->reg
+ DMAR_GCMD_REG
);
1590 /* Make sure hardware complete it */
1591 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
1592 readl
, (!(sts
& DMA_GSTS_TES
)), sts
);
1594 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1597 static int iommu_init_domains(struct intel_iommu
*iommu
)
1599 u32 ndomains
, nlongs
;
1602 ndomains
= cap_ndoms(iommu
->cap
);
1603 pr_debug("%s: Number of Domains supported <%d>\n",
1604 iommu
->name
, ndomains
);
1605 nlongs
= BITS_TO_LONGS(ndomains
);
1607 spin_lock_init(&iommu
->lock
);
1609 iommu
->domain_ids
= kcalloc(nlongs
, sizeof(unsigned long), GFP_KERNEL
);
1610 if (!iommu
->domain_ids
) {
1611 pr_err("%s: Allocating domain id array failed\n",
1616 size
= (ALIGN(ndomains
, 256) >> 8) * sizeof(struct dmar_domain
**);
1617 iommu
->domains
= kzalloc(size
, GFP_KERNEL
);
1619 if (iommu
->domains
) {
1620 size
= 256 * sizeof(struct dmar_domain
*);
1621 iommu
->domains
[0] = kzalloc(size
, GFP_KERNEL
);
1624 if (!iommu
->domains
|| !iommu
->domains
[0]) {
1625 pr_err("%s: Allocating domain array failed\n",
1627 kfree(iommu
->domain_ids
);
1628 kfree(iommu
->domains
);
1629 iommu
->domain_ids
= NULL
;
1630 iommu
->domains
= NULL
;
1635 * If Caching mode is set, then invalid translations are tagged
1636 * with domain-id 0, hence we need to pre-allocate it. We also
1637 * use domain-id 0 as a marker for non-allocated domain-id, so
1638 * make sure it is not used for a real domain.
1640 set_bit(0, iommu
->domain_ids
);
1643 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1644 * entry for first-level or pass-through translation modes should
1645 * be programmed with a domain id different from those used for
1646 * second-level or nested translation. We reserve a domain id for
1649 if (sm_supported(iommu
))
1650 set_bit(FLPT_DEFAULT_DID
, iommu
->domain_ids
);
1655 static void disable_dmar_iommu(struct intel_iommu
*iommu
)
1657 struct device_domain_info
*info
, *tmp
;
1658 unsigned long flags
;
1660 if (!iommu
->domains
|| !iommu
->domain_ids
)
1663 spin_lock_irqsave(&device_domain_lock
, flags
);
1664 list_for_each_entry_safe(info
, tmp
, &device_domain_list
, global
) {
1665 if (info
->iommu
!= iommu
)
1668 if (!info
->dev
|| !info
->domain
)
1671 __dmar_remove_one_dev_info(info
);
1673 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1675 if (iommu
->gcmd
& DMA_GCMD_TE
)
1676 iommu_disable_translation(iommu
);
1679 static void free_dmar_iommu(struct intel_iommu
*iommu
)
1681 if ((iommu
->domains
) && (iommu
->domain_ids
)) {
1682 int elems
= ALIGN(cap_ndoms(iommu
->cap
), 256) >> 8;
1685 for (i
= 0; i
< elems
; i
++)
1686 kfree(iommu
->domains
[i
]);
1687 kfree(iommu
->domains
);
1688 kfree(iommu
->domain_ids
);
1689 iommu
->domains
= NULL
;
1690 iommu
->domain_ids
= NULL
;
1693 g_iommus
[iommu
->seq_id
] = NULL
;
1695 /* free context mapping */
1696 free_context_table(iommu
);
1698 #ifdef CONFIG_INTEL_IOMMU_SVM
1699 if (pasid_supported(iommu
)) {
1700 if (ecap_prs(iommu
->ecap
))
1701 intel_svm_finish_prq(iommu
);
1706 static struct dmar_domain
*alloc_domain(int flags
)
1708 struct dmar_domain
*domain
;
1710 domain
= alloc_domain_mem();
1714 memset(domain
, 0, sizeof(*domain
));
1715 domain
->nid
= NUMA_NO_NODE
;
1716 domain
->flags
= flags
;
1717 domain
->has_iotlb_device
= false;
1718 INIT_LIST_HEAD(&domain
->devices
);
1723 /* Must be called with iommu->lock */
1724 static int domain_attach_iommu(struct dmar_domain
*domain
,
1725 struct intel_iommu
*iommu
)
1727 unsigned long ndomains
;
1730 assert_spin_locked(&device_domain_lock
);
1731 assert_spin_locked(&iommu
->lock
);
1733 domain
->iommu_refcnt
[iommu
->seq_id
] += 1;
1734 domain
->iommu_count
+= 1;
1735 if (domain
->iommu_refcnt
[iommu
->seq_id
] == 1) {
1736 ndomains
= cap_ndoms(iommu
->cap
);
1737 num
= find_first_zero_bit(iommu
->domain_ids
, ndomains
);
1739 if (num
>= ndomains
) {
1740 pr_err("%s: No free domain ids\n", iommu
->name
);
1741 domain
->iommu_refcnt
[iommu
->seq_id
] -= 1;
1742 domain
->iommu_count
-= 1;
1746 set_bit(num
, iommu
->domain_ids
);
1747 set_iommu_domain(iommu
, num
, domain
);
1749 domain
->iommu_did
[iommu
->seq_id
] = num
;
1750 domain
->nid
= iommu
->node
;
1752 domain_update_iommu_cap(domain
);
1758 static int domain_detach_iommu(struct dmar_domain
*domain
,
1759 struct intel_iommu
*iommu
)
1763 assert_spin_locked(&device_domain_lock
);
1764 assert_spin_locked(&iommu
->lock
);
1766 domain
->iommu_refcnt
[iommu
->seq_id
] -= 1;
1767 count
= --domain
->iommu_count
;
1768 if (domain
->iommu_refcnt
[iommu
->seq_id
] == 0) {
1769 num
= domain
->iommu_did
[iommu
->seq_id
];
1770 clear_bit(num
, iommu
->domain_ids
);
1771 set_iommu_domain(iommu
, num
, NULL
);
1773 domain_update_iommu_cap(domain
);
1774 domain
->iommu_did
[iommu
->seq_id
] = 0;
1780 static struct iova_domain reserved_iova_list
;
1781 static struct lock_class_key reserved_rbtree_key
;
1783 static int dmar_init_reserved_ranges(void)
1785 struct pci_dev
*pdev
= NULL
;
1789 init_iova_domain(&reserved_iova_list
, VTD_PAGE_SIZE
, IOVA_START_PFN
);
1791 lockdep_set_class(&reserved_iova_list
.iova_rbtree_lock
,
1792 &reserved_rbtree_key
);
1794 /* IOAPIC ranges shouldn't be accessed by DMA */
1795 iova
= reserve_iova(&reserved_iova_list
, IOVA_PFN(IOAPIC_RANGE_START
),
1796 IOVA_PFN(IOAPIC_RANGE_END
));
1798 pr_err("Reserve IOAPIC range failed\n");
1802 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1803 for_each_pci_dev(pdev
) {
1806 for (i
= 0; i
< PCI_NUM_RESOURCES
; i
++) {
1807 r
= &pdev
->resource
[i
];
1808 if (!r
->flags
|| !(r
->flags
& IORESOURCE_MEM
))
1810 iova
= reserve_iova(&reserved_iova_list
,
1814 pci_err(pdev
, "Reserve iova for %pR failed\n", r
);
1822 static void domain_reserve_special_ranges(struct dmar_domain
*domain
)
1824 copy_reserved_iova(&reserved_iova_list
, &domain
->iovad
);
/*
 * Round a guest address width up to the next width of the form 12 + 9 * n
 * (a 12-bit page offset plus whole 9-bit page-table levels), capped at 64.
 */
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int agaw;
	int r = (gaw - 12) % 9;

	if (r == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - r;
	if (agaw > 64)
		agaw = 64;
	return agaw;
}
1841 static int domain_init(struct dmar_domain
*domain
, struct intel_iommu
*iommu
,
1844 int adjust_width
, agaw
;
1845 unsigned long sagaw
;
1848 init_iova_domain(&domain
->iovad
, VTD_PAGE_SIZE
, IOVA_START_PFN
);
1850 err
= init_iova_flush_queue(&domain
->iovad
,
1851 iommu_flush_iova
, iova_entry_free
);
1855 domain_reserve_special_ranges(domain
);
1857 /* calculate AGAW */
1858 if (guest_width
> cap_mgaw(iommu
->cap
))
1859 guest_width
= cap_mgaw(iommu
->cap
);
1860 domain
->gaw
= guest_width
;
1861 adjust_width
= guestwidth_to_adjustwidth(guest_width
);
1862 agaw
= width_to_agaw(adjust_width
);
1863 sagaw
= cap_sagaw(iommu
->cap
);
1864 if (!test_bit(agaw
, &sagaw
)) {
1865 /* hardware doesn't support it, choose a bigger one */
1866 pr_debug("Hardware doesn't support agaw %d\n", agaw
);
1867 agaw
= find_next_bit(&sagaw
, 5, agaw
);
1871 domain
->agaw
= agaw
;
1873 if (ecap_coherent(iommu
->ecap
))
1874 domain
->iommu_coherency
= 1;
1876 domain
->iommu_coherency
= 0;
1878 if (ecap_sc_support(iommu
->ecap
))
1879 domain
->iommu_snooping
= 1;
1881 domain
->iommu_snooping
= 0;
1883 if (intel_iommu_superpage
)
1884 domain
->iommu_superpage
= fls(cap_super_page_val(iommu
->cap
));
1886 domain
->iommu_superpage
= 0;
1888 domain
->nid
= iommu
->node
;
1890 /* always allocate the top pgd */
1891 domain
->pgd
= (struct dma_pte
*)alloc_pgtable_page(domain
->nid
);
1894 __iommu_flush_cache(iommu
, domain
->pgd
, PAGE_SIZE
);
1898 static void domain_exit(struct dmar_domain
*domain
)
1901 /* Remove associated devices and clear attached or cached domains */
1902 domain_remove_dev_info(domain
);
1905 put_iova_domain(&domain
->iovad
);
1908 struct page
*freelist
;
1910 freelist
= domain_unmap(domain
, 0, DOMAIN_MAX_PFN(domain
->gaw
));
1911 dma_free_pagelist(freelist
);
1914 free_domain_mem(domain
);
1918 * Get the PASID directory size for scalable mode context entry.
1919 * Value of X in the PDTS field of a scalable mode context entry
1920 * indicates PASID directory with 2^(X + 7) entries.
1922 static inline unsigned long context_get_sm_pds(struct pasid_table
*table
)
1926 max_pde
= table
->max_pasid
>> PASID_PDE_SHIFT
;
1927 pds
= find_first_bit((unsigned long *)&max_pde
, MAX_NR_PASID_BITS
);
1935 * Set the RID_PASID field of a scalable mode context entry. The
1936 * IOMMU hardware will use the PASID value set in this field for
1937 * DMA translations of DMA requests without PASID.
1940 context_set_sm_rid2pasid(struct context_entry
*context
, unsigned long pasid
)
1942 context
->hi
|= pasid
& ((1 << 20) - 1);
1943 context
->hi
|= (1 << 20);
1947 * Set the DTE(Device-TLB Enable) field of a scalable mode context
1950 static inline void context_set_sm_dte(struct context_entry
*context
)
1952 context
->lo
|= (1 << 2);
1956 * Set the PRE(Page Request Enable) field of a scalable mode context
1959 static inline void context_set_sm_pre(struct context_entry
*context
)
1961 context
->lo
|= (1 << 4);
/*
 * Convert value to context PASID directory size field coding: the low
 * three bits of @pds, placed at bit 9.
 */
#define context_pdts(pds)	(((pds) & 0x7) << 9)
1967 static int domain_context_mapping_one(struct dmar_domain
*domain
,
1968 struct intel_iommu
*iommu
,
1969 struct pasid_table
*table
,
1972 u16 did
= domain
->iommu_did
[iommu
->seq_id
];
1973 int translation
= CONTEXT_TT_MULTI_LEVEL
;
1974 struct device_domain_info
*info
= NULL
;
1975 struct context_entry
*context
;
1976 unsigned long flags
;
1981 if (hw_pass_through
&& domain_type_is_si(domain
))
1982 translation
= CONTEXT_TT_PASS_THROUGH
;
1984 pr_debug("Set context mapping for %02x:%02x.%d\n",
1985 bus
, PCI_SLOT(devfn
), PCI_FUNC(devfn
));
1987 BUG_ON(!domain
->pgd
);
1989 spin_lock_irqsave(&device_domain_lock
, flags
);
1990 spin_lock(&iommu
->lock
);
1993 context
= iommu_context_addr(iommu
, bus
, devfn
, 1);
1998 if (context_present(context
))
2002 * For kdump cases, old valid entries may be cached due to the
2003 * in-flight DMA and copied pgtable, but there is no unmapping
2004 * behaviour for them, thus we need an explicit cache flush for
2005 * the newly-mapped device. For kdump, at this point, the device
2006 * is supposed to finish reset at its driver probe stage, so no
2007 * in-flight DMA will exist, and we don't need to worry anymore
2010 if (context_copied(context
)) {
2011 u16 did_old
= context_domain_id(context
);
2013 if (did_old
< cap_ndoms(iommu
->cap
)) {
2014 iommu
->flush
.flush_context(iommu
, did_old
,
2015 (((u16
)bus
) << 8) | devfn
,
2016 DMA_CCMD_MASK_NOBIT
,
2017 DMA_CCMD_DEVICE_INVL
);
2018 iommu
->flush
.flush_iotlb(iommu
, did_old
, 0, 0,
2023 context_clear_entry(context
);
2025 if (sm_supported(iommu
)) {
2030 /* Setup the PASID DIR pointer: */
2031 pds
= context_get_sm_pds(table
);
2032 context
->lo
= (u64
)virt_to_phys(table
->table
) |
2035 /* Setup the RID_PASID field: */
2036 context_set_sm_rid2pasid(context
, PASID_RID2PASID
);
2039 * Setup the Device-TLB enable bit and Page request
2042 info
= iommu_support_dev_iotlb(domain
, iommu
, bus
, devfn
);
2043 if (info
&& info
->ats_supported
)
2044 context_set_sm_dte(context
);
2045 if (info
&& info
->pri_supported
)
2046 context_set_sm_pre(context
);
2048 struct dma_pte
*pgd
= domain
->pgd
;
2051 context_set_domain_id(context
, did
);
2053 if (translation
!= CONTEXT_TT_PASS_THROUGH
) {
2055 * Skip top levels of page tables for iommu which has
2056 * less agaw than default. Unnecessary for PT mode.
2058 for (agaw
= domain
->agaw
; agaw
> iommu
->agaw
; agaw
--) {
2060 pgd
= phys_to_virt(dma_pte_addr(pgd
));
2061 if (!dma_pte_present(pgd
))
2065 info
= iommu_support_dev_iotlb(domain
, iommu
, bus
, devfn
);
2066 if (info
&& info
->ats_supported
)
2067 translation
= CONTEXT_TT_DEV_IOTLB
;
2069 translation
= CONTEXT_TT_MULTI_LEVEL
;
2071 context_set_address_root(context
, virt_to_phys(pgd
));
2072 context_set_address_width(context
, agaw
);
2075 * In pass through mode, AW must be programmed to
2076 * indicate the largest AGAW value supported by
2077 * hardware. And ASR is ignored by hardware.
2079 context_set_address_width(context
, iommu
->msagaw
);
2082 context_set_translation_type(context
, translation
);
2085 context_set_fault_enable(context
);
2086 context_set_present(context
);
2087 domain_flush_cache(domain
, context
, sizeof(*context
));
2090 * It's a non-present to present mapping. If hardware doesn't cache
2091 * non-present entry we only need to flush the write-buffer. If the
2092 * _does_ cache non-present entries, then it does so in the special
2093 * domain #0, which we have to flush:
2095 if (cap_caching_mode(iommu
->cap
)) {
2096 iommu
->flush
.flush_context(iommu
, 0,
2097 (((u16
)bus
) << 8) | devfn
,
2098 DMA_CCMD_MASK_NOBIT
,
2099 DMA_CCMD_DEVICE_INVL
);
2100 iommu
->flush
.flush_iotlb(iommu
, did
, 0, 0, DMA_TLB_DSI_FLUSH
);
2102 iommu_flush_write_buffer(iommu
);
2104 iommu_enable_dev_iotlb(info
);
2109 spin_unlock(&iommu
->lock
);
2110 spin_unlock_irqrestore(&device_domain_lock
, flags
);
/* Arguments bundled for domain_context_mapping_cb(). */
struct domain_context_mapping_data {
	struct dmar_domain *domain;
	struct intel_iommu *iommu;
	struct pasid_table *table;
};
2121 static int domain_context_mapping_cb(struct pci_dev
*pdev
,
2122 u16 alias
, void *opaque
)
2124 struct domain_context_mapping_data
*data
= opaque
;
2126 return domain_context_mapping_one(data
->domain
, data
->iommu
,
2127 data
->table
, PCI_BUS_NUM(alias
),
2132 domain_context_mapping(struct dmar_domain
*domain
, struct device
*dev
)
2134 struct domain_context_mapping_data data
;
2135 struct pasid_table
*table
;
2136 struct intel_iommu
*iommu
;
2139 iommu
= device_to_iommu(dev
, &bus
, &devfn
);
2143 table
= intel_pasid_get_table(dev
);
2145 if (!dev_is_pci(dev
))
2146 return domain_context_mapping_one(domain
, iommu
, table
,
2149 data
.domain
= domain
;
2153 return pci_for_each_dma_alias(to_pci_dev(dev
),
2154 &domain_context_mapping_cb
, &data
);
2157 static int domain_context_mapped_cb(struct pci_dev
*pdev
,
2158 u16 alias
, void *opaque
)
2160 struct intel_iommu
*iommu
= opaque
;
2162 return !device_context_mapped(iommu
, PCI_BUS_NUM(alias
), alias
& 0xff);
2165 static int domain_context_mapped(struct device
*dev
)
2167 struct intel_iommu
*iommu
;
2170 iommu
= device_to_iommu(dev
, &bus
, &devfn
);
2174 if (!dev_is_pci(dev
))
2175 return device_context_mapped(iommu
, bus
, devfn
);
2177 return !pci_for_each_dma_alias(to_pci_dev(dev
),
2178 domain_context_mapped_cb
, iommu
);
2181 /* Returns a number of VTD pages, but aligned to MM page size */
2182 static inline unsigned long aligned_nrpages(unsigned long host_addr
,
2185 host_addr
&= ~PAGE_MASK
;
2186 return PAGE_ALIGN(host_addr
+ size
) >> VTD_PAGE_SHIFT
;
2189 /* Return largest possible superpage level for a given mapping */
2190 static inline int hardware_largepage_caps(struct dmar_domain
*domain
,
2191 unsigned long iov_pfn
,
2192 unsigned long phy_pfn
,
2193 unsigned long pages
)
2195 int support
, level
= 1;
2196 unsigned long pfnmerge
;
2198 support
= domain
->iommu_superpage
;
2200 /* To use a large page, the virtual *and* physical addresses
2201 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2202 of them will mean we have to use smaller pages. So just
2203 merge them and check both at once. */
2204 pfnmerge
= iov_pfn
| phy_pfn
;
2206 while (support
&& !(pfnmerge
& ~VTD_STRIDE_MASK
)) {
2207 pages
>>= VTD_STRIDE_SHIFT
;
2210 pfnmerge
>>= VTD_STRIDE_SHIFT
;
2217 static int __domain_mapping(struct dmar_domain
*domain
, unsigned long iov_pfn
,
2218 struct scatterlist
*sg
, unsigned long phys_pfn
,
2219 unsigned long nr_pages
, int prot
)
2221 struct dma_pte
*first_pte
= NULL
, *pte
= NULL
;
2222 phys_addr_t
uninitialized_var(pteval
);
2223 unsigned long sg_res
= 0;
2224 unsigned int largepage_lvl
= 0;
2225 unsigned long lvl_pages
= 0;
2227 BUG_ON(!domain_pfn_supported(domain
, iov_pfn
+ nr_pages
- 1));
2229 if ((prot
& (DMA_PTE_READ
|DMA_PTE_WRITE
)) == 0)
2232 prot
&= DMA_PTE_READ
| DMA_PTE_WRITE
| DMA_PTE_SNP
;
2236 pteval
= ((phys_addr_t
)phys_pfn
<< VTD_PAGE_SHIFT
) | prot
;
2239 while (nr_pages
> 0) {
2243 unsigned int pgoff
= sg
->offset
& ~PAGE_MASK
;
2245 sg_res
= aligned_nrpages(sg
->offset
, sg
->length
);
2246 sg
->dma_address
= ((dma_addr_t
)iov_pfn
<< VTD_PAGE_SHIFT
) + pgoff
;
2247 sg
->dma_length
= sg
->length
;
2248 pteval
= (sg_phys(sg
) - pgoff
) | prot
;
2249 phys_pfn
= pteval
>> VTD_PAGE_SHIFT
;
2253 largepage_lvl
= hardware_largepage_caps(domain
, iov_pfn
, phys_pfn
, sg_res
);
2255 first_pte
= pte
= pfn_to_dma_pte(domain
, iov_pfn
, &largepage_lvl
);
2258 /* It is large page*/
2259 if (largepage_lvl
> 1) {
2260 unsigned long nr_superpages
, end_pfn
;
2262 pteval
|= DMA_PTE_LARGE_PAGE
;
2263 lvl_pages
= lvl_to_nr_pages(largepage_lvl
);
2265 nr_superpages
= sg_res
/ lvl_pages
;
2266 end_pfn
= iov_pfn
+ nr_superpages
* lvl_pages
- 1;
2269 * Ensure that old small page tables are
2270 * removed to make room for superpage(s).
2271 * We're adding new large pages, so make sure
2272 * we don't remove their parent tables.
2274 dma_pte_free_pagetable(domain
, iov_pfn
, end_pfn
,
2277 pteval
&= ~(uint64_t)DMA_PTE_LARGE_PAGE
;
2281 /* We don't need lock here, nobody else
2282 * touches the iova range
2284 tmp
= cmpxchg64_local(&pte
->val
, 0ULL, pteval
);
2286 static int dumps
= 5;
2287 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2288 iov_pfn
, tmp
, (unsigned long long)pteval
);
2291 debug_dma_dump_mappings(NULL
);
2296 lvl_pages
= lvl_to_nr_pages(largepage_lvl
);
2298 BUG_ON(nr_pages
< lvl_pages
);
2299 BUG_ON(sg_res
< lvl_pages
);
2301 nr_pages
-= lvl_pages
;
2302 iov_pfn
+= lvl_pages
;
2303 phys_pfn
+= lvl_pages
;
2304 pteval
+= lvl_pages
* VTD_PAGE_SIZE
;
2305 sg_res
-= lvl_pages
;
2307 /* If the next PTE would be the first in a new page, then we
2308 need to flush the cache on the entries we've just written.
2309 And then we'll need to recalculate 'pte', so clear it and
2310 let it get set again in the if (!pte) block above.
2312 If we're done (!nr_pages) we need to flush the cache too.
2314 Also if we've been setting superpages, we may need to
2315 recalculate 'pte' and switch back to smaller pages for the
2316 end of the mapping, if the trailing size is not enough to
2317 use another superpage (i.e. sg_res < lvl_pages). */
2319 if (!nr_pages
|| first_pte_in_page(pte
) ||
2320 (largepage_lvl
> 1 && sg_res
< lvl_pages
)) {
2321 domain_flush_cache(domain
, first_pte
,
2322 (void *)pte
- (void *)first_pte
);
2326 if (!sg_res
&& nr_pages
)
2332 static int domain_mapping(struct dmar_domain
*domain
, unsigned long iov_pfn
,
2333 struct scatterlist
*sg
, unsigned long phys_pfn
,
2334 unsigned long nr_pages
, int prot
)
2337 struct intel_iommu
*iommu
;
2339 /* Do the real mapping first */
2340 ret
= __domain_mapping(domain
, iov_pfn
, sg
, phys_pfn
, nr_pages
, prot
);
2344 for_each_domain_iommu(iommu_id
, domain
) {
2345 iommu
= g_iommus
[iommu_id
];
2346 __mapping_notify_one(iommu
, domain
, iov_pfn
, nr_pages
);
/* Map the scatterlist @sg into @domain starting at @iov_pfn. */
static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				    struct scatterlist *sg, unsigned long nr_pages,
				    int prot)
{
	return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
}
2359 static inline int domain_pfn_mapping(struct dmar_domain
*domain
, unsigned long iov_pfn
,
2360 unsigned long phys_pfn
, unsigned long nr_pages
,
2363 return domain_mapping(domain
, iov_pfn
, NULL
, phys_pfn
, nr_pages
, prot
);
2366 static void domain_context_clear_one(struct intel_iommu
*iommu
, u8 bus
, u8 devfn
)
2368 unsigned long flags
;
2369 struct context_entry
*context
;
2375 spin_lock_irqsave(&iommu
->lock
, flags
);
2376 context
= iommu_context_addr(iommu
, bus
, devfn
, 0);
2378 spin_unlock_irqrestore(&iommu
->lock
, flags
);
2381 did_old
= context_domain_id(context
);
2382 context_clear_entry(context
);
2383 __iommu_flush_cache(iommu
, context
, sizeof(*context
));
2384 spin_unlock_irqrestore(&iommu
->lock
, flags
);
2385 iommu
->flush
.flush_context(iommu
,
2387 (((u16
)bus
) << 8) | devfn
,
2388 DMA_CCMD_MASK_NOBIT
,
2389 DMA_CCMD_DEVICE_INVL
);
2390 iommu
->flush
.flush_iotlb(iommu
,
2397 static inline void unlink_domain_info(struct device_domain_info
*info
)
2399 assert_spin_locked(&device_domain_lock
);
2400 list_del(&info
->link
);
2401 list_del(&info
->global
);
2403 info
->dev
->archdata
.iommu
= NULL
;
2406 static void domain_remove_dev_info(struct dmar_domain
*domain
)
2408 struct device_domain_info
*info
, *tmp
;
2409 unsigned long flags
;
2411 spin_lock_irqsave(&device_domain_lock
, flags
);
2412 list_for_each_entry_safe(info
, tmp
, &domain
->devices
, link
)
2413 __dmar_remove_one_dev_info(info
);
2414 spin_unlock_irqrestore(&device_domain_lock
, flags
);
2417 static struct dmar_domain
*find_domain(struct device
*dev
)
2419 struct device_domain_info
*info
;
2421 if (unlikely(dev
->archdata
.iommu
== DEFER_DEVICE_DOMAIN_INFO
||
2422 dev
->archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
))
2425 if (dev_is_pci(dev
))
2426 dev
= &pci_real_dma_dev(to_pci_dev(dev
))->dev
;
2428 /* No lock here, assumes no domain exit in normal case */
2429 info
= dev
->archdata
.iommu
;
2431 return info
->domain
;
2436 static struct dmar_domain
*deferred_attach_domain(struct device
*dev
)
2438 if (unlikely(dev
->archdata
.iommu
== DEFER_DEVICE_DOMAIN_INFO
)) {
2439 struct iommu_domain
*domain
;
2441 dev
->archdata
.iommu
= NULL
;
2442 domain
= iommu_get_domain_for_dev(dev
);
2444 intel_iommu_attach_device(domain
, dev
);
2447 return find_domain(dev
);
2450 static inline struct device_domain_info
*
2451 dmar_search_domain_by_dev_info(int segment
, int bus
, int devfn
)
2453 struct device_domain_info
*info
;
2455 list_for_each_entry(info
, &device_domain_list
, global
)
2456 if (info
->iommu
->segment
== segment
&& info
->bus
== bus
&&
2457 info
->devfn
== devfn
)
2463 static struct dmar_domain
*dmar_insert_one_dev_info(struct intel_iommu
*iommu
,
2466 struct dmar_domain
*domain
)
2468 struct dmar_domain
*found
= NULL
;
2469 struct device_domain_info
*info
;
2470 unsigned long flags
;
2473 info
= alloc_devinfo_mem();
2478 info
->devfn
= devfn
;
2479 info
->ats_supported
= info
->pasid_supported
= info
->pri_supported
= 0;
2480 info
->ats_enabled
= info
->pasid_enabled
= info
->pri_enabled
= 0;
2483 info
->domain
= domain
;
2484 info
->iommu
= iommu
;
2485 info
->pasid_table
= NULL
;
2486 info
->auxd_enabled
= 0;
2487 INIT_LIST_HEAD(&info
->auxiliary_domains
);
2489 if (dev
&& dev_is_pci(dev
)) {
2490 struct pci_dev
*pdev
= to_pci_dev(info
->dev
);
2492 if (!pdev
->untrusted
&&
2493 !pci_ats_disabled() &&
2494 ecap_dev_iotlb_support(iommu
->ecap
) &&
2495 pci_find_ext_capability(pdev
, PCI_EXT_CAP_ID_ATS
) &&
2496 dmar_find_matched_atsr_unit(pdev
))
2497 info
->ats_supported
= 1;
2499 if (sm_supported(iommu
)) {
2500 if (pasid_supported(iommu
)) {
2501 int features
= pci_pasid_features(pdev
);
2503 info
->pasid_supported
= features
| 1;
2506 if (info
->ats_supported
&& ecap_prs(iommu
->ecap
) &&
2507 pci_find_ext_capability(pdev
, PCI_EXT_CAP_ID_PRI
))
2508 info
->pri_supported
= 1;
2512 spin_lock_irqsave(&device_domain_lock
, flags
);
2514 found
= find_domain(dev
);
2517 struct device_domain_info
*info2
;
2518 info2
= dmar_search_domain_by_dev_info(iommu
->segment
, bus
, devfn
);
2520 found
= info2
->domain
;
2526 spin_unlock_irqrestore(&device_domain_lock
, flags
);
2527 free_devinfo_mem(info
);
2528 /* Caller must free the original domain */
2532 spin_lock(&iommu
->lock
);
2533 ret
= domain_attach_iommu(domain
, iommu
);
2534 spin_unlock(&iommu
->lock
);
2537 spin_unlock_irqrestore(&device_domain_lock
, flags
);
2538 free_devinfo_mem(info
);
2542 list_add(&info
->link
, &domain
->devices
);
2543 list_add(&info
->global
, &device_domain_list
);
2545 dev
->archdata
.iommu
= info
;
2546 spin_unlock_irqrestore(&device_domain_lock
, flags
);
2548 /* PASID table is mandatory for a PCI device in scalable mode. */
2549 if (dev
&& dev_is_pci(dev
) && sm_supported(iommu
)) {
2550 ret
= intel_pasid_alloc_table(dev
);
2552 dev_err(dev
, "PASID table allocation failed\n");
2553 dmar_remove_one_dev_info(dev
);
2557 /* Setup the PASID entry for requests without PASID: */
2558 spin_lock(&iommu
->lock
);
2559 if (hw_pass_through
&& domain_type_is_si(domain
))
2560 ret
= intel_pasid_setup_pass_through(iommu
, domain
,
2561 dev
, PASID_RID2PASID
);
2563 ret
= intel_pasid_setup_second_level(iommu
, domain
,
2564 dev
, PASID_RID2PASID
);
2565 spin_unlock(&iommu
->lock
);
2567 dev_err(dev
, "Setup RID2PASID failed\n");
2568 dmar_remove_one_dev_info(dev
);
2573 if (dev
&& domain_context_mapping(domain
, dev
)) {
2574 dev_err(dev
, "Domain context map failed\n");
2575 dmar_remove_one_dev_info(dev
);
2582 static int get_last_alias(struct pci_dev
*pdev
, u16 alias
, void *opaque
)
2584 *(u16
*)opaque
= alias
;
2588 static struct dmar_domain
*find_or_alloc_domain(struct device
*dev
, int gaw
)
2590 struct device_domain_info
*info
;
2591 struct dmar_domain
*domain
= NULL
;
2592 struct intel_iommu
*iommu
;
2594 unsigned long flags
;
2597 iommu
= device_to_iommu(dev
, &bus
, &devfn
);
2601 if (dev_is_pci(dev
)) {
2602 struct pci_dev
*pdev
= to_pci_dev(dev
);
2604 pci_for_each_dma_alias(pdev
, get_last_alias
, &dma_alias
);
2606 spin_lock_irqsave(&device_domain_lock
, flags
);
2607 info
= dmar_search_domain_by_dev_info(pci_domain_nr(pdev
->bus
),
2608 PCI_BUS_NUM(dma_alias
),
2611 iommu
= info
->iommu
;
2612 domain
= info
->domain
;
2614 spin_unlock_irqrestore(&device_domain_lock
, flags
);
2616 /* DMA alias already has a domain, use it */
2621 /* Allocate and initialize new domain for the device */
2622 domain
= alloc_domain(0);
2625 if (domain_init(domain
, iommu
, gaw
)) {
2626 domain_exit(domain
);
2634 static struct dmar_domain
*set_domain_for_dev(struct device
*dev
,
2635 struct dmar_domain
*domain
)
2637 struct intel_iommu
*iommu
;
2638 struct dmar_domain
*tmp
;
2639 u16 req_id
, dma_alias
;
2642 iommu
= device_to_iommu(dev
, &bus
, &devfn
);
2646 req_id
= ((u16
)bus
<< 8) | devfn
;
2648 if (dev_is_pci(dev
)) {
2649 struct pci_dev
*pdev
= to_pci_dev(dev
);
2651 pci_for_each_dma_alias(pdev
, get_last_alias
, &dma_alias
);
2653 /* register PCI DMA alias device */
2654 if (req_id
!= dma_alias
) {
2655 tmp
= dmar_insert_one_dev_info(iommu
, PCI_BUS_NUM(dma_alias
),
2656 dma_alias
& 0xff, NULL
, domain
);
2658 if (!tmp
|| tmp
!= domain
)
2663 tmp
= dmar_insert_one_dev_info(iommu
, bus
, devfn
, dev
, domain
);
2664 if (!tmp
|| tmp
!= domain
)
2670 static int iommu_domain_identity_map(struct dmar_domain
*domain
,
2671 unsigned long long start
,
2672 unsigned long long end
)
2674 unsigned long first_vpfn
= start
>> VTD_PAGE_SHIFT
;
2675 unsigned long last_vpfn
= end
>> VTD_PAGE_SHIFT
;
2677 if (!reserve_iova(&domain
->iovad
, dma_to_mm_pfn(first_vpfn
),
2678 dma_to_mm_pfn(last_vpfn
))) {
2679 pr_err("Reserving iova failed\n");
2683 pr_debug("Mapping reserved region %llx-%llx\n", start
, end
);
2685 * RMRR range might have overlap with physical memory range,
2688 dma_pte_clear_range(domain
, first_vpfn
, last_vpfn
);
2690 return __domain_mapping(domain
, first_vpfn
, NULL
,
2691 first_vpfn
, last_vpfn
- first_vpfn
+ 1,
2692 DMA_PTE_READ
|DMA_PTE_WRITE
);
2695 static int domain_prepare_identity_map(struct device
*dev
,
2696 struct dmar_domain
*domain
,
2697 unsigned long long start
,
2698 unsigned long long end
)
2700 /* For _hardware_ passthrough, don't bother. But for software
2701 passthrough, we do it anyway -- it may indicate a memory
2702 range which is reserved in E820, so which didn't get set
2703 up to start with in si_domain */
2704 if (domain
== si_domain
&& hw_pass_through
) {
2705 dev_warn(dev
, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
2710 dev_info(dev
, "Setting identity map [0x%Lx - 0x%Lx]\n", start
, end
);
2713 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2714 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2715 dmi_get_system_info(DMI_BIOS_VENDOR
),
2716 dmi_get_system_info(DMI_BIOS_VERSION
),
2717 dmi_get_system_info(DMI_PRODUCT_VERSION
));
2721 if (end
>> agaw_to_width(domain
->agaw
)) {
2722 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2723 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2724 agaw_to_width(domain
->agaw
),
2725 dmi_get_system_info(DMI_BIOS_VENDOR
),
2726 dmi_get_system_info(DMI_BIOS_VERSION
),
2727 dmi_get_system_info(DMI_PRODUCT_VERSION
));
2731 return iommu_domain_identity_map(domain
, start
, end
);
2734 static int md_domain_init(struct dmar_domain
*domain
, int guest_width
);
2736 static int __init
si_domain_init(int hw
)
2738 struct dmar_rmrr_unit
*rmrr
;
2742 si_domain
= alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY
);
2746 if (md_domain_init(si_domain
, DEFAULT_DOMAIN_ADDRESS_WIDTH
)) {
2747 domain_exit(si_domain
);
2754 for_each_online_node(nid
) {
2755 unsigned long start_pfn
, end_pfn
;
2758 for_each_mem_pfn_range(i
, nid
, &start_pfn
, &end_pfn
, NULL
) {
2759 ret
= iommu_domain_identity_map(si_domain
,
2760 PFN_PHYS(start_pfn
), PFN_PHYS(end_pfn
));
2767 * Normally we use DMA domains for devices which have RMRRs. But we
2768 * loose this requirement for graphic and usb devices. Identity map
2769 * the RMRRs for graphic and USB devices so that they could use the
2772 for_each_rmrr_units(rmrr
) {
2773 for_each_active_dev_scope(rmrr
->devices
, rmrr
->devices_cnt
,
2775 unsigned long long start
= rmrr
->base_address
;
2776 unsigned long long end
= rmrr
->end_address
;
2778 if (device_is_rmrr_locked(dev
))
2781 if (WARN_ON(end
< start
||
2782 end
>> agaw_to_width(si_domain
->agaw
)))
2785 ret
= iommu_domain_identity_map(si_domain
, start
, end
);
2794 static int identity_mapping(struct device
*dev
)
2796 struct device_domain_info
*info
;
2798 info
= dev
->archdata
.iommu
;
2799 if (info
&& info
!= DUMMY_DEVICE_DOMAIN_INFO
&& info
!= DEFER_DEVICE_DOMAIN_INFO
)
2800 return (info
->domain
== si_domain
);
2805 static int domain_add_dev_info(struct dmar_domain
*domain
, struct device
*dev
)
2807 struct dmar_domain
*ndomain
;
2808 struct intel_iommu
*iommu
;
2811 iommu
= device_to_iommu(dev
, &bus
, &devfn
);
2815 ndomain
= dmar_insert_one_dev_info(iommu
, bus
, devfn
, dev
, domain
);
2816 if (ndomain
!= domain
)
2822 static bool device_has_rmrr(struct device
*dev
)
2824 struct dmar_rmrr_unit
*rmrr
;
2829 for_each_rmrr_units(rmrr
) {
2831 * Return TRUE if this RMRR contains the device that
2834 for_each_active_dev_scope(rmrr
->devices
,
2835 rmrr
->devices_cnt
, i
, tmp
)
2837 is_downstream_to_pci_bridge(dev
, tmp
)) {
2847 * device_rmrr_is_relaxable - Test whether the RMRR of this device
2848 * is relaxable (ie. is allowed to be not enforced under some conditions)
2849 * @dev: device handle
2851 * We assume that PCI USB devices with RMRRs have them largely
2852 * for historical reasons and that the RMRR space is not actively used post
2853 * boot. This exclusion may change if vendors begin to abuse it.
2855 * The same exception is made for graphics devices, with the requirement that
2856 * any use of the RMRR regions will be torn down before assigning the device
2859 * Return: true if the RMRR is relaxable, false otherwise
2861 static bool device_rmrr_is_relaxable(struct device
*dev
)
2863 struct pci_dev
*pdev
;
2865 if (!dev_is_pci(dev
))
2868 pdev
= to_pci_dev(dev
);
2869 if (IS_USB_DEVICE(pdev
) || IS_GFX_DEVICE(pdev
))
2876 * There are a couple cases where we need to restrict the functionality of
2877 * devices associated with RMRRs. The first is when evaluating a device for
2878 * identity mapping because problems exist when devices are moved in and out
2879 * of domains and their respective RMRR information is lost. This means that
2880 * a device with associated RMRRs will never be in a "passthrough" domain.
2881 * The second is use of the device through the IOMMU API. This interface
2882 * expects to have full control of the IOVA space for the device. We cannot
2883 * satisfy both the requirement that RMRR access is maintained and have an
2884 * unencumbered IOVA space. We also have no ability to quiesce the device's
2885 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2886 * We therefore prevent devices associated with an RMRR from participating in
2887 * the IOMMU API, which eliminates them from device assignment.
2889 * In both cases, devices which have relaxable RMRRs are not concerned by this
2890 * restriction. See device_rmrr_is_relaxable comment.
2892 static bool device_is_rmrr_locked(struct device
*dev
)
2894 if (!device_has_rmrr(dev
))
2897 if (device_rmrr_is_relaxable(dev
))
2904 * Return the required default domain type for a specific device.
2906 * @dev: the device in query
2907 * @startup: true if this is during early boot
2910 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2911 * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain
2912 * - 0: both identity and dynamic domains work for this device
2914 static int device_def_domain_type(struct device
*dev
)
2916 if (dev_is_pci(dev
)) {
2917 struct pci_dev
*pdev
= to_pci_dev(dev
);
2919 if (device_is_rmrr_locked(dev
))
2920 return IOMMU_DOMAIN_DMA
;
2923 * Prevent any device marked as untrusted from getting
2924 * placed into the statically identity mapping domain.
2926 if (pdev
->untrusted
)
2927 return IOMMU_DOMAIN_DMA
;
2929 if ((iommu_identity_mapping
& IDENTMAP_AZALIA
) && IS_AZALIA(pdev
))
2930 return IOMMU_DOMAIN_IDENTITY
;
2932 if ((iommu_identity_mapping
& IDENTMAP_GFX
) && IS_GFX_DEVICE(pdev
))
2933 return IOMMU_DOMAIN_IDENTITY
;
2936 * We want to start off with all devices in the 1:1 domain, and
2937 * take them out later if we find they can't access all of memory.
2939 * However, we can't do this for PCI devices behind bridges,
2940 * because all PCI devices behind the same bridge will end up
2941 * with the same source-id on their transactions.
2943 * Practically speaking, we can't change things around for these
2944 * devices at run-time, because we can't be sure there'll be no
2945 * DMA transactions in flight for any of their siblings.
2947 * So PCI devices (unless they're on the root bus) as well as
2948 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2949 * the 1:1 domain, just in _case_ one of their siblings turns out
2950 * not to be able to map all of memory.
2952 if (!pci_is_pcie(pdev
)) {
2953 if (!pci_is_root_bus(pdev
->bus
))
2954 return IOMMU_DOMAIN_DMA
;
2955 if (pdev
->class >> 8 == PCI_CLASS_BRIDGE_PCI
)
2956 return IOMMU_DOMAIN_DMA
;
2957 } else if (pci_pcie_type(pdev
) == PCI_EXP_TYPE_PCI_BRIDGE
)
2958 return IOMMU_DOMAIN_DMA
;
2960 if (device_has_rmrr(dev
))
2961 return IOMMU_DOMAIN_DMA
;
2964 return (iommu_identity_mapping
& IDENTMAP_ALL
) ?
2965 IOMMU_DOMAIN_IDENTITY
: 0;
2968 static void intel_iommu_init_qi(struct intel_iommu
*iommu
)
2971 * Start from the sane iommu hardware state.
2972 * If the queued invalidation is already initialized by us
2973 * (for example, while enabling interrupt-remapping) then
2974 * we got the things already rolling from a sane state.
2978 * Clear any previous faults.
2980 dmar_fault(-1, iommu
);
2982 * Disable queued invalidation if supported and already enabled
2983 * before OS handover.
2985 dmar_disable_qi(iommu
);
2988 if (dmar_enable_qi(iommu
)) {
2990 * Queued Invalidate not enabled, use Register Based Invalidate
2992 iommu
->flush
.flush_context
= __iommu_flush_context
;
2993 iommu
->flush
.flush_iotlb
= __iommu_flush_iotlb
;
2994 pr_info("%s: Using Register based invalidation\n",
2997 iommu
->flush
.flush_context
= qi_flush_context
;
2998 iommu
->flush
.flush_iotlb
= qi_flush_iotlb
;
2999 pr_info("%s: Using Queued invalidation\n", iommu
->name
);
3003 static int copy_context_table(struct intel_iommu
*iommu
,
3004 struct root_entry
*old_re
,
3005 struct context_entry
**tbl
,
3008 int tbl_idx
, pos
= 0, idx
, devfn
, ret
= 0, did
;
3009 struct context_entry
*new_ce
= NULL
, ce
;
3010 struct context_entry
*old_ce
= NULL
;
3011 struct root_entry re
;
3012 phys_addr_t old_ce_phys
;
3014 tbl_idx
= ext
? bus
* 2 : bus
;
3015 memcpy(&re
, old_re
, sizeof(re
));
3017 for (devfn
= 0; devfn
< 256; devfn
++) {
3018 /* First calculate the correct index */
3019 idx
= (ext
? devfn
* 2 : devfn
) % 256;
3022 /* First save what we may have and clean up */
3024 tbl
[tbl_idx
] = new_ce
;
3025 __iommu_flush_cache(iommu
, new_ce
,
3035 old_ce_phys
= root_entry_lctp(&re
);
3037 old_ce_phys
= root_entry_uctp(&re
);
3040 if (ext
&& devfn
== 0) {
3041 /* No LCTP, try UCTP */
3050 old_ce
= memremap(old_ce_phys
, PAGE_SIZE
,
3055 new_ce
= alloc_pgtable_page(iommu
->node
);
3062 /* Now copy the context entry */
3063 memcpy(&ce
, old_ce
+ idx
, sizeof(ce
));
3065 if (!__context_present(&ce
))
3068 did
= context_domain_id(&ce
);
3069 if (did
>= 0 && did
< cap_ndoms(iommu
->cap
))
3070 set_bit(did
, iommu
->domain_ids
);
3073 * We need a marker for copied context entries. This
3074 * marker needs to work for the old format as well as
3075 * for extended context entries.
3077 * Bit 67 of the context entry is used. In the old
3078 * format this bit is available to software, in the
3079 * extended format it is the PGE bit, but PGE is ignored
3080 * by HW if PASIDs are disabled (and thus still
3083 * So disable PASIDs first and then mark the entry
3084 * copied. This means that we don't copy PASID
3085 * translations from the old kernel, but this is fine as
3086 * faults there are not fatal.
3088 context_clear_pasid_enable(&ce
);
3089 context_set_copied(&ce
);
3094 tbl
[tbl_idx
+ pos
] = new_ce
;
3096 __iommu_flush_cache(iommu
, new_ce
, VTD_PAGE_SIZE
);
3105 static int copy_translation_tables(struct intel_iommu
*iommu
)
3107 struct context_entry
**ctxt_tbls
;
3108 struct root_entry
*old_rt
;
3109 phys_addr_t old_rt_phys
;
3110 int ctxt_table_entries
;
3111 unsigned long flags
;
3116 rtaddr_reg
= dmar_readq(iommu
->reg
+ DMAR_RTADDR_REG
);
3117 ext
= !!(rtaddr_reg
& DMA_RTADDR_RTT
);
3118 new_ext
= !!ecap_ecs(iommu
->ecap
);
3121 * The RTT bit can only be changed when translation is disabled,
3122 * but disabling translation means to open a window for data
3123 * corruption. So bail out and don't copy anything if we would
3124 * have to change the bit.
3129 old_rt_phys
= rtaddr_reg
& VTD_PAGE_MASK
;
3133 old_rt
= memremap(old_rt_phys
, PAGE_SIZE
, MEMREMAP_WB
);
3137 /* This is too big for the stack - allocate it from slab */
3138 ctxt_table_entries
= ext
? 512 : 256;
3140 ctxt_tbls
= kcalloc(ctxt_table_entries
, sizeof(void *), GFP_KERNEL
);
3144 for (bus
= 0; bus
< 256; bus
++) {
3145 ret
= copy_context_table(iommu
, &old_rt
[bus
],
3146 ctxt_tbls
, bus
, ext
);
3148 pr_err("%s: Failed to copy context table for bus %d\n",
3154 spin_lock_irqsave(&iommu
->lock
, flags
);
3156 /* Context tables are copied, now write them to the root_entry table */
3157 for (bus
= 0; bus
< 256; bus
++) {
3158 int idx
= ext
? bus
* 2 : bus
;
3161 if (ctxt_tbls
[idx
]) {
3162 val
= virt_to_phys(ctxt_tbls
[idx
]) | 1;
3163 iommu
->root_entry
[bus
].lo
= val
;
3166 if (!ext
|| !ctxt_tbls
[idx
+ 1])
3169 val
= virt_to_phys(ctxt_tbls
[idx
+ 1]) | 1;
3170 iommu
->root_entry
[bus
].hi
= val
;
3173 spin_unlock_irqrestore(&iommu
->lock
, flags
);
3177 __iommu_flush_cache(iommu
, iommu
->root_entry
, PAGE_SIZE
);
3187 static int __init
init_dmars(void)
3189 struct dmar_drhd_unit
*drhd
;
3190 struct intel_iommu
*iommu
;
3196 * initialize and program root entry to not present
3199 for_each_drhd_unit(drhd
) {
3201 * lock not needed as this is only incremented in the single
3202 * threaded kernel __init code path all other access are read
3205 if (g_num_of_iommus
< DMAR_UNITS_SUPPORTED
) {
3209 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED
);
3212 /* Preallocate enough resources for IOMMU hot-addition */
3213 if (g_num_of_iommus
< DMAR_UNITS_SUPPORTED
)
3214 g_num_of_iommus
= DMAR_UNITS_SUPPORTED
;
3216 g_iommus
= kcalloc(g_num_of_iommus
, sizeof(struct intel_iommu
*),
3219 pr_err("Allocating global iommu array failed\n");
3224 for_each_iommu(iommu
, drhd
) {
3225 if (drhd
->ignored
) {
3226 iommu_disable_translation(iommu
);
3231 * Find the max pasid size of all IOMMU's in the system.
3232 * We need to ensure the system pasid table is no bigger
3233 * than the smallest supported.
3235 if (pasid_supported(iommu
)) {
3236 u32 temp
= 2 << ecap_pss(iommu
->ecap
);
3238 intel_pasid_max_id
= min_t(u32
, temp
,
3239 intel_pasid_max_id
);
3242 g_iommus
[iommu
->seq_id
] = iommu
;
3244 intel_iommu_init_qi(iommu
);
3246 ret
= iommu_init_domains(iommu
);
3250 init_translation_status(iommu
);
3252 if (translation_pre_enabled(iommu
) && !is_kdump_kernel()) {
3253 iommu_disable_translation(iommu
);
3254 clear_translation_pre_enabled(iommu
);
3255 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3261 * we could share the same root & context tables
3262 * among all IOMMU's. Need to Split it later.
3264 ret
= iommu_alloc_root_entry(iommu
);
3268 if (translation_pre_enabled(iommu
)) {
3269 pr_info("Translation already enabled - trying to copy translation structures\n");
3271 ret
= copy_translation_tables(iommu
);
3274 * We found the IOMMU with translation
3275 * enabled - but failed to copy over the
3276 * old root-entry table. Try to proceed
3277 * by disabling translation now and
3278 * allocating a clean root-entry table.
3279 * This might cause DMAR faults, but
3280 * probably the dump will still succeed.
3282 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3284 iommu_disable_translation(iommu
);
3285 clear_translation_pre_enabled(iommu
);
3287 pr_info("Copied translation tables from previous kernel for %s\n",
3292 if (!ecap_pass_through(iommu
->ecap
))
3293 hw_pass_through
= 0;
3294 #ifdef CONFIG_INTEL_IOMMU_SVM
3295 if (pasid_supported(iommu
))
3296 intel_svm_init(iommu
);
3301 * Now that qi is enabled on all iommus, set the root entry and flush
3302 * caches. This is required on some Intel X58 chipsets, otherwise the
3303 * flush_context function will loop forever and the boot hangs.
3305 for_each_active_iommu(iommu
, drhd
) {
3306 iommu_flush_write_buffer(iommu
);
3307 iommu_set_root_entry(iommu
);
3308 iommu
->flush
.flush_context(iommu
, 0, 0, 0, DMA_CCMD_GLOBAL_INVL
);
3309 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH
);
3312 if (iommu_default_passthrough())
3313 iommu_identity_mapping
|= IDENTMAP_ALL
;
3315 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3320 iommu_identity_mapping
|= IDENTMAP_GFX
;
3322 check_tylersburg_isoch();
3324 ret
= si_domain_init(hw_pass_through
);
3331 * global invalidate context cache
3332 * global invalidate iotlb
3333 * enable translation
3335 for_each_iommu(iommu
, drhd
) {
3336 if (drhd
->ignored
) {
3338 * we always have to disable PMRs or DMA may fail on
3342 iommu_disable_protect_mem_regions(iommu
);
3346 iommu_flush_write_buffer(iommu
);
3348 #ifdef CONFIG_INTEL_IOMMU_SVM
3349 if (pasid_supported(iommu
) && ecap_prs(iommu
->ecap
)) {
3351 * Call dmar_alloc_hwirq() with dmar_global_lock held,
3352 * could cause possible lock race condition.
3354 up_write(&dmar_global_lock
);
3355 ret
= intel_svm_enable_prq(iommu
);
3356 down_write(&dmar_global_lock
);
3361 ret
= dmar_set_interrupt(iommu
);
3369 for_each_active_iommu(iommu
, drhd
) {
3370 disable_dmar_iommu(iommu
);
3371 free_dmar_iommu(iommu
);
3380 /* This takes a number of _MM_ pages, not VTD pages */
3381 static unsigned long intel_alloc_iova(struct device
*dev
,
3382 struct dmar_domain
*domain
,
3383 unsigned long nrpages
, uint64_t dma_mask
)
3385 unsigned long iova_pfn
;
3387 /* Restrict dma_mask to the width that the iommu can handle */
3388 dma_mask
= min_t(uint64_t, DOMAIN_MAX_ADDR(domain
->gaw
), dma_mask
);
3389 /* Ensure we reserve the whole size-aligned region */
3390 nrpages
= __roundup_pow_of_two(nrpages
);
3392 if (!dmar_forcedac
&& dma_mask
> DMA_BIT_MASK(32)) {
3394 * First try to allocate an io virtual address in
3395 * DMA_BIT_MASK(32) and if that fails then try allocating
3398 iova_pfn
= alloc_iova_fast(&domain
->iovad
, nrpages
,
3399 IOVA_PFN(DMA_BIT_MASK(32)), false);
3403 iova_pfn
= alloc_iova_fast(&domain
->iovad
, nrpages
,
3404 IOVA_PFN(dma_mask
), true);
3405 if (unlikely(!iova_pfn
)) {
3406 dev_err(dev
, "Allocating %ld-page iova failed", nrpages
);
3413 static struct dmar_domain
*get_private_domain_for_dev(struct device
*dev
)
3415 struct dmar_domain
*domain
, *tmp
;
3416 struct dmar_rmrr_unit
*rmrr
;
3417 struct device
*i_dev
;
3420 /* Device shouldn't be attached by any domains. */
3421 domain
= find_domain(dev
);
3425 domain
= find_or_alloc_domain(dev
, DEFAULT_DOMAIN_ADDRESS_WIDTH
);
3429 /* We have a new domain - setup possible RMRRs for the device */
3431 for_each_rmrr_units(rmrr
) {
3432 for_each_active_dev_scope(rmrr
->devices
, rmrr
->devices_cnt
,
3437 ret
= domain_prepare_identity_map(dev
, domain
,
3441 dev_err(dev
, "Mapping reserved region failed\n");
3446 tmp
= set_domain_for_dev(dev
, domain
);
3447 if (!tmp
|| domain
!= tmp
) {
3448 domain_exit(domain
);
3454 dev_err(dev
, "Allocating domain failed\n");
3456 domain
->domain
.type
= IOMMU_DOMAIN_DMA
;
3461 /* Check if the dev needs to go through non-identity map and unmap process.*/
3462 static bool iommu_need_mapping(struct device
*dev
)
3466 if (iommu_dummy(dev
))
3469 ret
= identity_mapping(dev
);
3471 u64 dma_mask
= *dev
->dma_mask
;
3473 if (dev
->coherent_dma_mask
&& dev
->coherent_dma_mask
< dma_mask
)
3474 dma_mask
= dev
->coherent_dma_mask
;
3476 if (dma_mask
>= dma_direct_get_required_mask(dev
))
3480 * 32 bit DMA is removed from si_domain and fall back to
3481 * non-identity mapping.
3483 dmar_remove_one_dev_info(dev
);
3484 ret
= iommu_request_dma_domain_for_dev(dev
);
3486 struct iommu_domain
*domain
;
3487 struct dmar_domain
*dmar_domain
;
3489 domain
= iommu_get_domain_for_dev(dev
);
3491 dmar_domain
= to_dmar_domain(domain
);
3492 dmar_domain
->flags
|= DOMAIN_FLAG_LOSE_CHILDREN
;
3494 dmar_remove_one_dev_info(dev
);
3495 get_private_domain_for_dev(dev
);
3498 dev_info(dev
, "32bit DMA uses non-identity mapping\n");
3504 static dma_addr_t
__intel_map_single(struct device
*dev
, phys_addr_t paddr
,
3505 size_t size
, int dir
, u64 dma_mask
)
3507 struct dmar_domain
*domain
;
3508 phys_addr_t start_paddr
;
3509 unsigned long iova_pfn
;
3512 struct intel_iommu
*iommu
;
3513 unsigned long paddr_pfn
= paddr
>> PAGE_SHIFT
;
3515 BUG_ON(dir
== DMA_NONE
);
3517 domain
= deferred_attach_domain(dev
);
3519 return DMA_MAPPING_ERROR
;
3521 iommu
= domain_get_iommu(domain
);
3522 size
= aligned_nrpages(paddr
, size
);
3524 iova_pfn
= intel_alloc_iova(dev
, domain
, dma_to_mm_pfn(size
), dma_mask
);
3529 * Check if DMAR supports zero-length reads on write only
3532 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
3533 !cap_zlr(iommu
->cap
))
3534 prot
|= DMA_PTE_READ
;
3535 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
3536 prot
|= DMA_PTE_WRITE
;
3538 * paddr - (paddr + size) might be partial page, we should map the whole
3539 * page. Note: if two part of one page are separately mapped, we
3540 * might have two guest_addr mapping to the same host paddr, but this
3541 * is not a big problem
3543 ret
= domain_pfn_mapping(domain
, mm_to_dma_pfn(iova_pfn
),
3544 mm_to_dma_pfn(paddr_pfn
), size
, prot
);
3548 start_paddr
= (phys_addr_t
)iova_pfn
<< PAGE_SHIFT
;
3549 start_paddr
+= paddr
& ~PAGE_MASK
;
3551 trace_map_single(dev
, start_paddr
, paddr
, size
<< VTD_PAGE_SHIFT
);
3557 free_iova_fast(&domain
->iovad
, iova_pfn
, dma_to_mm_pfn(size
));
3558 dev_err(dev
, "Device request: %zx@%llx dir %d --- failed\n",
3559 size
, (unsigned long long)paddr
, dir
);
3560 return DMA_MAPPING_ERROR
;
3563 static dma_addr_t
intel_map_page(struct device
*dev
, struct page
*page
,
3564 unsigned long offset
, size_t size
,
3565 enum dma_data_direction dir
,
3566 unsigned long attrs
)
3568 if (iommu_need_mapping(dev
))
3569 return __intel_map_single(dev
, page_to_phys(page
) + offset
,
3570 size
, dir
, *dev
->dma_mask
);
3571 return dma_direct_map_page(dev
, page
, offset
, size
, dir
, attrs
);
3574 static dma_addr_t
intel_map_resource(struct device
*dev
, phys_addr_t phys_addr
,
3575 size_t size
, enum dma_data_direction dir
,
3576 unsigned long attrs
)
3578 if (iommu_need_mapping(dev
))
3579 return __intel_map_single(dev
, phys_addr
, size
, dir
,
3581 return dma_direct_map_resource(dev
, phys_addr
, size
, dir
, attrs
);
3584 static void intel_unmap(struct device
*dev
, dma_addr_t dev_addr
, size_t size
)
3586 struct dmar_domain
*domain
;
3587 unsigned long start_pfn
, last_pfn
;
3588 unsigned long nrpages
;
3589 unsigned long iova_pfn
;
3590 struct intel_iommu
*iommu
;
3591 struct page
*freelist
;
3592 struct pci_dev
*pdev
= NULL
;
3594 domain
= find_domain(dev
);
3597 iommu
= domain_get_iommu(domain
);
3599 iova_pfn
= IOVA_PFN(dev_addr
);
3601 nrpages
= aligned_nrpages(dev_addr
, size
);
3602 start_pfn
= mm_to_dma_pfn(iova_pfn
);
3603 last_pfn
= start_pfn
+ nrpages
- 1;
3605 if (dev_is_pci(dev
))
3606 pdev
= to_pci_dev(dev
);
3608 freelist
= domain_unmap(domain
, start_pfn
, last_pfn
);
3609 if (intel_iommu_strict
|| (pdev
&& pdev
->untrusted
) ||
3610 !has_iova_flush_queue(&domain
->iovad
)) {
3611 iommu_flush_iotlb_psi(iommu
, domain
, start_pfn
,
3612 nrpages
, !freelist
, 0);
3614 free_iova_fast(&domain
->iovad
, iova_pfn
, dma_to_mm_pfn(nrpages
));
3615 dma_free_pagelist(freelist
);
3617 queue_iova(&domain
->iovad
, iova_pfn
, nrpages
,
3618 (unsigned long)freelist
);
3620 * queue up the release of the unmap to save the 1/6th of the
3621 * cpu used up by the iotlb flush operation...
3625 trace_unmap_single(dev
, dev_addr
, size
);
3628 static void intel_unmap_page(struct device
*dev
, dma_addr_t dev_addr
,
3629 size_t size
, enum dma_data_direction dir
,
3630 unsigned long attrs
)
3632 if (iommu_need_mapping(dev
))
3633 intel_unmap(dev
, dev_addr
, size
);
3635 dma_direct_unmap_page(dev
, dev_addr
, size
, dir
, attrs
);
3638 static void intel_unmap_resource(struct device
*dev
, dma_addr_t dev_addr
,
3639 size_t size
, enum dma_data_direction dir
, unsigned long attrs
)
3641 if (iommu_need_mapping(dev
))
3642 intel_unmap(dev
, dev_addr
, size
);
3645 static void *intel_alloc_coherent(struct device
*dev
, size_t size
,
3646 dma_addr_t
*dma_handle
, gfp_t flags
,
3647 unsigned long attrs
)
3649 struct page
*page
= NULL
;
3652 if (!iommu_need_mapping(dev
))
3653 return dma_direct_alloc(dev
, size
, dma_handle
, flags
, attrs
);
3655 size
= PAGE_ALIGN(size
);
3656 order
= get_order(size
);
3658 if (gfpflags_allow_blocking(flags
)) {
3659 unsigned int count
= size
>> PAGE_SHIFT
;
3661 page
= dma_alloc_from_contiguous(dev
, count
, order
,
3662 flags
& __GFP_NOWARN
);
3666 page
= alloc_pages(flags
, order
);
3669 memset(page_address(page
), 0, size
);
3671 *dma_handle
= __intel_map_single(dev
, page_to_phys(page
), size
,
3673 dev
->coherent_dma_mask
);
3674 if (*dma_handle
!= DMA_MAPPING_ERROR
)
3675 return page_address(page
);
3676 if (!dma_release_from_contiguous(dev
, page
, size
>> PAGE_SHIFT
))
3677 __free_pages(page
, order
);
3682 static void intel_free_coherent(struct device
*dev
, size_t size
, void *vaddr
,
3683 dma_addr_t dma_handle
, unsigned long attrs
)
3686 struct page
*page
= virt_to_page(vaddr
);
3688 if (!iommu_need_mapping(dev
))
3689 return dma_direct_free(dev
, size
, vaddr
, dma_handle
, attrs
);
3691 size
= PAGE_ALIGN(size
);
3692 order
= get_order(size
);
3694 intel_unmap(dev
, dma_handle
, size
);
3695 if (!dma_release_from_contiguous(dev
, page
, size
>> PAGE_SHIFT
))
3696 __free_pages(page
, order
);
3699 static void intel_unmap_sg(struct device
*dev
, struct scatterlist
*sglist
,
3700 int nelems
, enum dma_data_direction dir
,
3701 unsigned long attrs
)
3703 dma_addr_t startaddr
= sg_dma_address(sglist
) & PAGE_MASK
;
3704 unsigned long nrpages
= 0;
3705 struct scatterlist
*sg
;
3708 if (!iommu_need_mapping(dev
))
3709 return dma_direct_unmap_sg(dev
, sglist
, nelems
, dir
, attrs
);
3711 for_each_sg(sglist
, sg
, nelems
, i
) {
3712 nrpages
+= aligned_nrpages(sg_dma_address(sg
), sg_dma_len(sg
));
3715 intel_unmap(dev
, startaddr
, nrpages
<< VTD_PAGE_SHIFT
);
3717 trace_unmap_sg(dev
, startaddr
, nrpages
<< VTD_PAGE_SHIFT
);
3720 static int intel_map_sg(struct device
*dev
, struct scatterlist
*sglist
, int nelems
,
3721 enum dma_data_direction dir
, unsigned long attrs
)
3724 struct dmar_domain
*domain
;
3727 unsigned long iova_pfn
;
3729 struct scatterlist
*sg
;
3730 unsigned long start_vpfn
;
3731 struct intel_iommu
*iommu
;
3733 BUG_ON(dir
== DMA_NONE
);
3734 if (!iommu_need_mapping(dev
))
3735 return dma_direct_map_sg(dev
, sglist
, nelems
, dir
, attrs
);
3737 domain
= deferred_attach_domain(dev
);
3741 iommu
= domain_get_iommu(domain
);
3743 for_each_sg(sglist
, sg
, nelems
, i
)
3744 size
+= aligned_nrpages(sg
->offset
, sg
->length
);
3746 iova_pfn
= intel_alloc_iova(dev
, domain
, dma_to_mm_pfn(size
),
3749 sglist
->dma_length
= 0;
3754 * Check if DMAR supports zero-length reads on write only
3757 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
3758 !cap_zlr(iommu
->cap
))
3759 prot
|= DMA_PTE_READ
;
3760 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
3761 prot
|= DMA_PTE_WRITE
;
3763 start_vpfn
= mm_to_dma_pfn(iova_pfn
);
3765 ret
= domain_sg_mapping(domain
, start_vpfn
, sglist
, size
, prot
);
3766 if (unlikely(ret
)) {
3767 dma_pte_free_pagetable(domain
, start_vpfn
,
3768 start_vpfn
+ size
- 1,
3769 agaw_to_level(domain
->agaw
) + 1);
3770 free_iova_fast(&domain
->iovad
, iova_pfn
, dma_to_mm_pfn(size
));
3774 trace_map_sg(dev
, iova_pfn
<< PAGE_SHIFT
,
3775 sg_phys(sglist
), size
<< VTD_PAGE_SHIFT
);
3780 static u64
intel_get_required_mask(struct device
*dev
)
3782 if (!iommu_need_mapping(dev
))
3783 return dma_direct_get_required_mask(dev
);
3784 return DMA_BIT_MASK(32);
3787 static const struct dma_map_ops intel_dma_ops
= {
3788 .alloc
= intel_alloc_coherent
,
3789 .free
= intel_free_coherent
,
3790 .map_sg
= intel_map_sg
,
3791 .unmap_sg
= intel_unmap_sg
,
3792 .map_page
= intel_map_page
,
3793 .unmap_page
= intel_unmap_page
,
3794 .map_resource
= intel_map_resource
,
3795 .unmap_resource
= intel_unmap_resource
,
3796 .dma_supported
= dma_direct_supported
,
3797 .mmap
= dma_common_mmap
,
3798 .get_sgtable
= dma_common_get_sgtable
,
3799 .get_required_mask
= intel_get_required_mask
,
3803 bounce_sync_single(struct device
*dev
, dma_addr_t addr
, size_t size
,
3804 enum dma_data_direction dir
, enum dma_sync_target target
)
3806 struct dmar_domain
*domain
;
3807 phys_addr_t tlb_addr
;
3809 domain
= find_domain(dev
);
3810 if (WARN_ON(!domain
))
3813 tlb_addr
= intel_iommu_iova_to_phys(&domain
->domain
, addr
);
3814 if (is_swiotlb_buffer(tlb_addr
))
3815 swiotlb_tbl_sync_single(dev
, tlb_addr
, size
, dir
, target
);
3819 bounce_map_single(struct device
*dev
, phys_addr_t paddr
, size_t size
,
3820 enum dma_data_direction dir
, unsigned long attrs
,
3823 size_t aligned_size
= ALIGN(size
, VTD_PAGE_SIZE
);
3824 struct dmar_domain
*domain
;
3825 struct intel_iommu
*iommu
;
3826 unsigned long iova_pfn
;
3827 unsigned long nrpages
;
3828 phys_addr_t tlb_addr
;
3832 domain
= deferred_attach_domain(dev
);
3833 if (WARN_ON(dir
== DMA_NONE
|| !domain
))
3834 return DMA_MAPPING_ERROR
;
3836 iommu
= domain_get_iommu(domain
);
3837 if (WARN_ON(!iommu
))
3838 return DMA_MAPPING_ERROR
;
3840 nrpages
= aligned_nrpages(0, size
);
3841 iova_pfn
= intel_alloc_iova(dev
, domain
,
3842 dma_to_mm_pfn(nrpages
), dma_mask
);
3844 return DMA_MAPPING_ERROR
;
3847 * Check if DMAR supports zero-length reads on write only
3850 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
||
3851 !cap_zlr(iommu
->cap
))
3852 prot
|= DMA_PTE_READ
;
3853 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
3854 prot
|= DMA_PTE_WRITE
;
3857 * If both the physical buffer start address and size are
3858 * page aligned, we don't need to use a bounce page.
3860 if (!IS_ALIGNED(paddr
| size
, VTD_PAGE_SIZE
)) {
3861 tlb_addr
= swiotlb_tbl_map_single(dev
,
3862 __phys_to_dma(dev
, io_tlb_start
),
3863 paddr
, size
, aligned_size
, dir
, attrs
);
3864 if (tlb_addr
== DMA_MAPPING_ERROR
) {
3867 /* Cleanup the padding area. */
3868 void *padding_start
= phys_to_virt(tlb_addr
);
3869 size_t padding_size
= aligned_size
;
3871 if (!(attrs
& DMA_ATTR_SKIP_CPU_SYNC
) &&
3872 (dir
== DMA_TO_DEVICE
||
3873 dir
== DMA_BIDIRECTIONAL
)) {
3874 padding_start
+= size
;
3875 padding_size
-= size
;
3878 memset(padding_start
, 0, padding_size
);
3884 ret
= domain_pfn_mapping(domain
, mm_to_dma_pfn(iova_pfn
),
3885 tlb_addr
>> VTD_PAGE_SHIFT
, nrpages
, prot
);
3889 trace_bounce_map_single(dev
, iova_pfn
<< PAGE_SHIFT
, paddr
, size
);
3891 return (phys_addr_t
)iova_pfn
<< PAGE_SHIFT
;
3894 if (is_swiotlb_buffer(tlb_addr
))
3895 swiotlb_tbl_unmap_single(dev
, tlb_addr
, size
,
3896 aligned_size
, dir
, attrs
);
3898 free_iova_fast(&domain
->iovad
, iova_pfn
, dma_to_mm_pfn(nrpages
));
3899 dev_err(dev
, "Device bounce map: %zx@%llx dir %d --- failed\n",
3900 size
, (unsigned long long)paddr
, dir
);
3902 return DMA_MAPPING_ERROR
;
3906 bounce_unmap_single(struct device
*dev
, dma_addr_t dev_addr
, size_t size
,
3907 enum dma_data_direction dir
, unsigned long attrs
)
3909 size_t aligned_size
= ALIGN(size
, VTD_PAGE_SIZE
);
3910 struct dmar_domain
*domain
;
3911 phys_addr_t tlb_addr
;
3913 domain
= find_domain(dev
);
3914 if (WARN_ON(!domain
))
3917 tlb_addr
= intel_iommu_iova_to_phys(&domain
->domain
, dev_addr
);
3918 if (WARN_ON(!tlb_addr
))
3921 intel_unmap(dev
, dev_addr
, size
);
3922 if (is_swiotlb_buffer(tlb_addr
))
3923 swiotlb_tbl_unmap_single(dev
, tlb_addr
, size
,
3924 aligned_size
, dir
, attrs
);
3926 trace_bounce_unmap_single(dev
, dev_addr
, size
);
3930 bounce_map_page(struct device
*dev
, struct page
*page
, unsigned long offset
,
3931 size_t size
, enum dma_data_direction dir
, unsigned long attrs
)
3933 return bounce_map_single(dev
, page_to_phys(page
) + offset
,
3934 size
, dir
, attrs
, *dev
->dma_mask
);
3938 bounce_map_resource(struct device
*dev
, phys_addr_t phys_addr
, size_t size
,
3939 enum dma_data_direction dir
, unsigned long attrs
)
3941 return bounce_map_single(dev
, phys_addr
, size
,
3942 dir
, attrs
, *dev
->dma_mask
);
3946 bounce_unmap_page(struct device
*dev
, dma_addr_t dev_addr
, size_t size
,
3947 enum dma_data_direction dir
, unsigned long attrs
)
3949 bounce_unmap_single(dev
, dev_addr
, size
, dir
, attrs
);
3953 bounce_unmap_resource(struct device
*dev
, dma_addr_t dev_addr
, size_t size
,
3954 enum dma_data_direction dir
, unsigned long attrs
)
3956 bounce_unmap_single(dev
, dev_addr
, size
, dir
, attrs
);
3960 bounce_unmap_sg(struct device
*dev
, struct scatterlist
*sglist
, int nelems
,
3961 enum dma_data_direction dir
, unsigned long attrs
)
3963 struct scatterlist
*sg
;
3966 for_each_sg(sglist
, sg
, nelems
, i
)
3967 bounce_unmap_page(dev
, sg
->dma_address
,
3968 sg_dma_len(sg
), dir
, attrs
);
3972 bounce_map_sg(struct device
*dev
, struct scatterlist
*sglist
, int nelems
,
3973 enum dma_data_direction dir
, unsigned long attrs
)
3976 struct scatterlist
*sg
;
3978 for_each_sg(sglist
, sg
, nelems
, i
) {
3979 sg
->dma_address
= bounce_map_page(dev
, sg_page(sg
),
3980 sg
->offset
, sg
->length
,
3982 if (sg
->dma_address
== DMA_MAPPING_ERROR
)
3984 sg_dma_len(sg
) = sg
->length
;
3990 bounce_unmap_sg(dev
, sglist
, i
, dir
, attrs
| DMA_ATTR_SKIP_CPU_SYNC
);
3995 bounce_sync_single_for_cpu(struct device
*dev
, dma_addr_t addr
,
3996 size_t size
, enum dma_data_direction dir
)
3998 bounce_sync_single(dev
, addr
, size
, dir
, SYNC_FOR_CPU
);
4002 bounce_sync_single_for_device(struct device
*dev
, dma_addr_t addr
,
4003 size_t size
, enum dma_data_direction dir
)
4005 bounce_sync_single(dev
, addr
, size
, dir
, SYNC_FOR_DEVICE
);
4009 bounce_sync_sg_for_cpu(struct device
*dev
, struct scatterlist
*sglist
,
4010 int nelems
, enum dma_data_direction dir
)
4012 struct scatterlist
*sg
;
4015 for_each_sg(sglist
, sg
, nelems
, i
)
4016 bounce_sync_single(dev
, sg_dma_address(sg
),
4017 sg_dma_len(sg
), dir
, SYNC_FOR_CPU
);
4021 bounce_sync_sg_for_device(struct device
*dev
, struct scatterlist
*sglist
,
4022 int nelems
, enum dma_data_direction dir
)
4024 struct scatterlist
*sg
;
4027 for_each_sg(sglist
, sg
, nelems
, i
)
4028 bounce_sync_single(dev
, sg_dma_address(sg
),
4029 sg_dma_len(sg
), dir
, SYNC_FOR_DEVICE
);
4032 static const struct dma_map_ops bounce_dma_ops
= {
4033 .alloc
= intel_alloc_coherent
,
4034 .free
= intel_free_coherent
,
4035 .map_sg
= bounce_map_sg
,
4036 .unmap_sg
= bounce_unmap_sg
,
4037 .map_page
= bounce_map_page
,
4038 .unmap_page
= bounce_unmap_page
,
4039 .sync_single_for_cpu
= bounce_sync_single_for_cpu
,
4040 .sync_single_for_device
= bounce_sync_single_for_device
,
4041 .sync_sg_for_cpu
= bounce_sync_sg_for_cpu
,
4042 .sync_sg_for_device
= bounce_sync_sg_for_device
,
4043 .map_resource
= bounce_map_resource
,
4044 .unmap_resource
= bounce_unmap_resource
,
4045 .dma_supported
= dma_direct_supported
,
4048 static inline int iommu_domain_cache_init(void)
4052 iommu_domain_cache
= kmem_cache_create("iommu_domain",
4053 sizeof(struct dmar_domain
),
4058 if (!iommu_domain_cache
) {
4059 pr_err("Couldn't create iommu_domain cache\n");
4066 static inline int iommu_devinfo_cache_init(void)
4070 iommu_devinfo_cache
= kmem_cache_create("iommu_devinfo",
4071 sizeof(struct device_domain_info
),
4075 if (!iommu_devinfo_cache
) {
4076 pr_err("Couldn't create devinfo cache\n");
4083 static int __init
iommu_init_mempool(void)
4086 ret
= iova_cache_get();
4090 ret
= iommu_domain_cache_init();
4094 ret
= iommu_devinfo_cache_init();
4098 kmem_cache_destroy(iommu_domain_cache
);
4105 static void __init
iommu_exit_mempool(void)
4107 kmem_cache_destroy(iommu_devinfo_cache
);
4108 kmem_cache_destroy(iommu_domain_cache
);
4112 static void quirk_ioat_snb_local_iommu(struct pci_dev
*pdev
)
4114 struct dmar_drhd_unit
*drhd
;
4118 /* We know that this device on this chipset has its own IOMMU.
4119 * If we find it under a different IOMMU, then the BIOS is lying
4120 * to us. Hope that the IOMMU for this device is actually
4121 * disabled, and it needs no translation...
4123 rc
= pci_bus_read_config_dword(pdev
->bus
, PCI_DEVFN(0, 0), 0xb0, &vtbar
);
4125 /* "can't" happen */
4126 dev_info(&pdev
->dev
, "failed to run vt-d quirk\n");
4129 vtbar
&= 0xffff0000;
4131 /* we know that this iommu should be at offset 0xa000 from vtbar */
4132 drhd
= dmar_find_matched_drhd_unit(pdev
);
4133 if (WARN_TAINT_ONCE(!drhd
|| drhd
->reg_base_addr
- vtbar
!= 0xa000,
4134 TAINT_FIRMWARE_WORKAROUND
,
4135 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
4136 pdev
->dev
.archdata
.iommu
= DUMMY_DEVICE_DOMAIN_INFO
;
4138 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL
, PCI_DEVICE_ID_INTEL_IOAT_SNB
, quirk_ioat_snb_local_iommu
);
4140 static void __init
init_no_remapping_devices(void)
4142 struct dmar_drhd_unit
*drhd
;
4146 for_each_drhd_unit(drhd
) {
4147 if (!drhd
->include_all
) {
4148 for_each_active_dev_scope(drhd
->devices
,
4149 drhd
->devices_cnt
, i
, dev
)
4151 /* ignore DMAR unit if no devices exist */
4152 if (i
== drhd
->devices_cnt
)
4157 for_each_active_drhd_unit(drhd
) {
4158 if (drhd
->include_all
)
4161 for_each_active_dev_scope(drhd
->devices
,
4162 drhd
->devices_cnt
, i
, dev
)
4163 if (!dev_is_pci(dev
) || !IS_GFX_DEVICE(to_pci_dev(dev
)))
4165 if (i
< drhd
->devices_cnt
)
4168 /* This IOMMU has *only* gfx devices. Either bypass it or
4169 set the gfx_mapped flag, as appropriate */
4170 if (!dmar_map_gfx
) {
4172 for_each_active_dev_scope(drhd
->devices
,
4173 drhd
->devices_cnt
, i
, dev
)
4174 dev
->archdata
.iommu
= DUMMY_DEVICE_DOMAIN_INFO
;
4179 #ifdef CONFIG_SUSPEND
4180 static int init_iommu_hw(void)
4182 struct dmar_drhd_unit
*drhd
;
4183 struct intel_iommu
*iommu
= NULL
;
4185 for_each_active_iommu(iommu
, drhd
)
4187 dmar_reenable_qi(iommu
);
4189 for_each_iommu(iommu
, drhd
) {
4190 if (drhd
->ignored
) {
4192 * we always have to disable PMRs or DMA may fail on
4196 iommu_disable_protect_mem_regions(iommu
);
4200 iommu_flush_write_buffer(iommu
);
4202 iommu_set_root_entry(iommu
);
4204 iommu
->flush
.flush_context(iommu
, 0, 0, 0,
4205 DMA_CCMD_GLOBAL_INVL
);
4206 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH
);
4207 iommu_enable_translation(iommu
);
4208 iommu_disable_protect_mem_regions(iommu
);
4214 static void iommu_flush_all(void)
4216 struct dmar_drhd_unit
*drhd
;
4217 struct intel_iommu
*iommu
;
4219 for_each_active_iommu(iommu
, drhd
) {
4220 iommu
->flush
.flush_context(iommu
, 0, 0, 0,
4221 DMA_CCMD_GLOBAL_INVL
);
4222 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0,
4223 DMA_TLB_GLOBAL_FLUSH
);
4227 static int iommu_suspend(void)
4229 struct dmar_drhd_unit
*drhd
;
4230 struct intel_iommu
*iommu
= NULL
;
4233 for_each_active_iommu(iommu
, drhd
) {
4234 iommu
->iommu_state
= kcalloc(MAX_SR_DMAR_REGS
, sizeof(u32
),
4236 if (!iommu
->iommu_state
)
4242 for_each_active_iommu(iommu
, drhd
) {
4243 iommu_disable_translation(iommu
);
4245 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
4247 iommu
->iommu_state
[SR_DMAR_FECTL_REG
] =
4248 readl(iommu
->reg
+ DMAR_FECTL_REG
);
4249 iommu
->iommu_state
[SR_DMAR_FEDATA_REG
] =
4250 readl(iommu
->reg
+ DMAR_FEDATA_REG
);
4251 iommu
->iommu_state
[SR_DMAR_FEADDR_REG
] =
4252 readl(iommu
->reg
+ DMAR_FEADDR_REG
);
4253 iommu
->iommu_state
[SR_DMAR_FEUADDR_REG
] =
4254 readl(iommu
->reg
+ DMAR_FEUADDR_REG
);
4256 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
4261 for_each_active_iommu(iommu
, drhd
)
4262 kfree(iommu
->iommu_state
);
4267 static void iommu_resume(void)
4269 struct dmar_drhd_unit
*drhd
;
4270 struct intel_iommu
*iommu
= NULL
;
4273 if (init_iommu_hw()) {
4275 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4277 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4281 for_each_active_iommu(iommu
, drhd
) {
4283 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
4285 writel(iommu
->iommu_state
[SR_DMAR_FECTL_REG
],
4286 iommu
->reg
+ DMAR_FECTL_REG
);
4287 writel(iommu
->iommu_state
[SR_DMAR_FEDATA_REG
],
4288 iommu
->reg
+ DMAR_FEDATA_REG
);
4289 writel(iommu
->iommu_state
[SR_DMAR_FEADDR_REG
],
4290 iommu
->reg
+ DMAR_FEADDR_REG
);
4291 writel(iommu
->iommu_state
[SR_DMAR_FEUADDR_REG
],
4292 iommu
->reg
+ DMAR_FEUADDR_REG
);
4294 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
4297 for_each_active_iommu(iommu
, drhd
)
4298 kfree(iommu
->iommu_state
);
4301 static struct syscore_ops iommu_syscore_ops
= {
4302 .resume
= iommu_resume
,
4303 .suspend
= iommu_suspend
,
4306 static void __init
init_iommu_pm_ops(void)
4308 register_syscore_ops(&iommu_syscore_ops
);
4312 static inline void init_iommu_pm_ops(void) {}
4313 #endif /* CONFIG_SUSPEND */
4315 int __init
dmar_parse_one_rmrr(struct acpi_dmar_header
*header
, void *arg
)
4317 struct acpi_dmar_reserved_memory
*rmrr
;
4318 struct dmar_rmrr_unit
*rmrru
;
4321 rmrr
= (struct acpi_dmar_reserved_memory
*)header
;
4322 ret
= arch_rmrr_sanity_check(rmrr
);
4326 rmrru
= kzalloc(sizeof(*rmrru
), GFP_KERNEL
);
4330 rmrru
->hdr
= header
;
4332 rmrru
->base_address
= rmrr
->base_address
;
4333 rmrru
->end_address
= rmrr
->end_address
;
4335 rmrru
->devices
= dmar_alloc_dev_scope((void *)(rmrr
+ 1),
4336 ((void *)rmrr
) + rmrr
->header
.length
,
4337 &rmrru
->devices_cnt
);
4338 if (rmrru
->devices_cnt
&& rmrru
->devices
== NULL
)
4341 list_add(&rmrru
->list
, &dmar_rmrr_units
);
4350 static struct dmar_atsr_unit
*dmar_find_atsr(struct acpi_dmar_atsr
*atsr
)
4352 struct dmar_atsr_unit
*atsru
;
4353 struct acpi_dmar_atsr
*tmp
;
4355 list_for_each_entry_rcu(atsru
, &dmar_atsr_units
, list
) {
4356 tmp
= (struct acpi_dmar_atsr
*)atsru
->hdr
;
4357 if (atsr
->segment
!= tmp
->segment
)
4359 if (atsr
->header
.length
!= tmp
->header
.length
)
4361 if (memcmp(atsr
, tmp
, atsr
->header
.length
) == 0)
4368 int dmar_parse_one_atsr(struct acpi_dmar_header
*hdr
, void *arg
)
4370 struct acpi_dmar_atsr
*atsr
;
4371 struct dmar_atsr_unit
*atsru
;
4373 if (system_state
>= SYSTEM_RUNNING
&& !intel_iommu_enabled
)
4376 atsr
= container_of(hdr
, struct acpi_dmar_atsr
, header
);
4377 atsru
= dmar_find_atsr(atsr
);
4381 atsru
= kzalloc(sizeof(*atsru
) + hdr
->length
, GFP_KERNEL
);
4386 * If memory is allocated from slab by ACPI _DSM method, we need to
4387 * copy the memory content because the memory buffer will be freed
4390 atsru
->hdr
= (void *)(atsru
+ 1);
4391 memcpy(atsru
->hdr
, hdr
, hdr
->length
);
4392 atsru
->include_all
= atsr
->flags
& 0x1;
4393 if (!atsru
->include_all
) {
4394 atsru
->devices
= dmar_alloc_dev_scope((void *)(atsr
+ 1),
4395 (void *)atsr
+ atsr
->header
.length
,
4396 &atsru
->devices_cnt
);
4397 if (atsru
->devices_cnt
&& atsru
->devices
== NULL
) {
4403 list_add_rcu(&atsru
->list
, &dmar_atsr_units
);
4408 static void intel_iommu_free_atsr(struct dmar_atsr_unit
*atsru
)
4410 dmar_free_dev_scope(&atsru
->devices
, &atsru
->devices_cnt
);
4414 int dmar_release_one_atsr(struct acpi_dmar_header
*hdr
, void *arg
)
4416 struct acpi_dmar_atsr
*atsr
;
4417 struct dmar_atsr_unit
*atsru
;
4419 atsr
= container_of(hdr
, struct acpi_dmar_atsr
, header
);
4420 atsru
= dmar_find_atsr(atsr
);
4422 list_del_rcu(&atsru
->list
);
4424 intel_iommu_free_atsr(atsru
);
4430 int dmar_check_one_atsr(struct acpi_dmar_header
*hdr
, void *arg
)
4434 struct acpi_dmar_atsr
*atsr
;
4435 struct dmar_atsr_unit
*atsru
;
4437 atsr
= container_of(hdr
, struct acpi_dmar_atsr
, header
);
4438 atsru
= dmar_find_atsr(atsr
);
4442 if (!atsru
->include_all
&& atsru
->devices
&& atsru
->devices_cnt
) {
4443 for_each_active_dev_scope(atsru
->devices
, atsru
->devices_cnt
,
4451 static int intel_iommu_add(struct dmar_drhd_unit
*dmaru
)
4454 struct intel_iommu
*iommu
= dmaru
->iommu
;
4456 if (g_iommus
[iommu
->seq_id
])
4459 if (hw_pass_through
&& !ecap_pass_through(iommu
->ecap
)) {
4460 pr_warn("%s: Doesn't support hardware pass through.\n",
4464 if (!ecap_sc_support(iommu
->ecap
) &&
4465 domain_update_iommu_snooping(iommu
)) {
4466 pr_warn("%s: Doesn't support snooping.\n",
4470 sp
= domain_update_iommu_superpage(iommu
) - 1;
4471 if (sp
>= 0 && !(cap_super_page_val(iommu
->cap
) & (1 << sp
))) {
4472 pr_warn("%s: Doesn't support large page.\n",
4478 * Disable translation if already enabled prior to OS handover.
4480 if (iommu
->gcmd
& DMA_GCMD_TE
)
4481 iommu_disable_translation(iommu
);
4483 g_iommus
[iommu
->seq_id
] = iommu
;
4484 ret
= iommu_init_domains(iommu
);
4486 ret
= iommu_alloc_root_entry(iommu
);
4490 #ifdef CONFIG_INTEL_IOMMU_SVM
4491 if (pasid_supported(iommu
))
4492 intel_svm_init(iommu
);
4495 if (dmaru
->ignored
) {
4497 * we always have to disable PMRs or DMA may fail on this device
4500 iommu_disable_protect_mem_regions(iommu
);
4504 intel_iommu_init_qi(iommu
);
4505 iommu_flush_write_buffer(iommu
);
4507 #ifdef CONFIG_INTEL_IOMMU_SVM
4508 if (pasid_supported(iommu
) && ecap_prs(iommu
->ecap
)) {
4509 ret
= intel_svm_enable_prq(iommu
);
4514 ret
= dmar_set_interrupt(iommu
);
4518 iommu_set_root_entry(iommu
);
4519 iommu
->flush
.flush_context(iommu
, 0, 0, 0, DMA_CCMD_GLOBAL_INVL
);
4520 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH
);
4521 iommu_enable_translation(iommu
);
4523 iommu_disable_protect_mem_regions(iommu
);
4527 disable_dmar_iommu(iommu
);
4529 free_dmar_iommu(iommu
);
4533 int dmar_iommu_hotplug(struct dmar_drhd_unit
*dmaru
, bool insert
)
4536 struct intel_iommu
*iommu
= dmaru
->iommu
;
4538 if (!intel_iommu_enabled
)
4544 ret
= intel_iommu_add(dmaru
);
4546 disable_dmar_iommu(iommu
);
4547 free_dmar_iommu(iommu
);
4553 static void intel_iommu_free_dmars(void)
4555 struct dmar_rmrr_unit
*rmrru
, *rmrr_n
;
4556 struct dmar_atsr_unit
*atsru
, *atsr_n
;
4558 list_for_each_entry_safe(rmrru
, rmrr_n
, &dmar_rmrr_units
, list
) {
4559 list_del(&rmrru
->list
);
4560 dmar_free_dev_scope(&rmrru
->devices
, &rmrru
->devices_cnt
);
4564 list_for_each_entry_safe(atsru
, atsr_n
, &dmar_atsr_units
, list
) {
4565 list_del(&atsru
->list
);
4566 intel_iommu_free_atsr(atsru
);
4570 int dmar_find_matched_atsr_unit(struct pci_dev
*dev
)
4573 struct pci_bus
*bus
;
4574 struct pci_dev
*bridge
= NULL
;
4576 struct acpi_dmar_atsr
*atsr
;
4577 struct dmar_atsr_unit
*atsru
;
4579 dev
= pci_physfn(dev
);
4580 for (bus
= dev
->bus
; bus
; bus
= bus
->parent
) {
4582 /* If it's an integrated device, allow ATS */
4585 /* Connected via non-PCIe: no ATS */
4586 if (!pci_is_pcie(bridge
) ||
4587 pci_pcie_type(bridge
) == PCI_EXP_TYPE_PCI_BRIDGE
)
4589 /* If we found the root port, look it up in the ATSR */
4590 if (pci_pcie_type(bridge
) == PCI_EXP_TYPE_ROOT_PORT
)
4595 list_for_each_entry_rcu(atsru
, &dmar_atsr_units
, list
) {
4596 atsr
= container_of(atsru
->hdr
, struct acpi_dmar_atsr
, header
);
4597 if (atsr
->segment
!= pci_domain_nr(dev
->bus
))
4600 for_each_dev_scope(atsru
->devices
, atsru
->devices_cnt
, i
, tmp
)
4601 if (tmp
== &bridge
->dev
)
4604 if (atsru
->include_all
)
4614 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info
*info
)
4617 struct dmar_rmrr_unit
*rmrru
;
4618 struct dmar_atsr_unit
*atsru
;
4619 struct acpi_dmar_atsr
*atsr
;
4620 struct acpi_dmar_reserved_memory
*rmrr
;
4622 if (!intel_iommu_enabled
&& system_state
>= SYSTEM_RUNNING
)
4625 list_for_each_entry(rmrru
, &dmar_rmrr_units
, list
) {
4626 rmrr
= container_of(rmrru
->hdr
,
4627 struct acpi_dmar_reserved_memory
, header
);
4628 if (info
->event
== BUS_NOTIFY_ADD_DEVICE
) {
4629 ret
= dmar_insert_dev_scope(info
, (void *)(rmrr
+ 1),
4630 ((void *)rmrr
) + rmrr
->header
.length
,
4631 rmrr
->segment
, rmrru
->devices
,
4632 rmrru
->devices_cnt
);
4635 } else if (info
->event
== BUS_NOTIFY_REMOVED_DEVICE
) {
4636 dmar_remove_dev_scope(info
, rmrr
->segment
,
4637 rmrru
->devices
, rmrru
->devices_cnt
);
4641 list_for_each_entry(atsru
, &dmar_atsr_units
, list
) {
4642 if (atsru
->include_all
)
4645 atsr
= container_of(atsru
->hdr
, struct acpi_dmar_atsr
, header
);
4646 if (info
->event
== BUS_NOTIFY_ADD_DEVICE
) {
4647 ret
= dmar_insert_dev_scope(info
, (void *)(atsr
+ 1),
4648 (void *)atsr
+ atsr
->header
.length
,
4649 atsr
->segment
, atsru
->devices
,
4650 atsru
->devices_cnt
);
4655 } else if (info
->event
== BUS_NOTIFY_REMOVED_DEVICE
) {
4656 if (dmar_remove_dev_scope(info
, atsr
->segment
,
4657 atsru
->devices
, atsru
->devices_cnt
))
4665 static int intel_iommu_memory_notifier(struct notifier_block
*nb
,
4666 unsigned long val
, void *v
)
4668 struct memory_notify
*mhp
= v
;
4669 unsigned long long start
, end
;
4670 unsigned long start_vpfn
, last_vpfn
;
4673 case MEM_GOING_ONLINE
:
4674 start
= mhp
->start_pfn
<< PAGE_SHIFT
;
4675 end
= ((mhp
->start_pfn
+ mhp
->nr_pages
) << PAGE_SHIFT
) - 1;
4676 if (iommu_domain_identity_map(si_domain
, start
, end
)) {
4677 pr_warn("Failed to build identity map for [%llx-%llx]\n",
4684 case MEM_CANCEL_ONLINE
:
4685 start_vpfn
= mm_to_dma_pfn(mhp
->start_pfn
);
4686 last_vpfn
= mm_to_dma_pfn(mhp
->start_pfn
+ mhp
->nr_pages
- 1);
4687 while (start_vpfn
<= last_vpfn
) {
4689 struct dmar_drhd_unit
*drhd
;
4690 struct intel_iommu
*iommu
;
4691 struct page
*freelist
;
4693 iova
= find_iova(&si_domain
->iovad
, start_vpfn
);
4695 pr_debug("Failed get IOVA for PFN %lx\n",
4700 iova
= split_and_remove_iova(&si_domain
->iovad
, iova
,
4701 start_vpfn
, last_vpfn
);
4703 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4704 start_vpfn
, last_vpfn
);
4708 freelist
= domain_unmap(si_domain
, iova
->pfn_lo
,
4712 for_each_active_iommu(iommu
, drhd
)
4713 iommu_flush_iotlb_psi(iommu
, si_domain
,
4714 iova
->pfn_lo
, iova_size(iova
),
4717 dma_free_pagelist(freelist
);
4719 start_vpfn
= iova
->pfn_hi
+ 1;
4720 free_iova_mem(iova
);
4728 static struct notifier_block intel_iommu_memory_nb
= {
4729 .notifier_call
= intel_iommu_memory_notifier
,
4733 static void free_all_cpu_cached_iovas(unsigned int cpu
)
4737 for (i
= 0; i
< g_num_of_iommus
; i
++) {
4738 struct intel_iommu
*iommu
= g_iommus
[i
];
4739 struct dmar_domain
*domain
;
4745 for (did
= 0; did
< cap_ndoms(iommu
->cap
); did
++) {
4746 domain
= get_iommu_domain(iommu
, (u16
)did
);
4750 free_cpu_cached_iovas(cpu
, &domain
->iovad
);
/*
 * CPU hotplug "dead" callback: release the IOVA ranges cached on behalf of
 * @cpu. Always reports success.
 */
static int intel_iommu_cpu_dead(unsigned int cpu)
{
	free_all_cpu_cached_iovas(cpu);

	return 0;
}
4761 static void intel_disable_iommus(void)
4763 struct intel_iommu
*iommu
= NULL
;
4764 struct dmar_drhd_unit
*drhd
;
4766 for_each_iommu(iommu
, drhd
)
4767 iommu_disable_translation(iommu
);
4770 void intel_iommu_shutdown(void)
4772 struct dmar_drhd_unit
*drhd
;
4773 struct intel_iommu
*iommu
= NULL
;
4775 if (no_iommu
|| dmar_disabled
)
4778 down_write(&dmar_global_lock
);
4780 /* Disable PMRs explicitly here. */
4781 for_each_iommu(iommu
, drhd
)
4782 iommu_disable_protect_mem_regions(iommu
);
4784 /* Make sure the IOMMUs are switched off */
4785 intel_disable_iommus();
4787 up_write(&dmar_global_lock
);
4790 static inline struct intel_iommu
*dev_to_intel_iommu(struct device
*dev
)
4792 struct iommu_device
*iommu_dev
= dev_to_iommu_device(dev
);
4794 return container_of(iommu_dev
, struct intel_iommu
, iommu
);
4797 static ssize_t
intel_iommu_show_version(struct device
*dev
,
4798 struct device_attribute
*attr
,
4801 struct intel_iommu
*iommu
= dev_to_intel_iommu(dev
);
4802 u32 ver
= readl(iommu
->reg
+ DMAR_VER_REG
);
4803 return sprintf(buf
, "%d:%d\n",
4804 DMAR_VER_MAJOR(ver
), DMAR_VER_MINOR(ver
));
4806 static DEVICE_ATTR(version
, S_IRUGO
, intel_iommu_show_version
, NULL
);
4808 static ssize_t
intel_iommu_show_address(struct device
*dev
,
4809 struct device_attribute
*attr
,
4812 struct intel_iommu
*iommu
= dev_to_intel_iommu(dev
);
4813 return sprintf(buf
, "%llx\n", iommu
->reg_phys
);
4815 static DEVICE_ATTR(address
, S_IRUGO
, intel_iommu_show_address
, NULL
);
4817 static ssize_t
intel_iommu_show_cap(struct device
*dev
,
4818 struct device_attribute
*attr
,
4821 struct intel_iommu
*iommu
= dev_to_intel_iommu(dev
);
4822 return sprintf(buf
, "%llx\n", iommu
->cap
);
4824 static DEVICE_ATTR(cap
, S_IRUGO
, intel_iommu_show_cap
, NULL
);
4826 static ssize_t
intel_iommu_show_ecap(struct device
*dev
,
4827 struct device_attribute
*attr
,
4830 struct intel_iommu
*iommu
= dev_to_intel_iommu(dev
);
4831 return sprintf(buf
, "%llx\n", iommu
->ecap
);
4833 static DEVICE_ATTR(ecap
, S_IRUGO
, intel_iommu_show_ecap
, NULL
);
4835 static ssize_t
intel_iommu_show_ndoms(struct device
*dev
,
4836 struct device_attribute
*attr
,
4839 struct intel_iommu
*iommu
= dev_to_intel_iommu(dev
);
4840 return sprintf(buf
, "%ld\n", cap_ndoms(iommu
->cap
));
4842 static DEVICE_ATTR(domains_supported
, S_IRUGO
, intel_iommu_show_ndoms
, NULL
);
4844 static ssize_t
intel_iommu_show_ndoms_used(struct device
*dev
,
4845 struct device_attribute
*attr
,
4848 struct intel_iommu
*iommu
= dev_to_intel_iommu(dev
);
4849 return sprintf(buf
, "%d\n", bitmap_weight(iommu
->domain_ids
,
4850 cap_ndoms(iommu
->cap
)));
4852 static DEVICE_ATTR(domains_used
, S_IRUGO
, intel_iommu_show_ndoms_used
, NULL
);
4854 static struct attribute
*intel_iommu_attrs
[] = {
4855 &dev_attr_version
.attr
,
4856 &dev_attr_address
.attr
,
4858 &dev_attr_ecap
.attr
,
4859 &dev_attr_domains_supported
.attr
,
4860 &dev_attr_domains_used
.attr
,
4864 static struct attribute_group intel_iommu_group
= {
4865 .name
= "intel-iommu",
4866 .attrs
= intel_iommu_attrs
,
4869 const struct attribute_group
*intel_iommu_groups
[] = {
4874 static inline bool has_untrusted_dev(void)
4876 struct pci_dev
*pdev
= NULL
;
4878 for_each_pci_dev(pdev
)
4879 if (pdev
->untrusted
)
4885 static int __init
platform_optin_force_iommu(void)
4887 if (!dmar_platform_optin() || no_platform_optin
|| !has_untrusted_dev())
4890 if (no_iommu
|| dmar_disabled
)
4891 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4894 * If Intel-IOMMU is disabled by default, we will apply identity
4895 * map for all devices except those marked as being untrusted.
4898 iommu_identity_mapping
|= IDENTMAP_ALL
;
4906 static int __init
probe_acpi_namespace_devices(void)
4908 struct dmar_drhd_unit
*drhd
;
4909 /* To avoid a -Wunused-but-set-variable warning. */
4910 struct intel_iommu
*iommu __maybe_unused
;
4914 for_each_active_iommu(iommu
, drhd
) {
4915 for_each_active_dev_scope(drhd
->devices
,
4916 drhd
->devices_cnt
, i
, dev
) {
4917 struct acpi_device_physical_node
*pn
;
4918 struct iommu_group
*group
;
4919 struct acpi_device
*adev
;
4921 if (dev
->bus
!= &acpi_bus_type
)
4924 adev
= to_acpi_device(dev
);
4925 mutex_lock(&adev
->physical_node_lock
);
4926 list_for_each_entry(pn
,
4927 &adev
->physical_node_list
, node
) {
4928 group
= iommu_group_get(pn
->dev
);
4930 iommu_group_put(group
);
4934 pn
->dev
->bus
->iommu_ops
= &intel_iommu_ops
;
4935 ret
= iommu_probe_device(pn
->dev
);
4939 mutex_unlock(&adev
->physical_node_lock
);
4949 int __init
intel_iommu_init(void)
4952 struct dmar_drhd_unit
*drhd
;
4953 struct intel_iommu
*iommu
;
4956 * Intel IOMMU is required for a TXT/tboot launch or platform
4957 * opt in, so enforce that.
4959 force_on
= tboot_force_iommu() || platform_optin_force_iommu();
4961 if (iommu_init_mempool()) {
4963 panic("tboot: Failed to initialize iommu memory\n");
4967 down_write(&dmar_global_lock
);
4968 if (dmar_table_init()) {
4970 panic("tboot: Failed to initialize DMAR table\n");
4974 if (dmar_dev_scope_init() < 0) {
4976 panic("tboot: Failed to initialize DMAR device scope\n");
4980 up_write(&dmar_global_lock
);
4983 * The bus notifier takes the dmar_global_lock, so lockdep will
4984 * complain later when we register it under the lock.
4986 dmar_register_bus_notifier();
4988 down_write(&dmar_global_lock
);
4990 if (no_iommu
|| dmar_disabled
) {
4992 * We exit the function here to ensure IOMMU's remapping and
4993 * mempool aren't setup, which means that the IOMMU's PMRs
4994 * won't be disabled via the call to init_dmars(). So disable
4995 * it explicitly here. The PMRs were setup by tboot prior to
4996 * calling SENTER, but the kernel is expected to reset/tear
4999 if (intel_iommu_tboot_noforce
) {
5000 for_each_iommu(iommu
, drhd
)
5001 iommu_disable_protect_mem_regions(iommu
);
5005 * Make sure the IOMMUs are switched off, even when we
5006 * boot into a kexec kernel and the previous kernel left
5009 intel_disable_iommus();
5013 if (list_empty(&dmar_rmrr_units
))
5014 pr_info("No RMRR found\n");
5016 if (list_empty(&dmar_atsr_units
))
5017 pr_info("No ATSR found\n");
5019 if (dmar_init_reserved_ranges()) {
5021 panic("tboot: Failed to reserve iommu ranges\n");
5022 goto out_free_reserved_range
;
5026 intel_iommu_gfx_mapped
= 1;
5028 init_no_remapping_devices();
5033 panic("tboot: Failed to initialize DMARs\n");
5034 pr_err("Initialization failed\n");
5035 goto out_free_reserved_range
;
5037 up_write(&dmar_global_lock
);
5039 #if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
5041 * If the system has no untrusted device or the user has decided
5042 * to disable the bounce page mechanisms, we don't need swiotlb.
5043 * Mark this and the pre-allocated bounce pages will be released
5046 if (!has_untrusted_dev() || intel_no_bounce
)
5049 dma_ops
= &intel_dma_ops
;
5051 init_iommu_pm_ops();
5053 for_each_active_iommu(iommu
, drhd
) {
5054 iommu_device_sysfs_add(&iommu
->iommu
, NULL
,
5057 iommu_device_set_ops(&iommu
->iommu
, &intel_iommu_ops
);
5058 iommu_device_register(&iommu
->iommu
);
5061 bus_set_iommu(&pci_bus_type
, &intel_iommu_ops
);
5062 if (si_domain
&& !hw_pass_through
)
5063 register_memory_notifier(&intel_iommu_memory_nb
);
5064 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD
, "iommu/intel:dead", NULL
,
5065 intel_iommu_cpu_dead
);
5067 down_read(&dmar_global_lock
);
5068 if (probe_acpi_namespace_devices())
5069 pr_warn("ACPI name space devices didn't probe correctly\n");
5070 up_read(&dmar_global_lock
);
5072 /* Finally, we enable the DMA remapping hardware. */
5073 for_each_iommu(iommu
, drhd
) {
5074 if (!drhd
->ignored
&& !translation_pre_enabled(iommu
))
5075 iommu_enable_translation(iommu
);
5077 iommu_disable_protect_mem_regions(iommu
);
5079 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
5081 intel_iommu_enabled
= 1;
5082 intel_iommu_debugfs_init();
5086 out_free_reserved_range
:
5087 put_iova_domain(&reserved_iova_list
);
5089 intel_iommu_free_dmars();
5090 up_write(&dmar_global_lock
);
5091 iommu_exit_mempool();
5095 static int domain_context_clear_one_cb(struct pci_dev
*pdev
, u16 alias
, void *opaque
)
5097 struct intel_iommu
*iommu
= opaque
;
5099 domain_context_clear_one(iommu
, PCI_BUS_NUM(alias
), alias
& 0xff);
5104 * NB - intel-iommu lacks any sort of reference counting for the users of
5105 * dependent devices. If multiple endpoints have intersecting dependent
5106 * devices, unbinding the driver from any one of them will possibly leave
5107 * the others unable to operate.
5109 static void domain_context_clear(struct intel_iommu
*iommu
, struct device
*dev
)
5111 if (!iommu
|| !dev
|| !dev_is_pci(dev
))
5114 pci_for_each_dma_alias(to_pci_dev(dev
), &domain_context_clear_one_cb
, iommu
);
5117 static void __dmar_remove_one_dev_info(struct device_domain_info
*info
)
5119 struct dmar_domain
*domain
;
5120 struct intel_iommu
*iommu
;
5121 unsigned long flags
;
5123 assert_spin_locked(&device_domain_lock
);
5128 iommu
= info
->iommu
;
5129 domain
= info
->domain
;
5132 if (dev_is_pci(info
->dev
) && sm_supported(iommu
))
5133 intel_pasid_tear_down_entry(iommu
, info
->dev
,
5136 iommu_disable_dev_iotlb(info
);
5137 domain_context_clear(iommu
, info
->dev
);
5138 intel_pasid_free_table(info
->dev
);
5141 unlink_domain_info(info
);
5143 spin_lock_irqsave(&iommu
->lock
, flags
);
5144 domain_detach_iommu(domain
, iommu
);
5145 spin_unlock_irqrestore(&iommu
->lock
, flags
);
5147 /* free the private domain */
5148 if (domain
->flags
& DOMAIN_FLAG_LOSE_CHILDREN
&&
5149 !(domain
->flags
& DOMAIN_FLAG_STATIC_IDENTITY
) &&
5150 list_empty(&domain
->devices
))
5151 domain_exit(info
->domain
);
5153 free_devinfo_mem(info
);
5156 static void dmar_remove_one_dev_info(struct device
*dev
)
5158 struct device_domain_info
*info
;
5159 unsigned long flags
;
5161 spin_lock_irqsave(&device_domain_lock
, flags
);
5162 info
= dev
->archdata
.iommu
;
5163 if (info
&& info
!= DEFER_DEVICE_DOMAIN_INFO
5164 && info
!= DUMMY_DEVICE_DOMAIN_INFO
)
5165 __dmar_remove_one_dev_info(info
);
5166 spin_unlock_irqrestore(&device_domain_lock
, flags
);
5169 static int md_domain_init(struct dmar_domain
*domain
, int guest_width
)
5173 init_iova_domain(&domain
->iovad
, VTD_PAGE_SIZE
, IOVA_START_PFN
);
5174 domain_reserve_special_ranges(domain
);
5176 /* calculate AGAW */
5177 domain
->gaw
= guest_width
;
5178 adjust_width
= guestwidth_to_adjustwidth(guest_width
);
5179 domain
->agaw
= width_to_agaw(adjust_width
);
5181 domain
->iommu_coherency
= 0;
5182 domain
->iommu_snooping
= 0;
5183 domain
->iommu_superpage
= 0;
5184 domain
->max_addr
= 0;
5186 /* always allocate the top pgd */
5187 domain
->pgd
= (struct dma_pte
*)alloc_pgtable_page(domain
->nid
);
5190 domain_flush_cache(domain
, domain
->pgd
, PAGE_SIZE
);
5194 static struct iommu_domain
*intel_iommu_domain_alloc(unsigned type
)
5196 struct dmar_domain
*dmar_domain
;
5197 struct iommu_domain
*domain
;
5200 case IOMMU_DOMAIN_DMA
:
5202 case IOMMU_DOMAIN_UNMANAGED
:
5203 dmar_domain
= alloc_domain(0);
5205 pr_err("Can't allocate dmar_domain\n");
5208 if (md_domain_init(dmar_domain
, DEFAULT_DOMAIN_ADDRESS_WIDTH
)) {
5209 pr_err("Domain initialization failed\n");
5210 domain_exit(dmar_domain
);
5214 if (type
== IOMMU_DOMAIN_DMA
&&
5215 init_iova_flush_queue(&dmar_domain
->iovad
,
5216 iommu_flush_iova
, iova_entry_free
)) {
5217 pr_warn("iova flush queue initialization failed\n");
5218 intel_iommu_strict
= 1;
5221 domain_update_iommu_cap(dmar_domain
);
5223 domain
= &dmar_domain
->domain
;
5224 domain
->geometry
.aperture_start
= 0;
5225 domain
->geometry
.aperture_end
=
5226 __DOMAIN_MAX_ADDR(dmar_domain
->gaw
);
5227 domain
->geometry
.force_aperture
= true;
5230 case IOMMU_DOMAIN_IDENTITY
:
5231 return &si_domain
->domain
;
5239 static void intel_iommu_domain_free(struct iommu_domain
*domain
)
5241 if (domain
!= &si_domain
->domain
)
5242 domain_exit(to_dmar_domain(domain
));
5246 * Check whether a @domain could be attached to the @dev through the
5247 * aux-domain attach/detach APIs.
5250 is_aux_domain(struct device
*dev
, struct iommu_domain
*domain
)
5252 struct device_domain_info
*info
= dev
->archdata
.iommu
;
5254 return info
&& info
->auxd_enabled
&&
5255 domain
->type
== IOMMU_DOMAIN_UNMANAGED
;
5258 static void auxiliary_link_device(struct dmar_domain
*domain
,
5261 struct device_domain_info
*info
= dev
->archdata
.iommu
;
5263 assert_spin_locked(&device_domain_lock
);
5267 domain
->auxd_refcnt
++;
5268 list_add(&domain
->auxd
, &info
->auxiliary_domains
);
5271 static void auxiliary_unlink_device(struct dmar_domain
*domain
,
5274 struct device_domain_info
*info
= dev
->archdata
.iommu
;
5276 assert_spin_locked(&device_domain_lock
);
5280 list_del(&domain
->auxd
);
5281 domain
->auxd_refcnt
--;
5283 if (!domain
->auxd_refcnt
&& domain
->default_pasid
> 0)
5284 intel_pasid_free_id(domain
->default_pasid
);
5287 static int aux_domain_add_dev(struct dmar_domain
*domain
,
5292 unsigned long flags
;
5293 struct intel_iommu
*iommu
;
5295 iommu
= device_to_iommu(dev
, &bus
, &devfn
);
5299 if (domain
->default_pasid
<= 0) {
5302 pasid
= intel_pasid_alloc_id(domain
, PASID_MIN
,
5303 pci_max_pasids(to_pci_dev(dev
)),
5306 pr_err("Can't allocate default pasid\n");
5309 domain
->default_pasid
= pasid
;
5312 spin_lock_irqsave(&device_domain_lock
, flags
);
5314 * iommu->lock must be held to attach domain to iommu and setup the
5315 * pasid entry for second level translation.
5317 spin_lock(&iommu
->lock
);
5318 ret
= domain_attach_iommu(domain
, iommu
);
5322 /* Setup the PASID entry for mediated devices: */
5323 ret
= intel_pasid_setup_second_level(iommu
, domain
, dev
,
5324 domain
->default_pasid
);
5327 spin_unlock(&iommu
->lock
);
5329 auxiliary_link_device(domain
, dev
);
5331 spin_unlock_irqrestore(&device_domain_lock
, flags
);
5336 domain_detach_iommu(domain
, iommu
);
5338 spin_unlock(&iommu
->lock
);
5339 spin_unlock_irqrestore(&device_domain_lock
, flags
);
5340 if (!domain
->auxd_refcnt
&& domain
->default_pasid
> 0)
5341 intel_pasid_free_id(domain
->default_pasid
);
5346 static void aux_domain_remove_dev(struct dmar_domain
*domain
,
5349 struct device_domain_info
*info
;
5350 struct intel_iommu
*iommu
;
5351 unsigned long flags
;
5353 if (!is_aux_domain(dev
, &domain
->domain
))
5356 spin_lock_irqsave(&device_domain_lock
, flags
);
5357 info
= dev
->archdata
.iommu
;
5358 iommu
= info
->iommu
;
5360 auxiliary_unlink_device(domain
, dev
);
5362 spin_lock(&iommu
->lock
);
5363 intel_pasid_tear_down_entry(iommu
, dev
, domain
->default_pasid
);
5364 domain_detach_iommu(domain
, iommu
);
5365 spin_unlock(&iommu
->lock
);
5367 spin_unlock_irqrestore(&device_domain_lock
, flags
);
5370 static int prepare_domain_attach_device(struct iommu_domain
*domain
,
5373 struct dmar_domain
*dmar_domain
= to_dmar_domain(domain
);
5374 struct intel_iommu
*iommu
;
5378 iommu
= device_to_iommu(dev
, &bus
, &devfn
);
5382 /* check if this iommu agaw is sufficient for max mapped address */
5383 addr_width
= agaw_to_width(iommu
->agaw
);
5384 if (addr_width
> cap_mgaw(iommu
->cap
))
5385 addr_width
= cap_mgaw(iommu
->cap
);
5387 if (dmar_domain
->max_addr
> (1LL << addr_width
)) {
5388 dev_err(dev
, "%s: iommu width (%d) is not "
5389 "sufficient for the mapped address (%llx)\n",
5390 __func__
, addr_width
, dmar_domain
->max_addr
);
5393 dmar_domain
->gaw
= addr_width
;
5396 * Knock out extra levels of page tables if necessary
5398 while (iommu
->agaw
< dmar_domain
->agaw
) {
5399 struct dma_pte
*pte
;
5401 pte
= dmar_domain
->pgd
;
5402 if (dma_pte_present(pte
)) {
5403 dmar_domain
->pgd
= (struct dma_pte
*)
5404 phys_to_virt(dma_pte_addr(pte
));
5405 free_pgtable_page(pte
);
5407 dmar_domain
->agaw
--;
5413 static int intel_iommu_attach_device(struct iommu_domain
*domain
,
5418 if (domain
->type
== IOMMU_DOMAIN_UNMANAGED
&&
5419 device_is_rmrr_locked(dev
)) {
5420 dev_warn(dev
, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5424 if (is_aux_domain(dev
, domain
))
5427 /* normally dev is not mapped */
5428 if (unlikely(domain_context_mapped(dev
))) {
5429 struct dmar_domain
*old_domain
;
5431 old_domain
= find_domain(dev
);
5433 dmar_remove_one_dev_info(dev
);
5436 ret
= prepare_domain_attach_device(domain
, dev
);
5440 return domain_add_dev_info(to_dmar_domain(domain
), dev
);
5443 static int intel_iommu_aux_attach_device(struct iommu_domain
*domain
,
5448 if (!is_aux_domain(dev
, domain
))
5451 ret
= prepare_domain_attach_device(domain
, dev
);
5455 return aux_domain_add_dev(to_dmar_domain(domain
), dev
);
/*
 * iommu_ops->detach_dev: drop the per-device domain info of @dev.
 * @domain is not consulted.
 */
static void intel_iommu_detach_device(struct iommu_domain *domain,
				      struct device *dev)
{
	dmar_remove_one_dev_info(dev);
}
/* iommu_ops->aux_detach_dev: detach auxiliary @domain from @dev. */
static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
					  struct device *dev)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);

	aux_domain_remove_dev(dmar_domain, dev);
}
5470 static int intel_iommu_map(struct iommu_domain
*domain
,
5471 unsigned long iova
, phys_addr_t hpa
,
5472 size_t size
, int iommu_prot
, gfp_t gfp
)
5474 struct dmar_domain
*dmar_domain
= to_dmar_domain(domain
);
5479 if (iommu_prot
& IOMMU_READ
)
5480 prot
|= DMA_PTE_READ
;
5481 if (iommu_prot
& IOMMU_WRITE
)
5482 prot
|= DMA_PTE_WRITE
;
5483 if ((iommu_prot
& IOMMU_CACHE
) && dmar_domain
->iommu_snooping
)
5484 prot
|= DMA_PTE_SNP
;
5486 max_addr
= iova
+ size
;
5487 if (dmar_domain
->max_addr
< max_addr
) {
5490 /* check if minimum agaw is sufficient for mapped address */
5491 end
= __DOMAIN_MAX_ADDR(dmar_domain
->gaw
) + 1;
5492 if (end
< max_addr
) {
5493 pr_err("%s: iommu width (%d) is not "
5494 "sufficient for the mapped address (%llx)\n",
5495 __func__
, dmar_domain
->gaw
, max_addr
);
5498 dmar_domain
->max_addr
= max_addr
;
5500 /* Round up size to next multiple of PAGE_SIZE, if it and
5501 the low bits of hpa would take us onto the next page */
5502 size
= aligned_nrpages(hpa
, size
);
5503 ret
= domain_pfn_mapping(dmar_domain
, iova
>> VTD_PAGE_SHIFT
,
5504 hpa
>> VTD_PAGE_SHIFT
, size
, prot
);
5508 static size_t intel_iommu_unmap(struct iommu_domain
*domain
,
5509 unsigned long iova
, size_t size
,
5510 struct iommu_iotlb_gather
*gather
)
5512 struct dmar_domain
*dmar_domain
= to_dmar_domain(domain
);
5513 struct page
*freelist
= NULL
;
5514 unsigned long start_pfn
, last_pfn
;
5515 unsigned int npages
;
5516 int iommu_id
, level
= 0;
5518 /* Cope with horrid API which requires us to unmap more than the
5519 size argument if it happens to be a large-page mapping. */
5520 BUG_ON(!pfn_to_dma_pte(dmar_domain
, iova
>> VTD_PAGE_SHIFT
, &level
));
5522 if (size
< VTD_PAGE_SIZE
<< level_to_offset_bits(level
))
5523 size
= VTD_PAGE_SIZE
<< level_to_offset_bits(level
);
5525 start_pfn
= iova
>> VTD_PAGE_SHIFT
;
5526 last_pfn
= (iova
+ size
- 1) >> VTD_PAGE_SHIFT
;
5528 freelist
= domain_unmap(dmar_domain
, start_pfn
, last_pfn
);
5530 npages
= last_pfn
- start_pfn
+ 1;
5532 for_each_domain_iommu(iommu_id
, dmar_domain
)
5533 iommu_flush_iotlb_psi(g_iommus
[iommu_id
], dmar_domain
,
5534 start_pfn
, npages
, !freelist
, 0);
5536 dma_free_pagelist(freelist
);
5538 if (dmar_domain
->max_addr
== iova
+ size
)
5539 dmar_domain
->max_addr
= iova
;
5544 static phys_addr_t
intel_iommu_iova_to_phys(struct iommu_domain
*domain
,
5547 struct dmar_domain
*dmar_domain
= to_dmar_domain(domain
);
5548 struct dma_pte
*pte
;
5552 pte
= pfn_to_dma_pte(dmar_domain
, iova
>> VTD_PAGE_SHIFT
, &level
);
5554 phys
= dma_pte_addr(pte
);
5559 static inline bool scalable_mode_support(void)
5561 struct dmar_drhd_unit
*drhd
;
5562 struct intel_iommu
*iommu
;
5566 for_each_active_iommu(iommu
, drhd
) {
5567 if (!sm_supported(iommu
)) {
5577 static inline bool iommu_pasid_support(void)
5579 struct dmar_drhd_unit
*drhd
;
5580 struct intel_iommu
*iommu
;
5584 for_each_active_iommu(iommu
, drhd
) {
5585 if (!pasid_supported(iommu
)) {
5595 static bool intel_iommu_capable(enum iommu_cap cap
)
5597 if (cap
== IOMMU_CAP_CACHE_COHERENCY
)
5598 return domain_update_iommu_snooping(NULL
) == 1;
5599 if (cap
== IOMMU_CAP_INTR_REMAP
)
5600 return irq_remapping_enabled
== 1;
5605 static int intel_iommu_add_device(struct device
*dev
)
5607 struct dmar_domain
*dmar_domain
;
5608 struct iommu_domain
*domain
;
5609 struct intel_iommu
*iommu
;
5610 struct iommu_group
*group
;
5614 iommu
= device_to_iommu(dev
, &bus
, &devfn
);
5618 iommu_device_link(&iommu
->iommu
, dev
);
5620 if (translation_pre_enabled(iommu
))
5621 dev
->archdata
.iommu
= DEFER_DEVICE_DOMAIN_INFO
;
5623 group
= iommu_group_get_for_dev(dev
);
5625 if (IS_ERR(group
)) {
5626 ret
= PTR_ERR(group
);
5630 iommu_group_put(group
);
5632 domain
= iommu_get_domain_for_dev(dev
);
5633 dmar_domain
= to_dmar_domain(domain
);
5634 if (domain
->type
== IOMMU_DOMAIN_DMA
) {
5635 if (device_def_domain_type(dev
) == IOMMU_DOMAIN_IDENTITY
) {
5636 ret
= iommu_request_dm_for_dev(dev
);
5638 dmar_remove_one_dev_info(dev
);
5639 dmar_domain
->flags
|= DOMAIN_FLAG_LOSE_CHILDREN
;
5640 domain_add_dev_info(si_domain
, dev
);
5642 "Device uses a private identity domain.\n");
5646 if (device_def_domain_type(dev
) == IOMMU_DOMAIN_DMA
) {
5647 ret
= iommu_request_dma_domain_for_dev(dev
);
5649 dmar_remove_one_dev_info(dev
);
5650 dmar_domain
->flags
|= DOMAIN_FLAG_LOSE_CHILDREN
;
5651 if (!get_private_domain_for_dev(dev
)) {
5653 "Failed to get a private domain.\n");
5659 "Device uses a private dma domain.\n");
5664 if (device_needs_bounce(dev
)) {
5665 dev_info(dev
, "Use Intel IOMMU bounce page dma_ops\n");
5666 set_dma_ops(dev
, &bounce_dma_ops
);
5672 iommu_device_unlink(&iommu
->iommu
, dev
);
5676 static void intel_iommu_remove_device(struct device
*dev
)
5678 struct intel_iommu
*iommu
;
5681 iommu
= device_to_iommu(dev
, &bus
, &devfn
);
5685 dmar_remove_one_dev_info(dev
);
5687 iommu_group_remove_device(dev
);
5689 iommu_device_unlink(&iommu
->iommu
, dev
);
5691 if (device_needs_bounce(dev
))
5692 set_dma_ops(dev
, NULL
);
5695 static void intel_iommu_get_resv_regions(struct device
*device
,
5696 struct list_head
*head
)
5698 int prot
= DMA_PTE_READ
| DMA_PTE_WRITE
;
5699 struct iommu_resv_region
*reg
;
5700 struct dmar_rmrr_unit
*rmrr
;
5701 struct device
*i_dev
;
5704 down_read(&dmar_global_lock
);
5705 for_each_rmrr_units(rmrr
) {
5706 for_each_active_dev_scope(rmrr
->devices
, rmrr
->devices_cnt
,
5708 struct iommu_resv_region
*resv
;
5709 enum iommu_resv_type type
;
5712 if (i_dev
!= device
&&
5713 !is_downstream_to_pci_bridge(device
, i_dev
))
5716 length
= rmrr
->end_address
- rmrr
->base_address
+ 1;
5718 type
= device_rmrr_is_relaxable(device
) ?
5719 IOMMU_RESV_DIRECT_RELAXABLE
: IOMMU_RESV_DIRECT
;
5721 resv
= iommu_alloc_resv_region(rmrr
->base_address
,
5722 length
, prot
, type
);
5726 list_add_tail(&resv
->list
, head
);
5729 up_read(&dmar_global_lock
);
5731 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5732 if (dev_is_pci(device
)) {
5733 struct pci_dev
*pdev
= to_pci_dev(device
);
5735 if ((pdev
->class >> 8) == PCI_CLASS_BRIDGE_ISA
) {
5736 reg
= iommu_alloc_resv_region(0, 1UL << 24, prot
,
5737 IOMMU_RESV_DIRECT_RELAXABLE
);
5739 list_add_tail(®
->list
, head
);
5742 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
5744 reg
= iommu_alloc_resv_region(IOAPIC_RANGE_START
,
5745 IOAPIC_RANGE_END
- IOAPIC_RANGE_START
+ 1,
5749 list_add_tail(®
->list
, head
);
5752 static void intel_iommu_put_resv_regions(struct device
*dev
,
5753 struct list_head
*head
)
5755 struct iommu_resv_region
*entry
, *next
;
5757 list_for_each_entry_safe(entry
, next
, head
, list
)
5761 int intel_iommu_enable_pasid(struct intel_iommu
*iommu
, struct device
*dev
)
5763 struct device_domain_info
*info
;
5764 struct context_entry
*context
;
5765 struct dmar_domain
*domain
;
5766 unsigned long flags
;
5770 domain
= find_domain(dev
);
5774 spin_lock_irqsave(&device_domain_lock
, flags
);
5775 spin_lock(&iommu
->lock
);
5778 info
= dev
->archdata
.iommu
;
5779 if (!info
|| !info
->pasid_supported
)
5782 context
= iommu_context_addr(iommu
, info
->bus
, info
->devfn
, 0);
5783 if (WARN_ON(!context
))
5786 ctx_lo
= context
[0].lo
;
5788 if (!(ctx_lo
& CONTEXT_PASIDE
)) {
5789 ctx_lo
|= CONTEXT_PASIDE
;
5790 context
[0].lo
= ctx_lo
;
5792 iommu
->flush
.flush_context(iommu
,
5793 domain
->iommu_did
[iommu
->seq_id
],
5794 PCI_DEVID(info
->bus
, info
->devfn
),
5795 DMA_CCMD_MASK_NOBIT
,
5796 DMA_CCMD_DEVICE_INVL
);
5799 /* Enable PASID support in the device, if it wasn't already */
5800 if (!info
->pasid_enabled
)
5801 iommu_enable_dev_iotlb(info
);
5806 spin_unlock(&iommu
->lock
);
5807 spin_unlock_irqrestore(&device_domain_lock
, flags
);
5812 static void intel_iommu_apply_resv_region(struct device
*dev
,
5813 struct iommu_domain
*domain
,
5814 struct iommu_resv_region
*region
)
5816 struct dmar_domain
*dmar_domain
= to_dmar_domain(domain
);
5817 unsigned long start
, end
;
5819 start
= IOVA_PFN(region
->start
);
5820 end
= IOVA_PFN(region
->start
+ region
->length
- 1);
5822 WARN_ON_ONCE(!reserve_iova(&dmar_domain
->iovad
, start
, end
));
/*
 * iommu_ops->device_group: PCI devices are grouped by
 * pci_device_group(); everything else uses generic_device_group().
 */
static struct iommu_group *intel_iommu_device_group(struct device *dev)
{
	return dev_is_pci(dev) ? pci_device_group(dev)
			       : generic_device_group(dev);
}
#ifdef CONFIG_INTEL_IOMMU_SVM
/*
 * Look up the IOMMU that serves @dev for SVM use.
 * Returns NULL (after logging) when the device is marked as a dummy
 * device or when no IOMMU is found for it.
 */
struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
{
	struct intel_iommu *found;
	u8 bus, devfn;

	if (iommu_dummy(dev)) {
		dev_warn(dev,
			 "No IOMMU translation for device; cannot enable SVM\n");
		return NULL;
	}

	found = device_to_iommu(dev, &bus, &devfn);
	if (found)
		return found;

	dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
	return NULL;
}
#endif /* CONFIG_INTEL_IOMMU_SVM */
5854 static int intel_iommu_enable_auxd(struct device
*dev
)
5856 struct device_domain_info
*info
;
5857 struct intel_iommu
*iommu
;
5858 unsigned long flags
;
5862 iommu
= device_to_iommu(dev
, &bus
, &devfn
);
5863 if (!iommu
|| dmar_disabled
)
5866 if (!sm_supported(iommu
) || !pasid_supported(iommu
))
5869 ret
= intel_iommu_enable_pasid(iommu
, dev
);
5873 spin_lock_irqsave(&device_domain_lock
, flags
);
5874 info
= dev
->archdata
.iommu
;
5875 info
->auxd_enabled
= 1;
5876 spin_unlock_irqrestore(&device_domain_lock
, flags
);
5881 static int intel_iommu_disable_auxd(struct device
*dev
)
5883 struct device_domain_info
*info
;
5884 unsigned long flags
;
5886 spin_lock_irqsave(&device_domain_lock
, flags
);
5887 info
= dev
->archdata
.iommu
;
5888 if (!WARN_ON(!info
))
5889 info
->auxd_enabled
= 0;
5890 spin_unlock_irqrestore(&device_domain_lock
, flags
);
5896 * A PCI express designated vendor specific extended capability is defined
5897 * in the section 3.7 of Intel scalable I/O virtualization technical spec
5898 * for system software and tools to detect endpoint devices supporting the
5899 * Intel scalable IO virtualization without host driver dependency.
5901 * Returns the address of the matching extended capability structure within
5902 * the device's PCI configuration space or 0 if the device does not support
5905 static int siov_find_pci_dvsec(struct pci_dev
*pdev
)
5910 pos
= pci_find_next_ext_capability(pdev
, 0, 0x23);
5912 pci_read_config_word(pdev
, pos
+ 4, &vendor
);
5913 pci_read_config_word(pdev
, pos
+ 8, &id
);
5914 if (vendor
== PCI_VENDOR_ID_INTEL
&& id
== 5)
5917 pos
= pci_find_next_ext_capability(pdev
, pos
, 0x23);
5924 intel_iommu_dev_has_feat(struct device
*dev
, enum iommu_dev_features feat
)
5926 if (feat
== IOMMU_DEV_FEAT_AUX
) {
5929 if (!dev_is_pci(dev
) || dmar_disabled
||
5930 !scalable_mode_support() || !iommu_pasid_support())
5933 ret
= pci_pasid_features(to_pci_dev(dev
));
5937 return !!siov_find_pci_dvsec(to_pci_dev(dev
));
5944 intel_iommu_dev_enable_feat(struct device
*dev
, enum iommu_dev_features feat
)
5946 if (feat
== IOMMU_DEV_FEAT_AUX
)
5947 return intel_iommu_enable_auxd(dev
);
5953 intel_iommu_dev_disable_feat(struct device
*dev
, enum iommu_dev_features feat
)
5955 if (feat
== IOMMU_DEV_FEAT_AUX
)
5956 return intel_iommu_disable_auxd(dev
);
5962 intel_iommu_dev_feat_enabled(struct device
*dev
, enum iommu_dev_features feat
)
5964 struct device_domain_info
*info
= dev
->archdata
.iommu
;
5966 if (feat
== IOMMU_DEV_FEAT_AUX
)
5967 return scalable_mode_support() && info
&& info
->auxd_enabled
;
5973 intel_iommu_aux_get_pasid(struct iommu_domain
*domain
, struct device
*dev
)
5975 struct dmar_domain
*dmar_domain
= to_dmar_domain(domain
);
5977 return dmar_domain
->default_pasid
> 0 ?
5978 dmar_domain
->default_pasid
: -EINVAL
;
5981 static bool intel_iommu_is_attach_deferred(struct iommu_domain
*domain
,
5984 return dev
->archdata
.iommu
== DEFER_DEVICE_DOMAIN_INFO
;
5987 const struct iommu_ops intel_iommu_ops
= {
5988 .capable
= intel_iommu_capable
,
5989 .domain_alloc
= intel_iommu_domain_alloc
,
5990 .domain_free
= intel_iommu_domain_free
,
5991 .attach_dev
= intel_iommu_attach_device
,
5992 .detach_dev
= intel_iommu_detach_device
,
5993 .aux_attach_dev
= intel_iommu_aux_attach_device
,
5994 .aux_detach_dev
= intel_iommu_aux_detach_device
,
5995 .aux_get_pasid
= intel_iommu_aux_get_pasid
,
5996 .map
= intel_iommu_map
,
5997 .unmap
= intel_iommu_unmap
,
5998 .iova_to_phys
= intel_iommu_iova_to_phys
,
5999 .add_device
= intel_iommu_add_device
,
6000 .remove_device
= intel_iommu_remove_device
,
6001 .get_resv_regions
= intel_iommu_get_resv_regions
,
6002 .put_resv_regions
= intel_iommu_put_resv_regions
,
6003 .apply_resv_region
= intel_iommu_apply_resv_region
,
6004 .device_group
= intel_iommu_device_group
,
6005 .dev_has_feat
= intel_iommu_dev_has_feat
,
6006 .dev_feat_enabled
= intel_iommu_dev_feat_enabled
,
6007 .dev_enable_feat
= intel_iommu_dev_enable_feat
,
6008 .dev_disable_feat
= intel_iommu_dev_disable_feat
,
6009 .is_attach_deferred
= intel_iommu_is_attach_deferred
,
6010 .pgsize_bitmap
= INTEL_IOMMU_PGSIZES
,
6013 static void quirk_iommu_igfx(struct pci_dev
*dev
)
6015 pci_info(dev
, "Disabling IOMMU for graphics on this chipset\n");
6019 /* G4x/GM45 integrated gfx dmar support is totally busted. */
6020 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x2a40, quirk_iommu_igfx
);
6021 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x2e00, quirk_iommu_igfx
);
6022 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x2e10, quirk_iommu_igfx
);
6023 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x2e20, quirk_iommu_igfx
);
6024 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x2e30, quirk_iommu_igfx
);
6025 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x2e40, quirk_iommu_igfx
);
6026 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x2e90, quirk_iommu_igfx
);
6028 /* Broadwell igfx malfunctions with dmar */
6029 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x1606, quirk_iommu_igfx
);
6030 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x160B, quirk_iommu_igfx
);
6031 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x160E, quirk_iommu_igfx
);
6032 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x1602, quirk_iommu_igfx
);
6033 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x160A, quirk_iommu_igfx
);
6034 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x160D, quirk_iommu_igfx
);
6035 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x1616, quirk_iommu_igfx
);
6036 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x161B, quirk_iommu_igfx
);
6037 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x161E, quirk_iommu_igfx
);
6038 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x1612, quirk_iommu_igfx
);
6039 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x161A, quirk_iommu_igfx
);
6040 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x161D, quirk_iommu_igfx
);
6041 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x1626, quirk_iommu_igfx
);
6042 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x162B, quirk_iommu_igfx
);
6043 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x162E, quirk_iommu_igfx
);
6044 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x1622, quirk_iommu_igfx
);
6045 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x162A, quirk_iommu_igfx
);
6046 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x162D, quirk_iommu_igfx
);
6047 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x1636, quirk_iommu_igfx
);
6048 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x163B, quirk_iommu_igfx
);
6049 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x163E, quirk_iommu_igfx
);
6050 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x1632, quirk_iommu_igfx
);
6051 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x163A, quirk_iommu_igfx
);
6052 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x163D, quirk_iommu_igfx
);
6054 static void quirk_iommu_rwbf(struct pci_dev
*dev
)
6057 * Mobile 4 Series Chipset neglects to set RWBF capability,
6058 * but needs it. Same seems to hold for the desktop versions.
6060 pci_info(dev
, "Forcing write-buffer flush capability\n");
6064 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x2a40, quirk_iommu_rwbf
);
6065 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x2e00, quirk_iommu_rwbf
);
6066 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x2e10, quirk_iommu_rwbf
);
6067 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x2e20, quirk_iommu_rwbf
);
6068 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x2e30, quirk_iommu_rwbf
);
6069 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x2e40, quirk_iommu_rwbf
);
6070 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x2e90, quirk_iommu_rwbf
);
6073 #define GGC_MEMORY_SIZE_MASK (0xf << 8)
6074 #define GGC_MEMORY_SIZE_NONE (0x0 << 8)
6075 #define GGC_MEMORY_SIZE_1M (0x1 << 8)
6076 #define GGC_MEMORY_SIZE_2M (0x3 << 8)
6077 #define GGC_MEMORY_VT_ENABLED (0x8 << 8)
6078 #define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
6079 #define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
6080 #define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
6082 static void quirk_calpella_no_shadow_gtt(struct pci_dev
*dev
)
6086 if (pci_read_config_word(dev
, GGC
, &ggc
))
6089 if (!(ggc
& GGC_MEMORY_VT_ENABLED
)) {
6090 pci_info(dev
, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
6092 } else if (dmar_map_gfx
) {
6093 /* we have to ensure the gfx device is idle before we flush */
6094 pci_info(dev
, "Disabling batched IOTLB flush on Ironlake\n");
6095 intel_iommu_strict
= 1;
6098 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x0040, quirk_calpella_no_shadow_gtt
);
6099 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x0044, quirk_calpella_no_shadow_gtt
);
6100 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x0062, quirk_calpella_no_shadow_gtt
);
6101 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL
, 0x006a, quirk_calpella_no_shadow_gtt
);
6103 /* On Tylersburg chipsets, some BIOSes have been known to enable the
6104 ISOCH DMAR unit for the Azalia sound device, but not give it any
6105 TLB entries, which causes it to deadlock. Check for that. We do
6106 this in a function called from init_dmars(), instead of in a PCI
6107 quirk, because we don't want to print the obnoxious "BIOS broken"
6108 message if VT-d is actually disabled.
6110 static void __init
check_tylersburg_isoch(void)
6112 struct pci_dev
*pdev
;
6113 uint32_t vtisochctrl
;
6115 /* If there's no Azalia in the system anyway, forget it. */
6116 pdev
= pci_get_device(PCI_VENDOR_ID_INTEL
, 0x3a3e, NULL
);
6121 /* System Management Registers. Might be hidden, in which case
6122 we can't do the sanity check. But that's OK, because the
6123 known-broken BIOSes _don't_ actually hide it, so far. */
6124 pdev
= pci_get_device(PCI_VENDOR_ID_INTEL
, 0x342e, NULL
);
6128 if (pci_read_config_dword(pdev
, 0x188, &vtisochctrl
)) {
6135 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
6136 if (vtisochctrl
& 1)
6139 /* Drop all bits other than the number of TLB entries */
6140 vtisochctrl
&= 0x1c;
6142 /* If we have the recommended number of TLB entries (16), fine. */
6143 if (vtisochctrl
== 0x10)
6146 /* Zero TLB entries? You get to ride the short bus to school. */
6148 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
6149 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
6150 dmi_get_system_info(DMI_BIOS_VENDOR
),
6151 dmi_get_system_info(DMI_BIOS_VERSION
),
6152 dmi_get_system_info(DMI_PRODUCT_VERSION
));
6153 iommu_identity_mapping
|= IDENTMAP_AZALIA
;
6157 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",