// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2006-2014 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>,
 *          Ashok Raj <ashok.raj@intel.com>,
 *          Shaohua Li <shaohua.li@intel.com>,
 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
 *          Fenghua Yu <fenghua.yu@intel.com>
 *          Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"DMAR: " fmt
#define dev_fmt(fmt)	pr_fmt(fmt)

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/cpu.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
#include <linux/dma-direct.h>
#include <linux/crash_dump.h>
#include <linux/numa.h>
#include <linux/swiotlb.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#include <trace/events/intel_iommu.h>

#include "irq_remapping.h"
#include "intel-pasid.h"
#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57

#define MAX_AGAW_WIDTH 64
#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

/* IO virtual address start page frame number */
#define IOVA_START_PFN		(1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
/*
 * This bitmap is used to advertise the page sizes our hardware supports
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * Traditionally the IOMMU core just handed us the mappings directly,
 * after making sure the size is an order of a 4KiB page and that the
 * mapping has natural alignment.
 *
 * To retain this behavior, we currently advertise that we support
 * all page sizes that are an order of 4KiB.
 *
 * If at some point we'd like to utilize the IOMMU core's new behavior,
 * we could change this to advertise the real page sizes we support.
 */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)
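
/*
 * AGAW (adjusted guest address width) helpers. With a 9-bit stride and
 * 4KiB pages, an AGAW of N corresponds to a width of 30 + N * 9 bits and
 * an (N + 2)-level page table; e.g. AGAW 2 covers 48 bits of address
 * space with a 4-level table.
 */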
static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
}

static inline int width_to_agaw(int width)
{
	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(u64 pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline u64 level_mask(int level)
{
	return -1ULL << level_to_offset_bits(level);
}

static inline u64 level_size(int level)
{
	return 1ULL << level_to_offset_bits(level);
}

static inline u64 align_to_level(u64 pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
}

/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}

static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * set to 1 to panic kernel if can't successfully enable VT-d
 * (used when kernel is launched w/ TXT)
 */
static int force_on = 0;
int intel_iommu_tboot_noforce;
static int no_platform_optin;
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))

/*
 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
 * if marked present.
 */
static phys_addr_t root_entry_lctp(struct root_entry *re)
{
	if (!(re->lo & 1))
		return 0;

	return re->lo & VTD_PAGE_MASK;
}

/*
 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
 * if marked present.
 */
static phys_addr_t root_entry_uctp(struct root_entry *re)
{
	if (!(re->hi & 1))
		return 0;

	return re->hi & VTD_PAGE_MASK;
}
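
/*
 * The helpers below poke individual fields of a legacy context entry:
 * in the low word, bit 0 is Present, bits 2-3 the translation type and
 * bit 11 the PASID enable; in the high word, bits 0-2 hold the address
 * width, bit 3 marks an entry copied from a previous kernel (kdump) and
 * bits 8-23 the domain id.
 */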
static inline void context_clear_pasid_enable(struct context_entry *context)
{
	context->lo &= ~(1ULL << 11);
}

static inline bool context_pasid_enabled(struct context_entry *context)
{
	return !!(context->lo & (1ULL << 11));
}

static inline void context_set_copied(struct context_entry *context)
{
	context->hi |= (1ull << 3);
}

static inline bool context_copied(struct context_entry *context)
{
	return !!(context->hi & (1ULL << 3));
}

static inline bool __context_present(struct context_entry *context)
{
	return (context->lo & 1);
}

bool context_present(struct context_entry *context)
{
	return context_pasid_enabled(context) ?
	     __context_present(context) :
	     __context_present(context) && !context_copied(context);
}

static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo &= ~VTD_PAGE_MASK;
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline int context_domain_id(struct context_entry *c)
{
	return (c->hi >> 8) & 0xffff;
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}
/*
 * This domain is a statically identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY		BIT(0)

/*
 * This is a DMA domain allocated through the iommu domain allocation
 * interface. But one or more devices belonging to this domain have
 * been chosen to use a private domain. We should avoid using the
 * map/unmap/iova_to_phys APIs on it.
 */
#define DOMAIN_FLAG_LOSE_CHILDREN		BIT(1)

#define for_each_domain_iommu(idx, domain)			\
	for (idx = 0; idx < g_num_of_iommus; idx++)		\
		if (domain->iommu_refcnt[idx])
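
/*
 * RMRR (Reserved Memory Region Reporting) and ATSR (ATS capability
 * Reporting) units parsed from the ACPI DMAR table; each unit carries
 * the device scope it applies to.
 */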
struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units	*/
	struct acpi_dmar_header *hdr;	/* ACPI header		*/
	u64	base_address;		/* reserved base address*/
	u64	end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int	devices_cnt;		/* target device count */
};

struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};

static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);

#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;
static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
static void domain_context_clear(struct intel_iommu *iommu,
				 struct device *dev);
static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu);
static bool device_is_rmrr_locked(struct device *dev);
static int intel_iommu_attach_device(struct iommu_domain *domain,
				     struct device *dev);
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
					    dma_addr_t iova);

#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */
int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int iommu_identity_mapping;
static int intel_no_bounce;

#define IDENTMAP_ALL		1
#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) &&	\
				to_pci_dev(d)->untrusted)
/*
 * Iterate over elements in device_domain_list and call the specified
 * callback @fn against each element.
 */
int for_each_device_domain(int (*fn)(struct device_domain_info *info,
				     void *data), void *data)
{
	int ret = 0;
	unsigned long flags;
	struct device_domain_info *info;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &device_domain_list, global) {
		ret = fn(info, data);
		if (ret) {
			spin_unlock_irqrestore(&device_domain_lock, flags);
			return ret;
		}
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}

const struct iommu_ops intel_iommu_ops;
static bool translation_pre_enabled(struct intel_iommu *iommu)
{
	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
}

static void clear_translation_pre_enabled(struct intel_iommu *iommu)
{
	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
}

static void init_translation_status(struct intel_iommu *iommu)
{
	u32 gsts;

	gsts = readl(iommu->reg + DMAR_GSTS_REG);
	if (gsts & DMA_GSTS_TES)
		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
}

/* Convert a generic struct iommu_domain to the private struct dmar_domain */
static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct dmar_domain, domain);
}
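
/*
 * Parse the intel_iommu= kernel command line option. Keywords handled
 * below: on, off, igfx_off, forcedac, strict, sp_off, sm_on,
 * tboot_noforce and nobounce.
 */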
static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			pr_info("IOMMU enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			no_platform_optin = 1;
			pr_info("IOMMU disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			pr_info("Disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			pr_info("Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			pr_info("Disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		} else if (!strncmp(str, "sp_off", 6)) {
			pr_info("Disable supported super page\n");
			intel_iommu_superpage = 0;
		} else if (!strncmp(str, "sm_on", 5)) {
			pr_info("Intel-IOMMU: scalable mode supported\n");
			intel_iommu_sm = 1;
		} else if (!strncmp(str, "tboot_noforce", 13)) {
			pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
			intel_iommu_tboot_noforce = 1;
		} else if (!strncmp(str, "nobounce", 8)) {
			pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
			intel_no_bounce = 1;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;

static struct dmar_domain *get_iommu_domain(struct intel_iommu *iommu, u16 did)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	domains = iommu->domains[idx];
	if (!domains)
		return NULL;

	return domains[did & 0xff];
}
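
/*
 * iommu->domains is a two-level array: the domain id's high bits select a
 * lazily allocated chunk of 256 slots, the low 8 bits the slot within it.
 */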
static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
			     struct dmar_domain *domain)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	if (!iommu->domains[idx]) {
		size_t size = 256 * sizeof(struct dmar_domain *);

		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
	}

	domains = iommu->domains[idx];
	if (WARN_ON(!domains))
		return;

	domains[did & 0xff] = domain;
}
void *alloc_pgtable_page(int node)
{
	struct page *page;
	void *vaddr = NULL;

	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (page)
		vaddr = page_address(page);
	return vaddr;
}

void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void *alloc_devinfo_mem(void)
{
	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}
static inline int domain_type_is_si(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}

static inline int domain_pfn_supported(struct dmar_domain *domain,
					unsigned long pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;

	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
}

static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(max_gaw);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}

/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * Calculate agaw for each iommu.
 * "SAGAW" may be different across iommus: use a default agaw, and fall
 * back to a smaller supported agaw for iommus that don't support the
 * default one.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}
/* This function only returns the single iommu in a domain */
struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	/* si_domain and vm domain should not get here. */
	if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
		return NULL;

	for_each_domain_iommu(iommu_id, domain)
		break;

	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}

static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
{
	return sm_supported(iommu) ?
			ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
}
static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	bool found = false;
	int i;

	domain->iommu_coherency = 1;

	for_each_domain_iommu(i, domain) {
		found = true;
		if (!iommu_paging_structure_coherency(g_iommus[i])) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	if (found)
		return;

	/* No hardware attached; use lowest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (!iommu_paging_structure_coherency(iommu)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	rcu_read_unlock();
}
static int domain_update_iommu_snooping(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int ret = 1;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			if (!ecap_sc_support(iommu->ecap)) {
				ret = 0;
				break;
			}
		}
	}
	rcu_read_unlock();

	return ret;
}

static int domain_update_iommu_superpage(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int mask = 0xf;

	if (!intel_iommu_superpage) {
		return 0;
	}

	/* set iommu_superpage to the smallest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			mask &= cap_super_page_val(iommu->cap);
			if (!mask)
				break;
		}
	}
	rcu_read_unlock();

	return fls(mask);
}

/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);
	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
}
struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
					 u8 devfn, int alloc)
{
	struct root_entry *root = &iommu->root_entry[bus];
	struct context_entry *context;
	u64 *entry;

	entry = &root->lo;
	if (sm_supported(iommu)) {
		if (devfn >= 0x80) {
			devfn -= 0x80;
			entry = &root->hi;
		}
		devfn *= 2;
	}
	if (*entry & 1)
		context = phys_to_virt(*entry & VTD_PAGE_MASK);
	else {
		unsigned long phy_addr;

		if (!alloc)
			return NULL;

		context = alloc_pgtable_page(iommu->node);
		if (!context)
			return NULL;

		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		*entry = phy_addr | 1;
		__iommu_flush_cache(iommu, entry, sizeof(*entry));
	}
	return &context[devfn];
}
static int iommu_dummy(struct device *dev)
{
	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
}

/**
 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
 *				 sub-hierarchy of a candidate PCI-PCI bridge
 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
 * @bridge: the candidate PCI-PCI bridge
 *
 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
 */
static bool
is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
{
	struct pci_dev *pdev, *pbridge;

	if (!dev_is_pci(dev) || !dev_is_pci(bridge))
		return false;

	pdev = to_pci_dev(dev);
	pbridge = to_pci_dev(bridge);

	if (pbridge->subordinate &&
	    pbridge->subordinate->number <= pdev->bus->number &&
	    pbridge->subordinate->busn_res.end >= pdev->bus->number)
		return true;

	return false;
}
static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	struct intel_iommu *iommu;
	struct device *tmp;
	struct pci_dev *pdev = NULL;
	u16 segment = 0;
	int i;

	if (iommu_dummy(dev))
		return NULL;

	if (dev_is_pci(dev)) {
		struct pci_dev *pf_pdev;

		pdev = to_pci_dev(dev);

		/* VMD child devices currently cannot be handled individually */
		if (is_vmd(pdev->bus))
			return NULL;

		/* VFs aren't listed in scope tables; we need to look up
		 * the PF instead to find the IOMMU. */
		pf_pdev = pci_physfn(pdev);
		dev = &pf_pdev->dev;
		segment = pci_domain_nr(pdev->bus);
	} else if (has_acpi_companion(dev))
		dev = &ACPI_COMPANION(dev)->dev;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (pdev && segment != drhd->segment)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, tmp) {
			if (tmp == dev) {
				/* For a VF use its original BDF# not that of the PF
				 * which we used for the IOMMU lookup. Strictly speaking
				 * we could do this for all PCI devices; we only need to
				 * get the BDF# from the scope table for ACPI matches. */
				if (pdev && pdev->is_virtfn)
					goto got_pdev;

				*bus = drhd->devices[i].bus;
				*devfn = drhd->devices[i].devfn;
				goto out;
			}

			if (is_downstream_to_pci_bridge(dev, tmp))
				goto got_pdev;
		}

		if (pdev && drhd->include_all) {
		got_pdev:
			*bus = pdev->bus->number;
			*devfn = pdev->devfn;
			goto out;
		}
	}
	iommu = NULL;
out:
	rcu_read_unlock();

	return iommu;
}
static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}

static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct context_entry *context;
	int ret = 0;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	context = iommu_context_addr(iommu, bus, devfn, 0);
	if (context)
		ret = context_present(context);
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}
static void free_context_table(struct intel_iommu *iommu)
{
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry)
		goto out;

	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		context = iommu_context_addr(iommu, i, 0, 0);
		if (context)
			free_pgtable_page(context);

		if (!sm_supported(iommu))
			continue;

		context = iommu_context_addr(iommu, i, 0x80, 0);
		if (context)
			free_pgtable_page(context);
	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}
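
/*
 * Walk (and, where *target_level allows, populate) the page table of
 * @domain down to the entry covering @pfn, returning that PTE.
 */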
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int *target_level)
{
	struct dma_pte *parent, *pte;
	int level = agaw_to_level(domain->agaw);
	int offset;

	BUG_ON(!domain->pgd);

	if (!domain_pfn_supported(domain, pfn))
		/* Address beyond IOMMU's addressing capabilities. */
		return NULL;

	parent = domain->pgd;

	while (1) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
			break;
		if (level == *target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);
			if (!tmp_page)
				return NULL;

			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			if (cmpxchg64(&pte->val, 0ULL, pteval))
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			else
				domain_flush_cache(domain, pte, sizeof(*pte));
		}
		if (level == 1)
			break;

		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	if (!*target_level)
		*target_level = level;

	return pte;
}
/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
					 unsigned long pfn,
					 int level, int *large_page)
{
	struct dma_pte *parent, *pte;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = pfn_level_offset(pfn, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte)) {
			*large_page = total;
			break;
		}

		if (dma_pte_superpage(pte)) {
			*large_page = total;
			return pte;
		}

		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}
/* clear last level pte; must be followed by a TLB flush */
static void dma_pte_clear_range(struct dmar_domain *domain,
				unsigned long start_pfn,
				unsigned long last_pfn)
{
	unsigned int large_page;
	struct dma_pte *first_pte, *pte;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	do {
		large_page = 1;
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
		if (!pte) {
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
			continue;
		}
		do {
			dma_clear_pte(pte);
			start_pfn += lvl_to_nr_pages(large_page);
			pte++;
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	} while (start_pfn && start_pfn <= last_pfn);
}
static void dma_pte_free_level(struct dmar_domain *domain, int level,
			       int retain_level, struct dma_pte *pte,
			       unsigned long pfn, unsigned long start_pfn,
			       unsigned long last_pfn)
{
	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;
		struct dma_pte *level_pte;

		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
			goto next;

		level_pfn = pfn & level_mask(level);
		level_pte = phys_to_virt(dma_pte_addr(pte));

		if (level > 2)
			dma_pte_free_level(domain, level - 1, retain_level,
					   level_pte, level_pfn, start_pfn,
					   last_pfn);

		/*
		 * Free the page table if we're below the level we want to
		 * retain and the range covers the entire table.
		 */
		if (level < retain_level && !(start_pfn > level_pfn ||
		      last_pfn < level_pfn + level_size(level) - 1)) {
			dma_clear_pte(pte);
			domain_flush_cache(domain, pte, sizeof(*pte));
			free_pgtable_page(level_pte);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
}
/*
 * clear last level (leaf) ptes and free page table pages below the
 * level we wish to keep intact.
 */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
				   unsigned long start_pfn,
				   unsigned long last_pfn,
				   int retain_level)
{
	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	dma_pte_clear_range(domain, start_pfn, last_pfn);

	/* We don't need lock here; nobody else touches the iova range */
	dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
			   domain->pgd, 0, start_pfn, last_pfn);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}
/* When a page at a given level is being unlinked from its parent, we don't
   need to *modify* it at all. All we need to do is make a list of all the
   pages which can be freed just as soon as we've flushed the IOTLB and we
   know the hardware page-walk will no longer touch them.
   The 'pte' argument is the *parent* PTE, pointing to the page that is to
   be freed. */
static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
					    int level, struct dma_pte *pte,
					    struct page *freelist)
{
	struct page *pg;

	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
	pg->freelist = freelist;
	freelist = pg;

	if (level == 1)
		return freelist;

	pte = page_address(pg);
	do {
		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
			freelist = dma_pte_list_pagetables(domain, level - 1,
							   pte, freelist);
		pte++;
	} while (!first_pte_in_page(pte));

	return freelist;
}
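
/*
 * Recursively clear the leaf PTEs in [start_pfn, last_pfn] and chain any
 * fully covered page-table pages onto the freelist for deferred freeing.
 */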
static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
					struct dma_pte *pte, unsigned long pfn,
					unsigned long start_pfn,
					unsigned long last_pfn,
					struct page *freelist)
{
	struct dma_pte *first_pte = NULL, *last_pte = NULL;

	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;

		if (!dma_pte_present(pte))
			goto next;

		level_pfn = pfn & level_mask(level);

		/* If range covers entire pagetable, free it */
		if (start_pfn <= level_pfn &&
		    last_pfn >= level_pfn + level_size(level) - 1) {
			/* These subordinate page tables are going away entirely. Don't
			   bother to clear them; we're just going to *free* them. */
			if (level > 1 && !dma_pte_superpage(pte))
				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);

			dma_clear_pte(pte);
			if (!first_pte)
				first_pte = pte;
			last_pte = pte;
		} else if (level > 1) {
			/* Recurse down into a level that isn't *entirely* obsolete */
			freelist = dma_pte_clear_level(domain, level - 1,
						       phys_to_virt(dma_pte_addr(pte)),
						       level_pfn, start_pfn, last_pfn,
						       freelist);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);

	if (first_pte)
		domain_flush_cache(domain, first_pte,
				   (void *)++last_pte - (void *)first_pte);

	return freelist;
}
/* We can't just free the pages because the IOMMU may still be walking
   the page tables, and may have cached the intermediate levels. The
   pages can only be freed after the IOTLB flush has been done. */
static struct page *domain_unmap(struct dmar_domain *domain,
				 unsigned long start_pfn,
				 unsigned long last_pfn)
{
	struct page *freelist;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
				       domain->pgd, 0, start_pfn, last_pfn, NULL);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		struct page *pgd_page = virt_to_page(domain->pgd);

		pgd_page->freelist = freelist;
		freelist = pgd_page;

		domain->pgd = NULL;
	}

	return freelist;
}

static void dma_free_pagelist(struct page *freelist)
{
	struct page *pg;

	while ((pg = freelist)) {
		freelist = pg->freelist;
		free_pgtable_page(page_address(pg));
	}
}
static void iova_entry_free(unsigned long data)
{
	struct page *freelist = (struct page *)data;

	dma_free_pagelist(freelist);
}

/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
	if (!root) {
		pr_err("Allocating root entry for %s failed\n",
			iommu->name);
		return -ENOMEM;
	}

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}
static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	u64 addr;
	u32 sts;
	unsigned long flag;

	addr = virt_to_phys(iommu->root_entry);
	if (sm_supported(iommu))
		addr |= DMA_RTADDR_SMT;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
/* The return value determines whether we need a write buffer flush */
static void __iommu_flush_context(struct intel_iommu *iommu,
				  u16 did, u16 source_id, u8 function_mask,
				  u64 type)
{
	u64 val = 0;
	unsigned long flag;

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
/* The return value determines whether we need a write buffer flush */
static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
				u64 addr, unsigned int size_order, u64 type)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* IH bit is passed in as part of address */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: set drain read/write */
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		dmar_readq, (!(val & DMA_TLB_IVT)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		pr_err("Flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("TLB flush request %Lx, actual %Lx\n",
			(unsigned long long)DMA_TLB_IIRG(type),
			(unsigned long long)DMA_TLB_IAIG(val));
}
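
/*
 * Look up the device_domain_info for bus/devfn on @iommu and report it
 * only if the device supports ATS (a device IOTLB), so callers can enable
 * and later flush the device TLB.
 */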
static struct device_domain_info *
iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
			u8 bus, u8 devfn)
{
	struct device_domain_info *info;

	assert_spin_locked(&device_domain_lock);

	if (!iommu->qi)
		return NULL;

	list_for_each_entry(info, &domain->devices, link)
		if (info->iommu == iommu && info->bus == bus &&
		    info->devfn == devfn) {
			if (info->ats_supported && info->dev)
				return info;
			break;
		}

	return NULL;
}

static void domain_update_iotlb(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	bool has_iotlb_device = false;

	assert_spin_locked(&device_domain_lock);

	list_for_each_entry(info, &domain->devices, link) {
		struct pci_dev *pdev;

		if (!info->dev || !dev_is_pci(info->dev))
			continue;

		pdev = to_pci_dev(info->dev);
		if (pdev->ats_enabled) {
			has_iotlb_device = true;
			break;
		}
	}

	domain->has_iotlb_device = has_iotlb_device;
}
static void iommu_enable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	assert_spin_locked(&device_domain_lock);

	if (!info || !dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);
	/* For IOMMU that supports device IOTLB throttling (DIT), we assign
	 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
	 * queue depth at PF level. If DIT is not set, PFSID will be treated as
	 * reserved, which should be set to 0.
	 */
	if (!ecap_dit(info->iommu->ecap))
		info->pfsid = 0;
	else {
		struct pci_dev *pf_pdev;

		/* pdev will be returned if device is not a vf */
		pf_pdev = pci_physfn(pdev);
		info->pfsid = pci_dev_id(pf_pdev);
	}

#ifdef CONFIG_INTEL_IOMMU_SVM
	/* The PCIe spec, in its wisdom, declares that the behaviour of
	   the device if you enable PASID support after ATS support is
	   undefined. So always enable PASID support on devices which
	   have it, even if we can't yet know if we're ever going to
	   use it. */
	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
		info->pasid_enabled = 1;

	if (info->pri_supported &&
	    (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1)  &&
	    !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
		info->pri_enabled = 1;
#endif
	if (!pdev->untrusted && info->ats_supported &&
	    pci_ats_page_aligned(pdev) &&
	    !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
		info->ats_enabled = 1;
		domain_update_iotlb(info->domain);
		info->ats_qdep = pci_ats_queue_depth(pdev);
	}
}
static void iommu_disable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	assert_spin_locked(&device_domain_lock);

	if (!dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);

	if (info->ats_enabled) {
		pci_disable_ats(pdev);
		info->ats_enabled = 0;
		domain_update_iotlb(info->domain);
	}
#ifdef CONFIG_INTEL_IOMMU_SVM
	if (info->pri_enabled) {
		pci_disable_pri(pdev);
		info->pri_enabled = 0;
	}
	if (info->pasid_enabled) {
		pci_disable_pasid(pdev);
		info->pasid_enabled = 0;
	}
#endif
}
static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
				  u64 addr, unsigned mask)
{
	u16 sid, qdep;
	unsigned long flags;
	struct device_domain_info *info;

	if (!domain->has_iotlb_device)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link) {
		if (!info->ats_enabled)
			continue;

		sid = info->bus << 8 | info->devfn;
		qdep = info->ats_qdep;
		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
				qdep, addr, mask);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}
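
/*
 * Page-selective IOTLB invalidation (PSI): flush @pages pages starting at
 * @pfn for @domain on @iommu, falling back to a domain-selective flush
 * when the hardware cannot cover the (power-of-two rounded) range.
 */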
static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
				  struct dmar_domain *domain,
				  unsigned long pfn, unsigned int pages,
				  int ih, int map)
{
	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
	u16 did = domain->iommu_did[iommu->seq_id];

	BUG_ON(pages == 0);

	if (ih)
		ih = 1 << 6;
	/*
	 * Fallback to domain selective flush if no PSI support or the size is
	 * too big.
	 * PSI requires page size to be 2 ^ x, and the base address is naturally
	 * aligned to the size.
	 */
	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
		iommu->flush.flush_iotlb(iommu, did, 0, 0,
						DMA_TLB_DSI_FLUSH);
	else
		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
						DMA_TLB_PSI_FLUSH);

	/*
	 * In caching mode, changes of pages from non-present to present require
	 * flush. However, device IOTLB doesn't need to be flushed in this case.
	 */
	if (!cap_caching_mode(iommu->cap) || !map)
		iommu_flush_dev_iotlb(domain, addr, mask);
}

/* Notification for newly created mappings */
static inline void __mapping_notify_one(struct intel_iommu *iommu,
					struct dmar_domain *domain,
					unsigned long pfn, unsigned int pages)
{
	/* It's a non-present to present mapping. Only flush if caching mode */
	if (cap_caching_mode(iommu->cap))
		iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
	else
		iommu_flush_write_buffer(iommu);
}
static void iommu_flush_iova(struct iova_domain *iovad)
{
	struct dmar_domain *domain;
	int idx;

	domain = container_of(iovad, struct dmar_domain, iovad);

	for_each_domain_iommu(idx, domain) {
		struct intel_iommu *iommu = g_iommus[idx];
		u16 did = domain->iommu_did[iommu->seq_id];

		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

		if (!cap_caching_mode(iommu->cap))
			iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
					      0, MAX_AGAW_PFN_WIDTH);
	}
}
static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
	unsigned long flags;

	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		readl, !(pmen & DMA_PMEN_PRS), pmen);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}
static void iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	iommu->gcmd |= DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_TES), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(sts & DMA_GSTS_TES)), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
static int iommu_init_domains(struct intel_iommu *iommu)
{
	u32 ndomains, nlongs;
	size_t size;

	ndomains = cap_ndoms(iommu->cap);
	pr_debug("%s: Number of Domains supported <%d>\n",
		 iommu->name, ndomains);
	nlongs = BITS_TO_LONGS(ndomains);

	spin_lock_init(&iommu->lock);

	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
	if (!iommu->domain_ids) {
		pr_err("%s: Allocating domain id array failed\n",
		       iommu->name);
		return -ENOMEM;
	}

	size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
	iommu->domains = kzalloc(size, GFP_KERNEL);

	if (iommu->domains) {
		size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[0] = kzalloc(size, GFP_KERNEL);
	}

	if (!iommu->domains || !iommu->domains[0]) {
		pr_err("%s: Allocating domain array failed\n",
		       iommu->name);
		kfree(iommu->domain_ids);
		kfree(iommu->domains);
		iommu->domain_ids = NULL;
		iommu->domains    = NULL;
		return -ENOMEM;
	}

	/*
	 * If Caching mode is set, then invalid translations are tagged
	 * with domain-id 0, hence we need to pre-allocate it. We also
	 * use domain-id 0 as a marker for non-allocated domain-id, so
	 * make sure it is not used for a real domain.
	 */
	set_bit(0, iommu->domain_ids);

	/*
	 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
	 * entry for first-level or pass-through translation modes should
	 * be programmed with a domain id different from those used for
	 * second-level or nested translation. We reserve a domain id for
	 * this purpose.
	 */
	if (sm_supported(iommu))
		set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);

	return 0;
}
static void disable_dmar_iommu(struct intel_iommu *iommu)
{
	struct device_domain_info *info, *tmp;
	unsigned long flags;

	if (!iommu->domains || !iommu->domain_ids)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
		if (info->iommu != iommu)
			continue;

		if (!info->dev || !info->domain)
			continue;

		__dmar_remove_one_dev_info(info);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);
}
static void free_dmar_iommu(struct intel_iommu *iommu)
{
	if ((iommu->domains) && (iommu->domain_ids)) {
		int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
		int i;

		for (i = 0; i < elems; i++)
			kfree(iommu->domains[i]);
		kfree(iommu->domains);
		kfree(iommu->domain_ids);
		iommu->domains = NULL;
		iommu->domain_ids = NULL;
	}

	g_iommus[iommu->seq_id] = NULL;

	/* free context mapping */
	free_context_table(iommu);

#ifdef CONFIG_INTEL_IOMMU_SVM
	if (pasid_supported(iommu)) {
		if (ecap_prs(iommu->ecap))
			intel_svm_finish_prq(iommu);
	}
#endif
}
static struct dmar_domain *alloc_domain(int flags)
{
	struct dmar_domain *domain;

	domain = alloc_domain_mem();
	if (!domain)
		return NULL;

	memset(domain, 0, sizeof(*domain));
	domain->nid = NUMA_NO_NODE;
	domain->flags = flags;
	domain->has_iotlb_device = false;
	INIT_LIST_HEAD(&domain->devices);

	return domain;
}
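
/*
 * The attach/detach helpers below keep a per-IOMMU reference count for the
 * domain and allocate a hardware domain id from iommu->domain_ids on the
 * first attach, releasing it again on the last detach.
 */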
/* Must be called with iommu->lock */
static int domain_attach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu)
{
	unsigned long ndomains;
	int num;

	assert_spin_locked(&device_domain_lock);
	assert_spin_locked(&iommu->lock);

	domain->iommu_refcnt[iommu->seq_id] += 1;
	domain->iommu_count += 1;
	if (domain->iommu_refcnt[iommu->seq_id] == 1) {
		ndomains = cap_ndoms(iommu->cap);
		num      = find_first_zero_bit(iommu->domain_ids, ndomains);

		if (num >= ndomains) {
			pr_err("%s: No free domain ids\n", iommu->name);
			domain->iommu_refcnt[iommu->seq_id] -= 1;
			domain->iommu_count -= 1;
			return -ENOSPC;
		}

		set_bit(num, iommu->domain_ids);
		set_iommu_domain(iommu, num, domain);

		domain->iommu_did[iommu->seq_id] = num;
		domain->nid			 = iommu->node;

		domain_update_iommu_cap(domain);
	}

	return 0;
}
static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu)
{
	int num, count;

	assert_spin_locked(&device_domain_lock);
	assert_spin_locked(&iommu->lock);

	domain->iommu_refcnt[iommu->seq_id] -= 1;
	count = --domain->iommu_count;
	if (domain->iommu_refcnt[iommu->seq_id] == 0) {
		num = domain->iommu_did[iommu->seq_id];
		clear_bit(num, iommu->domain_ids);
		set_iommu_domain(iommu, num, NULL);

		domain_update_iommu_cap(domain);
		domain->iommu_did[iommu->seq_id] = 0;
	}

	return count;
}
static struct iova_domain reserved_iova_list;
static struct lock_class_key reserved_rbtree_key;

static int dmar_init_reserved_ranges(void)
{
	struct pci_dev *pdev = NULL;
	struct iova *iova;
	int i;

	init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);

	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
		&reserved_rbtree_key);

	/* IOAPIC ranges shouldn't be accessed by DMA */
	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
		IOVA_PFN(IOAPIC_RANGE_END));
	if (!iova) {
		pr_err("Reserve IOAPIC range failed\n");
		return -ENODEV;
	}

	/* Reserve all PCI MMIO to avoid peer-to-peer access */
	for_each_pci_dev(pdev) {
		struct resource *r;

		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
			r = &pdev->resource[i];
			if (!r->flags || !(r->flags & IORESOURCE_MEM))
				continue;
			iova = reserve_iova(&reserved_iova_list,
					    IOVA_PFN(r->start),
					    IOVA_PFN(r->end));
			if (!iova) {
				pci_err(pdev, "Reserve iova for %pR failed\n", r);
				return -ENODEV;
			}
		}
	}
	return 0;
}
static void domain_reserve_special_ranges(struct dmar_domain *domain)
{
	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
}

static inline int guestwidth_to_adjustwidth(int gaw)
{
	int agaw;
	int r = (gaw - 12) % 9;

	if (r == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - r;
	if (agaw > 64)
		agaw = 64;
	return agaw;
}
static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
		       int guest_width)
{
	int adjust_width, agaw;
	unsigned long sagaw;
	int err;

	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);

	err = init_iova_flush_queue(&domain->iovad,
				    iommu_flush_iova, iova_entry_free);
	if (err)
		return err;

	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	if (guest_width > cap_mgaw(iommu->cap))
		guest_width = cap_mgaw(iommu->cap);
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	agaw = width_to_agaw(adjust_width);
	sagaw = cap_sagaw(iommu->cap);
	if (!test_bit(agaw, &sagaw)) {
		/* hardware doesn't support it, choose a bigger one */
		pr_debug("Hardware doesn't support agaw %d\n", agaw);
		agaw = find_next_bit(&sagaw, 5, agaw);
		if (agaw >= 5)
			return -ENODEV;
	}
	domain->agaw = agaw;

	if (ecap_coherent(iommu->ecap))
		domain->iommu_coherency = 1;
	else
		domain->iommu_coherency = 0;

	if (ecap_sc_support(iommu->ecap))
		domain->iommu_snooping = 1;
	else
		domain->iommu_snooping = 0;

	if (intel_iommu_superpage)
		domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
	else
		domain->iommu_superpage = 0;

	domain->nid = iommu->node;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
	if (!domain->pgd)
		return -ENOMEM;
	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
	return 0;
}
static void domain_exit(struct dmar_domain *domain)
{
	/* Remove associated devices and clear attached or cached domains */
	domain_remove_dev_info(domain);

	/* destroy iovas */
	put_iova_domain(&domain->iovad);

	if (domain->pgd) {
		struct page *freelist;

		freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
		dma_free_pagelist(freelist);
	}

	free_domain_mem(domain);
}
/*
 * Get the PASID directory size for scalable mode context entry.
 * Value of X in the PDTS field of a scalable mode context entry
 * indicates PASID directory with 2^(X + 7) entries.
 */
static inline unsigned long context_get_sm_pds(struct pasid_table *table)
{
	int pds, max_pde;

	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
	pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
	if (pds < 7)
		return 0;

	return pds - 7;
}

/*
 * Set the RID_PASID field of a scalable mode context entry. The
 * IOMMU hardware will use the PASID value set in this field for
 * DMA translations of DMA requests without PASID.
 */
static inline void
context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
{
	context->hi |= pasid & ((1 << 20) - 1);
	context->hi |= (1 << 20);
}

/*
 * Set the DTE(Device-TLB Enable) field of a scalable mode context
 * entry.
 */
static inline void context_set_sm_dte(struct context_entry *context)
{
	context->lo |= (1 << 2);
}

/*
 * Set the PRE(Page Request Enable) field of a scalable mode context
 * entry.
 */
static inline void context_set_sm_pre(struct context_entry *context)
{
	context->lo |= (1 << 4);
}
/* Convert value to context PASID directory size field coding. */
#define context_pdts(pds)	(((pds) & 0x7) << 9)

static int domain_context_mapping_one(struct dmar_domain *domain,
				      struct intel_iommu *iommu,
				      struct pasid_table *table,
				      u8 bus, u8 devfn)
{
	u16 did = domain->iommu_did[iommu->seq_id];
	int translation = CONTEXT_TT_MULTI_LEVEL;
	struct device_domain_info *info = NULL;
	struct context_entry *context;
	unsigned long flags;
	int ret;

	if (hw_pass_through && domain_type_is_si(domain))
		translation = CONTEXT_TT_PASS_THROUGH;

	pr_debug("Set context mapping for %02x:%02x.%d\n",
		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));

	BUG_ON(!domain->pgd);

	spin_lock_irqsave(&device_domain_lock, flags);
	spin_lock(&iommu->lock);

	ret = -ENOMEM;
	context = iommu_context_addr(iommu, bus, devfn, 1);
	if (!context)
		goto out_unlock;

	ret = 0;
	if (context_present(context))
		goto out_unlock;

	/*
	 * For kdump cases, old valid entries may be cached due to the
	 * in-flight DMA and copied pgtable, but there is no unmapping
	 * behaviour for them, thus we need an explicit cache flush for
	 * the newly-mapped device. For kdump, at this point, the device
	 * is supposed to finish reset at its driver probe stage, so no
	 * in-flight DMA will exist, and we don't need to worry anymore
	 * hereafter.
	 */
	if (context_copied(context)) {
		u16 did_old = context_domain_id(context);

		if (did_old < cap_ndoms(iommu->cap)) {
			iommu->flush.flush_context(iommu, did_old,
						   (((u16)bus) << 8) | devfn,
						   DMA_CCMD_MASK_NOBIT,
						   DMA_CCMD_DEVICE_INVL);
			iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
						 DMA_TLB_DSI_FLUSH);
		}
	}

	context_clear_entry(context);

	if (sm_supported(iommu)) {
		unsigned long pds;

		/* Setup the PASID DIR pointer: */
		pds = context_get_sm_pds(table);
		context->lo = (u64)virt_to_phys(table->table) |
				context_pdts(pds);

		/* Setup the RID_PASID field: */
		context_set_sm_rid2pasid(context, PASID_RID2PASID);

		/*
		 * Setup the Device-TLB enable bit and Page request
		 * Enable bit:
		 */
		info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
		if (info && info->ats_supported)
			context_set_sm_dte(context);
		if (info && info->pri_supported)
			context_set_sm_pre(context);
	} else {
		struct dma_pte *pgd = domain->pgd;
		int agaw;

		context_set_domain_id(context, did);

		if (translation != CONTEXT_TT_PASS_THROUGH) {
			/*
			 * Skip top levels of page tables for iommu which has
			 * less agaw than default. Unnecessary for PT mode.
			 */
			for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
				ret = -ENOMEM;
				pgd = phys_to_virt(dma_pte_addr(pgd));
				if (!dma_pte_present(pgd))
					goto out_unlock;
			}

			info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
			if (info && info->ats_supported)
				translation = CONTEXT_TT_DEV_IOTLB;
			else
				translation = CONTEXT_TT_MULTI_LEVEL;

			context_set_address_root(context, virt_to_phys(pgd));
			context_set_address_width(context, agaw);
		} else {
			/*
			 * In pass through mode, AW must be programmed to
			 * indicate the largest AGAW value supported by
			 * hardware. And ASR is ignored by hardware.
			 */
			context_set_address_width(context, iommu->msagaw);
		}

		context_set_translation_type(context, translation);
	}

	context_set_fault_enable(context);
	context_set_present(context);
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(context, sizeof(*context));

	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entries we only need to flush the write-buffer. If it
	 * _does_ cache non-present entries, then it does so in the special
	 * domain #0, which we have to flush:
	 */
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   (((u16)bus) << 8) | devfn,
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
	} else {
		iommu_flush_write_buffer(iommu);
	}
	iommu_enable_dev_iotlb(info);

	ret = 0;

out_unlock:
	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return ret;
}
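
/*
 * Program the context entry for (bus, devfn) on @iommu so it points at
 * @domain's translation structures: the PASID directory in scalable mode,
 * or the second-level page table (or pass-through) in legacy mode.
 */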
struct domain_context_mapping_data {
	struct dmar_domain *domain;
	struct intel_iommu *iommu;
	struct pasid_table *table;
};

static int domain_context_mapping_cb(struct pci_dev *pdev,
				     u16 alias, void *opaque)
{
	struct domain_context_mapping_data *data = opaque;

	return domain_context_mapping_one(data->domain, data->iommu,
					  data->table, PCI_BUS_NUM(alias),
					  alias & 0xff);
}

static int
domain_context_mapping(struct dmar_domain *domain, struct device *dev)
{
	struct domain_context_mapping_data data;
	struct pasid_table *table;
	struct intel_iommu *iommu;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	table = intel_pasid_get_table(dev);

	if (!dev_is_pci(dev))
		return domain_context_mapping_one(domain, iommu, table,
						  bus, devfn);

	data.domain = domain;
	data.iommu = iommu;
	data.table = table;

	return pci_for_each_dma_alias(to_pci_dev(dev),
				      &domain_context_mapping_cb, &data);
}

static int domain_context_mapped_cb(struct pci_dev *pdev,
				    u16 alias, void *opaque)
{
	struct intel_iommu *iommu = opaque;

	return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
}

static int domain_context_mapped(struct device *dev)
{
	struct intel_iommu *iommu;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	if (!dev_is_pci(dev))
		return device_context_mapped(iommu, bus, devfn);

	return !pci_for_each_dma_alias(to_pci_dev(dev),
				       domain_context_mapped_cb, iommu);
}
2195 static inline unsigned long aligned_nrpages(unsigned long host_addr
,
2198 host_addr
&= ~PAGE_MASK
;
2199 return PAGE_ALIGN(host_addr
+ size
) >> VTD_PAGE_SHIFT
;
2202 /* Return largest possible superpage level for a given mapping */
2203 static inline int hardware_largepage_caps(struct dmar_domain
*domain
,
2204 unsigned long iov_pfn
,
2205 unsigned long phy_pfn
,
2206 unsigned long pages
)
2208 int support
, level
= 1;
2209 unsigned long pfnmerge
;
2211 support
= domain
->iommu_superpage
;
2213 /* To use a large page, the virtual *and* physical addresses
2214 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2215 of them will mean we have to use smaller pages. So just
2216 merge them and check both at once. */
2217 pfnmerge
= iov_pfn
| phy_pfn
;
2219 while (support
&& !(pfnmerge
& ~VTD_STRIDE_MASK
)) {
2220 pages
>>= VTD_STRIDE_SHIFT
;
2223 pfnmerge
>>= VTD_STRIDE_SHIFT
;
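
/*
 * Core mapper: install PTEs for @nr_pages starting at @iov_pfn, taking the
 * physical pages either from @sg or from a contiguous @phys_pfn run, and
 * using superpages where alignment and hardware support allow.
 */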
static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
			    struct scatterlist *sg, unsigned long phys_pfn,
			    unsigned long nr_pages, int prot)
{
	struct dma_pte *first_pte = NULL, *pte = NULL;
	phys_addr_t uninitialized_var(pteval);
	unsigned long sg_res = 0;
	unsigned int largepage_lvl = 0;
	unsigned long lvl_pages = 0;

	BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));

	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
		return -EINVAL;

	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;

	if (!sg) {
		sg_res = nr_pages;
		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
	}

	while (nr_pages > 0) {
		uint64_t tmp;

		if (!sg_res) {
			unsigned int pgoff = sg->offset & ~PAGE_MASK;

			sg_res = aligned_nrpages(sg->offset, sg->length);
			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
			sg->dma_length = sg->length;
			pteval = (sg_phys(sg) - pgoff) | prot;
			phys_pfn = pteval >> VTD_PAGE_SHIFT;
		}

		if (!pte) {
			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);

			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
			if (!pte)
				return -ENOMEM;
			/* It is large page*/
			if (largepage_lvl > 1) {
				unsigned long nr_superpages, end_pfn;

				pteval |= DMA_PTE_LARGE_PAGE;
				lvl_pages = lvl_to_nr_pages(largepage_lvl);

				nr_superpages = sg_res / lvl_pages;
				end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;

				/*
				 * Ensure that old small page tables are
				 * removed to make room for superpage(s).
				 * We're adding new large pages, so make sure
				 * we don't remove their parent tables.
				 */
				dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
						       largepage_lvl + 1);
			} else {
				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
			}

		}
		/* We don't need lock here, nobody else
		 * touches the iova range
		 */
		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
		if (tmp) {
			static int dumps = 5;
			pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
				iov_pfn, tmp, (unsigned long long)pteval);
			if (dumps) {
				dumps--;
				debug_dma_dump_mappings(NULL);
			}
			WARN_ON(1);
		}

		lvl_pages = lvl_to_nr_pages(largepage_lvl);

		BUG_ON(nr_pages < lvl_pages);
		BUG_ON(sg_res < lvl_pages);

		nr_pages -= lvl_pages;
		iov_pfn += lvl_pages;
		phys_pfn += lvl_pages;
		pteval += lvl_pages * VTD_PAGE_SIZE;
		sg_res -= lvl_pages;

		/* If the next PTE would be the first in a new page, then we
		   need to flush the cache on the entries we've just written.
		   And then we'll need to recalculate 'pte', so clear it and
		   let it get set again in the if (!pte) block above.

		   If we're done (!nr_pages) we need to flush the cache too.

		   Also if we've been setting superpages, we may need to
		   recalculate 'pte' and switch back to smaller pages for the
		   end of the mapping, if the trailing size is not enough to
		   use another superpage (i.e. sg_res < lvl_pages). */
		pte++;
		if (!nr_pages || first_pte_in_page(pte) ||
		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
			domain_flush_cache(domain, first_pte,
					   (void *)pte - (void *)first_pte);
			pte = NULL;
		}

		if (!sg_res && nr_pages)
			sg = sg_next(sg);
	}
	return 0;
}
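/*
 * Note on the loop above: PTEs are written with a local cmpxchg and only
 * flushed in batches (one domain_flush_cache() per page-table page, at the
 * end of the mapping, or when falling back from superpages), which keeps
 * cache maintenance off the per-PTE fast path on non-coherent hardware.
 */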
static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
			  struct scatterlist *sg, unsigned long phys_pfn,
			  unsigned long nr_pages, int prot)
{
	int iommu_id, ret;
	struct intel_iommu *iommu;

	/* Do the real mapping first */
	ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
	if (ret)
		return ret;

	for_each_domain_iommu(iommu_id, domain) {
		iommu = g_iommus[iommu_id];
		__mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
	}

	return 0;
}
static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				    struct scatterlist *sg, unsigned long nr_pages,
				    int prot)
{
	return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
}

static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				     unsigned long phys_pfn, unsigned long nr_pages,
				     int prot)
{
	return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
}
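/*
 * The two inline wrappers above select between the scatterlist path
 * (phys_pfn unused, taken from each sg entry) and the physically
 * contiguous path (sg == NULL) of the common mapping worker.
 */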
static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	unsigned long flags;
	struct context_entry *context;
	u16 did_old;

	if (!iommu)
		return;

	spin_lock_irqsave(&iommu->lock, flags);
	context = iommu_context_addr(iommu, bus, devfn, 0);
	if (!context) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		return;
	}
	did_old = context_domain_id(context);
	context_clear_entry(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));
	spin_unlock_irqrestore(&iommu->lock, flags);
	iommu->flush.flush_context(iommu,
				   did_old,
				   (((u16)bus) << 8) | devfn,
				   DMA_CCMD_MASK_NOBIT,
				   DMA_CCMD_DEVICE_INVL);
	iommu->flush.flush_iotlb(iommu,
				 did_old,
				 0,
				 0,
				 DMA_TLB_DSI_FLUSH);
}
static inline void unlink_domain_info(struct device_domain_info *info)
{
	assert_spin_locked(&device_domain_lock);
	list_del(&info->link);
	list_del(&info->global);
	if (info->dev)
		info->dev->archdata.iommu = NULL;
}
static void domain_remove_dev_info(struct dmar_domain *domain)
{
	struct device_domain_info *info, *tmp;
	unsigned long flags;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry_safe(info, tmp, &domain->devices, link)
		__dmar_remove_one_dev_info(info);
	spin_unlock_irqrestore(&device_domain_lock, flags);
}
/*
 * find_domain
 * Note: we use struct device->archdata.iommu to store the device_domain_info.
 */
static struct dmar_domain *find_domain(struct device *dev)
{
	struct device_domain_info *info;

	if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) {
		struct iommu_domain *domain;

		dev->archdata.iommu = NULL;
		domain = iommu_get_domain_for_dev(dev);
		if (domain)
			intel_iommu_attach_device(domain, dev);
	}

	/* No lock here, assumes no domain exit in normal case */
	info = dev->archdata.iommu;

	if (likely(info))
		return info->domain;

	return NULL;
}
static inline struct device_domain_info *
dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
{
	struct device_domain_info *info;

	list_for_each_entry(info, &device_domain_list, global)
		if (info->iommu->segment == segment && info->bus == bus &&
		    info->devfn == devfn)
			return info;

	return NULL;
}
2468 static struct dmar_domain
*dmar_insert_one_dev_info(struct intel_iommu
*iommu
,
2471 struct dmar_domain
*domain
)
2473 struct dmar_domain
*found
= NULL
;
2474 struct device_domain_info
*info
;
2475 unsigned long flags
;
2478 info
= alloc_devinfo_mem();
2483 info
->devfn
= devfn
;
2484 info
->ats_supported
= info
->pasid_supported
= info
->pri_supported
= 0;
2485 info
->ats_enabled
= info
->pasid_enabled
= info
->pri_enabled
= 0;
2488 info
->domain
= domain
;
2489 info
->iommu
= iommu
;
2490 info
->pasid_table
= NULL
;
2491 info
->auxd_enabled
= 0;
2492 INIT_LIST_HEAD(&info
->auxiliary_domains
);
2494 if (dev
&& dev_is_pci(dev
)) {
2495 struct pci_dev
*pdev
= to_pci_dev(info
->dev
);
2497 if (!pdev
->untrusted
&&
2498 !pci_ats_disabled() &&
2499 ecap_dev_iotlb_support(iommu
->ecap
) &&
2500 pci_find_ext_capability(pdev
, PCI_EXT_CAP_ID_ATS
) &&
2501 dmar_find_matched_atsr_unit(pdev
))
2502 info
->ats_supported
= 1;
2504 if (sm_supported(iommu
)) {
2505 if (pasid_supported(iommu
)) {
2506 int features
= pci_pasid_features(pdev
);
2508 info
->pasid_supported
= features
| 1;
2511 if (info
->ats_supported
&& ecap_prs(iommu
->ecap
) &&
2512 pci_find_ext_capability(pdev
, PCI_EXT_CAP_ID_PRI
))
2513 info
->pri_supported
= 1;
2517 spin_lock_irqsave(&device_domain_lock
, flags
);
2519 found
= find_domain(dev
);
2522 struct device_domain_info
*info2
;
2523 info2
= dmar_search_domain_by_dev_info(iommu
->segment
, bus
, devfn
);
2525 found
= info2
->domain
;
2531 spin_unlock_irqrestore(&device_domain_lock
, flags
);
2532 free_devinfo_mem(info
);
2533 /* Caller must free the original domain */
2537 spin_lock(&iommu
->lock
);
2538 ret
= domain_attach_iommu(domain
, iommu
);
2539 spin_unlock(&iommu
->lock
);
2542 spin_unlock_irqrestore(&device_domain_lock
, flags
);
2543 free_devinfo_mem(info
);
2547 list_add(&info
->link
, &domain
->devices
);
2548 list_add(&info
->global
, &device_domain_list
);
2550 dev
->archdata
.iommu
= info
;
2551 spin_unlock_irqrestore(&device_domain_lock
, flags
);
2553 /* PASID table is mandatory for a PCI device in scalable mode. */
2554 if (dev
&& dev_is_pci(dev
) && sm_supported(iommu
)) {
2555 ret
= intel_pasid_alloc_table(dev
);
2557 dev_err(dev
, "PASID table allocation failed\n");
2558 dmar_remove_one_dev_info(dev
);
2562 /* Setup the PASID entry for requests without PASID: */
2563 spin_lock(&iommu
->lock
);
2564 if (hw_pass_through
&& domain_type_is_si(domain
))
2565 ret
= intel_pasid_setup_pass_through(iommu
, domain
,
2566 dev
, PASID_RID2PASID
);
2568 ret
= intel_pasid_setup_second_level(iommu
, domain
,
2569 dev
, PASID_RID2PASID
);
2570 spin_unlock(&iommu
->lock
);
2572 dev_err(dev
, "Setup RID2PASID failed\n");
2573 dmar_remove_one_dev_info(dev
);
2578 if (dev
&& domain_context_mapping(domain
, dev
)) {
2579 dev_err(dev
, "Domain context map failed\n");
2580 dmar_remove_one_dev_info(dev
);
static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
{
	*(u16 *)opaque = alias;
	return 0;
}
static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
{
	struct device_domain_info *info;
	struct dmar_domain *domain = NULL;
	struct intel_iommu *iommu;
	u16 dma_alias;
	unsigned long flags;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return NULL;

	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);

		spin_lock_irqsave(&device_domain_lock, flags);
		info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
						      PCI_BUS_NUM(dma_alias),
						      dma_alias & 0xff);
		if (info) {
			iommu = info->iommu;
			domain = info->domain;
		}
		spin_unlock_irqrestore(&device_domain_lock, flags);

		/* DMA alias already has a domain, use it */
		if (info)
			goto out;
	}

	/* Allocate and initialize new domain for the device */
	domain = alloc_domain(0);
	if (!domain)
		return NULL;
	if (domain_init(domain, iommu, gaw)) {
		domain_exit(domain);
		return NULL;
	}

out:
	return domain;
}
static struct dmar_domain *set_domain_for_dev(struct device *dev,
					      struct dmar_domain *domain)
{
	struct intel_iommu *iommu;
	struct dmar_domain *tmp;
	u16 req_id, dma_alias;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return NULL;

	req_id = ((u16)bus << 8) | devfn;

	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);

		/* register PCI DMA alias device */
		if (req_id != dma_alias) {
			tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
						       dma_alias & 0xff, NULL, domain);

			if (!tmp || tmp != domain)
				return tmp;
		}
	}

	tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
	if (!tmp || tmp != domain)
		return tmp;

	return domain;
}
static int iommu_domain_identity_map(struct dmar_domain *domain,
				     unsigned long long start,
				     unsigned long long end)
{
	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;

	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
			  dma_to_mm_pfn(last_vpfn))) {
		pr_err("Reserving iova failed\n");
		return -ENOMEM;
	}

	pr_debug("Mapping reserved region %llx-%llx\n", start, end);
	/*
	 * RMRR range might have overlap with physical memory range,
	 * clear it first
	 */
	dma_pte_clear_range(domain, first_vpfn, last_vpfn);

	return __domain_mapping(domain, first_vpfn, NULL,
				first_vpfn, last_vpfn - first_vpfn + 1,
				DMA_PTE_READ|DMA_PTE_WRITE);
}
static int domain_prepare_identity_map(struct device *dev,
				       struct dmar_domain *domain,
				       unsigned long long start,
				       unsigned long long end)
{
	/* For _hardware_ passthrough, don't bother. But for software
	   passthrough, we do it anyway -- it may indicate a memory
	   range which is reserved in E820, so which didn't get set
	   up to start with in si_domain */
	if (domain == si_domain && hw_pass_through) {
		dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
			 start, end);
		return 0;
	}

	dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);

	if (end < start) {
		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
			"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
			dmi_get_system_info(DMI_BIOS_VENDOR),
			dmi_get_system_info(DMI_BIOS_VERSION),
			dmi_get_system_info(DMI_PRODUCT_VERSION));
		return -EIO;
	}

	if (end >> agaw_to_width(domain->agaw)) {
		WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     agaw_to_width(domain->agaw),
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		return -EIO;
	}

	return iommu_domain_identity_map(domain, start, end);
}
2739 static int md_domain_init(struct dmar_domain
*domain
, int guest_width
);
2741 static int __init
si_domain_init(int hw
)
2743 struct dmar_rmrr_unit
*rmrr
;
2747 si_domain
= alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY
);
2751 if (md_domain_init(si_domain
, DEFAULT_DOMAIN_ADDRESS_WIDTH
)) {
2752 domain_exit(si_domain
);
2759 for_each_online_node(nid
) {
2760 unsigned long start_pfn
, end_pfn
;
2763 for_each_mem_pfn_range(i
, nid
, &start_pfn
, &end_pfn
, NULL
) {
2764 ret
= iommu_domain_identity_map(si_domain
,
2765 PFN_PHYS(start_pfn
), PFN_PHYS(end_pfn
));
2772 * Identity map the RMRRs so that devices with RMRRs could also use
2775 for_each_rmrr_units(rmrr
) {
2776 for_each_active_dev_scope(rmrr
->devices
, rmrr
->devices_cnt
,
2778 unsigned long long start
= rmrr
->base_address
;
2779 unsigned long long end
= rmrr
->end_address
;
2781 if (WARN_ON(end
< start
||
2782 end
>> agaw_to_width(si_domain
->agaw
)))
2785 ret
= iommu_domain_identity_map(si_domain
, start
, end
);
static int identity_mapping(struct device *dev)
{
	struct device_domain_info *info;

	info = dev->archdata.iommu;
	if (info && info != DUMMY_DEVICE_DOMAIN_INFO && info != DEFER_DEVICE_DOMAIN_INFO)
		return (info->domain == si_domain);

	return 0;
}
static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
{
	struct dmar_domain *ndomain;
	struct intel_iommu *iommu;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
	if (ndomain != domain)
		return -EBUSY;

	return 0;
}
static bool device_has_rmrr(struct device *dev)
{
	struct dmar_rmrr_unit *rmrr;
	struct device *tmp;
	int i;

	rcu_read_lock();
	for_each_rmrr_units(rmrr) {
		/*
		 * Return TRUE if this RMRR contains the device that
		 * is passed in.
		 */
		for_each_active_dev_scope(rmrr->devices,
					  rmrr->devices_cnt, i, tmp)
			if (tmp == dev ||
			    is_downstream_to_pci_bridge(dev, tmp)) {
				rcu_read_unlock();
				return true;
			}
	}
	rcu_read_unlock();
	return false;
}
/**
 * device_rmrr_is_relaxable - Test whether the RMRR of this device
 * is relaxable (ie. is allowed to be not enforced under some conditions)
 * @dev: device handle
 *
 * We assume that PCI USB devices with RMRRs have them largely
 * for historical reasons and that the RMRR space is not actively used post
 * boot. This exclusion may change if vendors begin to abuse it.
 *
 * The same exception is made for graphics devices, with the requirement that
 * any use of the RMRR regions will be torn down before assigning the device
 * to a guest.
 *
 * Return: true if the RMRR is relaxable, false otherwise
 */
static bool device_rmrr_is_relaxable(struct device *dev)
{
	struct pci_dev *pdev;

	if (!dev_is_pci(dev))
		return false;

	pdev = to_pci_dev(dev);
	if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
		return true;
	else
		return false;
}
/*
 * There are a couple cases where we need to restrict the functionality of
 * devices associated with RMRRs. The first is when evaluating a device for
 * identity mapping because problems exist when devices are moved in and out
 * of domains and their respective RMRR information is lost. This means that
 * a device with associated RMRRs will never be in a "passthrough" domain.
 * The second is use of the device through the IOMMU API. This interface
 * expects to have full control of the IOVA space for the device. We cannot
 * satisfy both the requirement that RMRR access is maintained and have an
 * unencumbered IOVA space. We also have no ability to quiesce the device's
 * use of the RMRR space or even inform the IOMMU API user of the restriction.
 * We therefore prevent devices associated with an RMRR from participating in
 * the IOMMU API, which eliminates them from device assignment.
 *
 * In both cases, devices which have relaxable RMRRs are not concerned by this
 * restriction. See device_rmrr_is_relaxable comment.
 */
static bool device_is_rmrr_locked(struct device *dev)
{
	if (!device_has_rmrr(dev))
		return false;

	if (device_rmrr_is_relaxable(dev))
		return false;

	return true;
}
2904 * Return the required default domain type for a specific device.
2906 * @dev: the device in query
2907 * @startup: true if this is during early boot
2910 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2911 * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain
2912 * - 0: both identity and dynamic domains work for this device
2914 static int device_def_domain_type(struct device
*dev
)
2916 if (dev_is_pci(dev
)) {
2917 struct pci_dev
*pdev
= to_pci_dev(dev
);
2920 * Prevent any device marked as untrusted from getting
2921 * placed into the statically identity mapping domain.
2923 if (pdev
->untrusted
)
2924 return IOMMU_DOMAIN_DMA
;
2926 if ((iommu_identity_mapping
& IDENTMAP_AZALIA
) && IS_AZALIA(pdev
))
2927 return IOMMU_DOMAIN_IDENTITY
;
2929 if ((iommu_identity_mapping
& IDENTMAP_GFX
) && IS_GFX_DEVICE(pdev
))
2930 return IOMMU_DOMAIN_IDENTITY
;
2933 * We want to start off with all devices in the 1:1 domain, and
2934 * take them out later if we find they can't access all of memory.
2936 * However, we can't do this for PCI devices behind bridges,
2937 * because all PCI devices behind the same bridge will end up
2938 * with the same source-id on their transactions.
2940 * Practically speaking, we can't change things around for these
2941 * devices at run-time, because we can't be sure there'll be no
2942 * DMA transactions in flight for any of their siblings.
2944 * So PCI devices (unless they're on the root bus) as well as
2945 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2946 * the 1:1 domain, just in _case_ one of their siblings turns out
2947 * not to be able to map all of memory.
2949 if (!pci_is_pcie(pdev
)) {
2950 if (!pci_is_root_bus(pdev
->bus
))
2951 return IOMMU_DOMAIN_DMA
;
2952 if (pdev
->class >> 8 == PCI_CLASS_BRIDGE_PCI
)
2953 return IOMMU_DOMAIN_DMA
;
2954 } else if (pci_pcie_type(pdev
) == PCI_EXP_TYPE_PCI_BRIDGE
)
2955 return IOMMU_DOMAIN_DMA
;
2958 return (iommu_identity_mapping
& IDENTMAP_ALL
) ?
2959 IOMMU_DOMAIN_IDENTITY
: 0;
2962 static void intel_iommu_init_qi(struct intel_iommu
*iommu
)
2965 * Start from the sane iommu hardware state.
2966 * If the queued invalidation is already initialized by us
2967 * (for example, while enabling interrupt-remapping) then
2968 * we got the things already rolling from a sane state.
2972 * Clear any previous faults.
2974 dmar_fault(-1, iommu
);
2976 * Disable queued invalidation if supported and already enabled
2977 * before OS handover.
2979 dmar_disable_qi(iommu
);
2982 if (dmar_enable_qi(iommu
)) {
2984 * Queued Invalidate not enabled, use Register Based Invalidate
2986 iommu
->flush
.flush_context
= __iommu_flush_context
;
2987 iommu
->flush
.flush_iotlb
= __iommu_flush_iotlb
;
2988 pr_info("%s: Using Register based invalidation\n",
2991 iommu
->flush
.flush_context
= qi_flush_context
;
2992 iommu
->flush
.flush_iotlb
= qi_flush_iotlb
;
2993 pr_info("%s: Using Queued invalidation\n", iommu
->name
);
2997 static int copy_context_table(struct intel_iommu
*iommu
,
2998 struct root_entry
*old_re
,
2999 struct context_entry
**tbl
,
3002 int tbl_idx
, pos
= 0, idx
, devfn
, ret
= 0, did
;
3003 struct context_entry
*new_ce
= NULL
, ce
;
3004 struct context_entry
*old_ce
= NULL
;
3005 struct root_entry re
;
3006 phys_addr_t old_ce_phys
;
3008 tbl_idx
= ext
? bus
* 2 : bus
;
3009 memcpy(&re
, old_re
, sizeof(re
));
3011 for (devfn
= 0; devfn
< 256; devfn
++) {
3012 /* First calculate the correct index */
3013 idx
= (ext
? devfn
* 2 : devfn
) % 256;
3016 /* First save what we may have and clean up */
3018 tbl
[tbl_idx
] = new_ce
;
3019 __iommu_flush_cache(iommu
, new_ce
,
3029 old_ce_phys
= root_entry_lctp(&re
);
3031 old_ce_phys
= root_entry_uctp(&re
);
3034 if (ext
&& devfn
== 0) {
3035 /* No LCTP, try UCTP */
3044 old_ce
= memremap(old_ce_phys
, PAGE_SIZE
,
3049 new_ce
= alloc_pgtable_page(iommu
->node
);
3056 /* Now copy the context entry */
3057 memcpy(&ce
, old_ce
+ idx
, sizeof(ce
));
3059 if (!__context_present(&ce
))
3062 did
= context_domain_id(&ce
);
3063 if (did
>= 0 && did
< cap_ndoms(iommu
->cap
))
3064 set_bit(did
, iommu
->domain_ids
);
3067 * We need a marker for copied context entries. This
3068 * marker needs to work for the old format as well as
3069 * for extended context entries.
3071 * Bit 67 of the context entry is used. In the old
3072 * format this bit is available to software, in the
3073 * extended format it is the PGE bit, but PGE is ignored
3074 * by HW if PASIDs are disabled (and thus still
3077 * So disable PASIDs first and then mark the entry
3078 * copied. This means that we don't copy PASID
3079 * translations from the old kernel, but this is fine as
3080 * faults there are not fatal.
3082 context_clear_pasid_enable(&ce
);
3083 context_set_copied(&ce
);
3088 tbl
[tbl_idx
+ pos
] = new_ce
;
3090 __iommu_flush_cache(iommu
, new_ce
, VTD_PAGE_SIZE
);
3099 static int copy_translation_tables(struct intel_iommu
*iommu
)
3101 struct context_entry
**ctxt_tbls
;
3102 struct root_entry
*old_rt
;
3103 phys_addr_t old_rt_phys
;
3104 int ctxt_table_entries
;
3105 unsigned long flags
;
3110 rtaddr_reg
= dmar_readq(iommu
->reg
+ DMAR_RTADDR_REG
);
3111 ext
= !!(rtaddr_reg
& DMA_RTADDR_RTT
);
3112 new_ext
= !!ecap_ecs(iommu
->ecap
);
3115 * The RTT bit can only be changed when translation is disabled,
3116 * but disabling translation means to open a window for data
3117 * corruption. So bail out and don't copy anything if we would
3118 * have to change the bit.
3123 old_rt_phys
= rtaddr_reg
& VTD_PAGE_MASK
;
3127 old_rt
= memremap(old_rt_phys
, PAGE_SIZE
, MEMREMAP_WB
);
3131 /* This is too big for the stack - allocate it from slab */
3132 ctxt_table_entries
= ext
? 512 : 256;
3134 ctxt_tbls
= kcalloc(ctxt_table_entries
, sizeof(void *), GFP_KERNEL
);
3138 for (bus
= 0; bus
< 256; bus
++) {
3139 ret
= copy_context_table(iommu
, &old_rt
[bus
],
3140 ctxt_tbls
, bus
, ext
);
3142 pr_err("%s: Failed to copy context table for bus %d\n",
3148 spin_lock_irqsave(&iommu
->lock
, flags
);
3150 /* Context tables are copied, now write them to the root_entry table */
3151 for (bus
= 0; bus
< 256; bus
++) {
3152 int idx
= ext
? bus
* 2 : bus
;
3155 if (ctxt_tbls
[idx
]) {
3156 val
= virt_to_phys(ctxt_tbls
[idx
]) | 1;
3157 iommu
->root_entry
[bus
].lo
= val
;
3160 if (!ext
|| !ctxt_tbls
[idx
+ 1])
3163 val
= virt_to_phys(ctxt_tbls
[idx
+ 1]) | 1;
3164 iommu
->root_entry
[bus
].hi
= val
;
3167 spin_unlock_irqrestore(&iommu
->lock
, flags
);
3171 __iommu_flush_cache(iommu
, iommu
->root_entry
, PAGE_SIZE
);
3181 static int __init
init_dmars(void)
3183 struct dmar_drhd_unit
*drhd
;
3184 struct intel_iommu
*iommu
;
3190 * initialize and program root entry to not present
3193 for_each_drhd_unit(drhd
) {
3195 * lock not needed as this is only incremented in the single
3196 * threaded kernel __init code path all other access are read
3199 if (g_num_of_iommus
< DMAR_UNITS_SUPPORTED
) {
3203 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED
);
3206 /* Preallocate enough resources for IOMMU hot-addition */
3207 if (g_num_of_iommus
< DMAR_UNITS_SUPPORTED
)
3208 g_num_of_iommus
= DMAR_UNITS_SUPPORTED
;
3210 g_iommus
= kcalloc(g_num_of_iommus
, sizeof(struct intel_iommu
*),
3213 pr_err("Allocating global iommu array failed\n");
3218 for_each_iommu(iommu
, drhd
) {
3219 if (drhd
->ignored
) {
3220 iommu_disable_translation(iommu
);
3225 * Find the max pasid size of all IOMMU's in the system.
3226 * We need to ensure the system pasid table is no bigger
3227 * than the smallest supported.
3229 if (pasid_supported(iommu
)) {
3230 u32 temp
= 2 << ecap_pss(iommu
->ecap
);
3232 intel_pasid_max_id
= min_t(u32
, temp
,
3233 intel_pasid_max_id
);
3236 g_iommus
[iommu
->seq_id
] = iommu
;
3238 intel_iommu_init_qi(iommu
);
3240 ret
= iommu_init_domains(iommu
);
3244 init_translation_status(iommu
);
3246 if (translation_pre_enabled(iommu
) && !is_kdump_kernel()) {
3247 iommu_disable_translation(iommu
);
3248 clear_translation_pre_enabled(iommu
);
3249 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3255 * we could share the same root & context tables
3256 * among all IOMMU's. Need to Split it later.
3258 ret
= iommu_alloc_root_entry(iommu
);
3262 if (translation_pre_enabled(iommu
)) {
3263 pr_info("Translation already enabled - trying to copy translation structures\n");
3265 ret
= copy_translation_tables(iommu
);
3268 * We found the IOMMU with translation
3269 * enabled - but failed to copy over the
3270 * old root-entry table. Try to proceed
3271 * by disabling translation now and
3272 * allocating a clean root-entry table.
3273 * This might cause DMAR faults, but
3274 * probably the dump will still succeed.
3276 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3278 iommu_disable_translation(iommu
);
3279 clear_translation_pre_enabled(iommu
);
3281 pr_info("Copied translation tables from previous kernel for %s\n",
3286 if (!ecap_pass_through(iommu
->ecap
))
3287 hw_pass_through
= 0;
3288 #ifdef CONFIG_INTEL_IOMMU_SVM
3289 if (pasid_supported(iommu
))
3290 intel_svm_init(iommu
);
3295 * Now that qi is enabled on all iommus, set the root entry and flush
3296 * caches. This is required on some Intel X58 chipsets, otherwise the
3297 * flush_context function will loop forever and the boot hangs.
3299 for_each_active_iommu(iommu
, drhd
) {
3300 iommu_flush_write_buffer(iommu
);
3301 iommu_set_root_entry(iommu
);
3302 iommu
->flush
.flush_context(iommu
, 0, 0, 0, DMA_CCMD_GLOBAL_INVL
);
3303 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH
);
3306 if (iommu_default_passthrough())
3307 iommu_identity_mapping
|= IDENTMAP_ALL
;
3309 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3314 iommu_identity_mapping
|= IDENTMAP_GFX
;
3316 check_tylersburg_isoch();
3318 ret
= si_domain_init(hw_pass_through
);
3325 * global invalidate context cache
3326 * global invalidate iotlb
3327 * enable translation
3329 for_each_iommu(iommu
, drhd
) {
3330 if (drhd
->ignored
) {
3332 * we always have to disable PMRs or DMA may fail on
3336 iommu_disable_protect_mem_regions(iommu
);
3340 iommu_flush_write_buffer(iommu
);
3342 #ifdef CONFIG_INTEL_IOMMU_SVM
3343 if (pasid_supported(iommu
) && ecap_prs(iommu
->ecap
)) {
3345 * Call dmar_alloc_hwirq() with dmar_global_lock held,
3346 * could cause possible lock race condition.
3348 up_write(&dmar_global_lock
);
3349 ret
= intel_svm_enable_prq(iommu
);
3350 down_write(&dmar_global_lock
);
3355 ret
= dmar_set_interrupt(iommu
);
3363 for_each_active_iommu(iommu
, drhd
) {
3364 disable_dmar_iommu(iommu
);
3365 free_dmar_iommu(iommu
);
3374 /* This takes a number of _MM_ pages, not VTD pages */
3375 static unsigned long intel_alloc_iova(struct device
*dev
,
3376 struct dmar_domain
*domain
,
3377 unsigned long nrpages
, uint64_t dma_mask
)
3379 unsigned long iova_pfn
;
3381 /* Restrict dma_mask to the width that the iommu can handle */
3382 dma_mask
= min_t(uint64_t, DOMAIN_MAX_ADDR(domain
->gaw
), dma_mask
);
3383 /* Ensure we reserve the whole size-aligned region */
3384 nrpages
= __roundup_pow_of_two(nrpages
);
3386 if (!dmar_forcedac
&& dma_mask
> DMA_BIT_MASK(32)) {
3388 * First try to allocate an io virtual address in
3389 * DMA_BIT_MASK(32) and if that fails then try allocating
3392 iova_pfn
= alloc_iova_fast(&domain
->iovad
, nrpages
,
3393 IOVA_PFN(DMA_BIT_MASK(32)), false);
3397 iova_pfn
= alloc_iova_fast(&domain
->iovad
, nrpages
,
3398 IOVA_PFN(dma_mask
), true);
3399 if (unlikely(!iova_pfn
)) {
3400 dev_err_once(dev
, "Allocating %ld-page iova failed\n",
3408 static struct dmar_domain
*get_private_domain_for_dev(struct device
*dev
)
3410 struct dmar_domain
*domain
, *tmp
;
3411 struct dmar_rmrr_unit
*rmrr
;
3412 struct device
*i_dev
;
3415 /* Device shouldn't be attached by any domains. */
3416 domain
= find_domain(dev
);
3420 domain
= find_or_alloc_domain(dev
, DEFAULT_DOMAIN_ADDRESS_WIDTH
);
3424 /* We have a new domain - setup possible RMRRs for the device */
3426 for_each_rmrr_units(rmrr
) {
3427 for_each_active_dev_scope(rmrr
->devices
, rmrr
->devices_cnt
,
3432 ret
= domain_prepare_identity_map(dev
, domain
,
3436 dev_err(dev
, "Mapping reserved region failed\n");
3441 tmp
= set_domain_for_dev(dev
, domain
);
3442 if (!tmp
|| domain
!= tmp
) {
3443 domain_exit(domain
);
3449 dev_err(dev
, "Allocating domain failed\n");
3451 domain
->domain
.type
= IOMMU_DOMAIN_DMA
;
3456 /* Check if the dev needs to go through non-identity map and unmap process.*/
3457 static bool iommu_need_mapping(struct device
*dev
)
3461 if (iommu_dummy(dev
))
3464 ret
= identity_mapping(dev
);
3466 u64 dma_mask
= *dev
->dma_mask
;
3468 if (dev
->coherent_dma_mask
&& dev
->coherent_dma_mask
< dma_mask
)
3469 dma_mask
= dev
->coherent_dma_mask
;
3471 if (dma_mask
>= dma_direct_get_required_mask(dev
))
3475 * 32 bit DMA is removed from si_domain and fall back to
3476 * non-identity mapping.
3478 dmar_remove_one_dev_info(dev
);
3479 ret
= iommu_request_dma_domain_for_dev(dev
);
3481 struct iommu_domain
*domain
;
3482 struct dmar_domain
*dmar_domain
;
3484 domain
= iommu_get_domain_for_dev(dev
);
3486 dmar_domain
= to_dmar_domain(domain
);
3487 dmar_domain
->flags
|= DOMAIN_FLAG_LOSE_CHILDREN
;
3489 dmar_remove_one_dev_info(dev
);
3490 get_private_domain_for_dev(dev
);
3493 dev_info(dev
, "32bit DMA uses non-identity mapping\n");
3499 static dma_addr_t
__intel_map_single(struct device
*dev
, phys_addr_t paddr
,
3500 size_t size
, int dir
, u64 dma_mask
)
3502 struct dmar_domain
*domain
;
3503 phys_addr_t start_paddr
;
3504 unsigned long iova_pfn
;
3507 struct intel_iommu
*iommu
;
3508 unsigned long paddr_pfn
= paddr
>> PAGE_SHIFT
;
3510 BUG_ON(dir
== DMA_NONE
);
3512 domain
= find_domain(dev
);
3514 return DMA_MAPPING_ERROR
;
3516 iommu
= domain_get_iommu(domain
);
3517 size
= aligned_nrpages(paddr
, size
);
3519 iova_pfn
= intel_alloc_iova(dev
, domain
, dma_to_mm_pfn(size
), dma_mask
);
3524 * Check if DMAR supports zero-length reads on write only
3527 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
3528 !cap_zlr(iommu
->cap
))
3529 prot
|= DMA_PTE_READ
;
3530 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
3531 prot
|= DMA_PTE_WRITE
;
3533 * paddr - (paddr + size) might be partial page, we should map the whole
3534 * page. Note: if two part of one page are separately mapped, we
3535 * might have two guest_addr mapping to the same host paddr, but this
3536 * is not a big problem
3538 ret
= domain_pfn_mapping(domain
, mm_to_dma_pfn(iova_pfn
),
3539 mm_to_dma_pfn(paddr_pfn
), size
, prot
);
3543 start_paddr
= (phys_addr_t
)iova_pfn
<< PAGE_SHIFT
;
3544 start_paddr
+= paddr
& ~PAGE_MASK
;
3546 trace_map_single(dev
, start_paddr
, paddr
, size
<< VTD_PAGE_SHIFT
);
3552 free_iova_fast(&domain
->iovad
, iova_pfn
, dma_to_mm_pfn(size
));
3553 dev_err(dev
, "Device request: %zx@%llx dir %d --- failed\n",
3554 size
, (unsigned long long)paddr
, dir
);
3555 return DMA_MAPPING_ERROR
;
3558 static dma_addr_t
intel_map_page(struct device
*dev
, struct page
*page
,
3559 unsigned long offset
, size_t size
,
3560 enum dma_data_direction dir
,
3561 unsigned long attrs
)
3563 if (iommu_need_mapping(dev
))
3564 return __intel_map_single(dev
, page_to_phys(page
) + offset
,
3565 size
, dir
, *dev
->dma_mask
);
3566 return dma_direct_map_page(dev
, page
, offset
, size
, dir
, attrs
);
3569 static dma_addr_t
intel_map_resource(struct device
*dev
, phys_addr_t phys_addr
,
3570 size_t size
, enum dma_data_direction dir
,
3571 unsigned long attrs
)
3573 if (iommu_need_mapping(dev
))
3574 return __intel_map_single(dev
, phys_addr
, size
, dir
,
3576 return dma_direct_map_resource(dev
, phys_addr
, size
, dir
, attrs
);
3579 static void intel_unmap(struct device
*dev
, dma_addr_t dev_addr
, size_t size
)
3581 struct dmar_domain
*domain
;
3582 unsigned long start_pfn
, last_pfn
;
3583 unsigned long nrpages
;
3584 unsigned long iova_pfn
;
3585 struct intel_iommu
*iommu
;
3586 struct page
*freelist
;
3587 struct pci_dev
*pdev
= NULL
;
3589 domain
= find_domain(dev
);
3592 iommu
= domain_get_iommu(domain
);
3594 iova_pfn
= IOVA_PFN(dev_addr
);
3596 nrpages
= aligned_nrpages(dev_addr
, size
);
3597 start_pfn
= mm_to_dma_pfn(iova_pfn
);
3598 last_pfn
= start_pfn
+ nrpages
- 1;
3600 if (dev_is_pci(dev
))
3601 pdev
= to_pci_dev(dev
);
3603 freelist
= domain_unmap(domain
, start_pfn
, last_pfn
);
3604 if (intel_iommu_strict
|| (pdev
&& pdev
->untrusted
) ||
3605 !has_iova_flush_queue(&domain
->iovad
)) {
3606 iommu_flush_iotlb_psi(iommu
, domain
, start_pfn
,
3607 nrpages
, !freelist
, 0);
3609 free_iova_fast(&domain
->iovad
, iova_pfn
, dma_to_mm_pfn(nrpages
));
3610 dma_free_pagelist(freelist
);
3612 queue_iova(&domain
->iovad
, iova_pfn
, nrpages
,
3613 (unsigned long)freelist
);
3615 * queue up the release of the unmap to save the 1/6th of the
3616 * cpu used up by the iotlb flush operation...
3620 trace_unmap_single(dev
, dev_addr
, size
);
3623 static void intel_unmap_page(struct device
*dev
, dma_addr_t dev_addr
,
3624 size_t size
, enum dma_data_direction dir
,
3625 unsigned long attrs
)
3627 if (iommu_need_mapping(dev
))
3628 intel_unmap(dev
, dev_addr
, size
);
3630 dma_direct_unmap_page(dev
, dev_addr
, size
, dir
, attrs
);
3633 static void intel_unmap_resource(struct device
*dev
, dma_addr_t dev_addr
,
3634 size_t size
, enum dma_data_direction dir
, unsigned long attrs
)
3636 if (iommu_need_mapping(dev
))
3637 intel_unmap(dev
, dev_addr
, size
);
3640 static void *intel_alloc_coherent(struct device
*dev
, size_t size
,
3641 dma_addr_t
*dma_handle
, gfp_t flags
,
3642 unsigned long attrs
)
3644 struct page
*page
= NULL
;
3647 if (!iommu_need_mapping(dev
))
3648 return dma_direct_alloc(dev
, size
, dma_handle
, flags
, attrs
);
3650 size
= PAGE_ALIGN(size
);
3651 order
= get_order(size
);
3653 if (gfpflags_allow_blocking(flags
)) {
3654 unsigned int count
= size
>> PAGE_SHIFT
;
3656 page
= dma_alloc_from_contiguous(dev
, count
, order
,
3657 flags
& __GFP_NOWARN
);
3661 page
= alloc_pages(flags
, order
);
3664 memset(page_address(page
), 0, size
);
3666 *dma_handle
= __intel_map_single(dev
, page_to_phys(page
), size
,
3668 dev
->coherent_dma_mask
);
3669 if (*dma_handle
!= DMA_MAPPING_ERROR
)
3670 return page_address(page
);
3671 if (!dma_release_from_contiguous(dev
, page
, size
>> PAGE_SHIFT
))
3672 __free_pages(page
, order
);
3677 static void intel_free_coherent(struct device
*dev
, size_t size
, void *vaddr
,
3678 dma_addr_t dma_handle
, unsigned long attrs
)
3681 struct page
*page
= virt_to_page(vaddr
);
3683 if (!iommu_need_mapping(dev
))
3684 return dma_direct_free(dev
, size
, vaddr
, dma_handle
, attrs
);
3686 size
= PAGE_ALIGN(size
);
3687 order
= get_order(size
);
3689 intel_unmap(dev
, dma_handle
, size
);
3690 if (!dma_release_from_contiguous(dev
, page
, size
>> PAGE_SHIFT
))
3691 __free_pages(page
, order
);
3694 static void intel_unmap_sg(struct device
*dev
, struct scatterlist
*sglist
,
3695 int nelems
, enum dma_data_direction dir
,
3696 unsigned long attrs
)
3698 dma_addr_t startaddr
= sg_dma_address(sglist
) & PAGE_MASK
;
3699 unsigned long nrpages
= 0;
3700 struct scatterlist
*sg
;
3703 if (!iommu_need_mapping(dev
))
3704 return dma_direct_unmap_sg(dev
, sglist
, nelems
, dir
, attrs
);
3706 for_each_sg(sglist
, sg
, nelems
, i
) {
3707 nrpages
+= aligned_nrpages(sg_dma_address(sg
), sg_dma_len(sg
));
3710 intel_unmap(dev
, startaddr
, nrpages
<< VTD_PAGE_SHIFT
);
3712 trace_unmap_sg(dev
, startaddr
, nrpages
<< VTD_PAGE_SHIFT
);
3715 static int intel_map_sg(struct device
*dev
, struct scatterlist
*sglist
, int nelems
,
3716 enum dma_data_direction dir
, unsigned long attrs
)
3719 struct dmar_domain
*domain
;
3722 unsigned long iova_pfn
;
3724 struct scatterlist
*sg
;
3725 unsigned long start_vpfn
;
3726 struct intel_iommu
*iommu
;
3728 BUG_ON(dir
== DMA_NONE
);
3729 if (!iommu_need_mapping(dev
))
3730 return dma_direct_map_sg(dev
, sglist
, nelems
, dir
, attrs
);
3732 domain
= find_domain(dev
);
3736 iommu
= domain_get_iommu(domain
);
3738 for_each_sg(sglist
, sg
, nelems
, i
)
3739 size
+= aligned_nrpages(sg
->offset
, sg
->length
);
3741 iova_pfn
= intel_alloc_iova(dev
, domain
, dma_to_mm_pfn(size
),
3744 sglist
->dma_length
= 0;
3749 * Check if DMAR supports zero-length reads on write only
3752 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
3753 !cap_zlr(iommu
->cap
))
3754 prot
|= DMA_PTE_READ
;
3755 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
3756 prot
|= DMA_PTE_WRITE
;
3758 start_vpfn
= mm_to_dma_pfn(iova_pfn
);
3760 ret
= domain_sg_mapping(domain
, start_vpfn
, sglist
, size
, prot
);
3761 if (unlikely(ret
)) {
3762 dma_pte_free_pagetable(domain
, start_vpfn
,
3763 start_vpfn
+ size
- 1,
3764 agaw_to_level(domain
->agaw
) + 1);
3765 free_iova_fast(&domain
->iovad
, iova_pfn
, dma_to_mm_pfn(size
));
3769 trace_map_sg(dev
, iova_pfn
<< PAGE_SHIFT
,
3770 sg_phys(sglist
), size
<< VTD_PAGE_SHIFT
);
static u64 intel_get_required_mask(struct device *dev)
{
	if (!iommu_need_mapping(dev))
		return dma_direct_get_required_mask(dev);
	return DMA_BIT_MASK(32);
}
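/*
 * When the device is translated by the IOMMU (non-identity case), any
 * physical page can be mapped at an IOVA below 4GiB, so advertising a
 * 32-bit required mask is sufficient; otherwise the direct-mapping answer
 * is passed through unchanged.
 */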
static const struct dma_map_ops intel_dma_ops = {
	.alloc = intel_alloc_coherent,
	.free = intel_free_coherent,
	.map_sg = intel_map_sg,
	.unmap_sg = intel_unmap_sg,
	.map_page = intel_map_page,
	.unmap_page = intel_unmap_page,
	.map_resource = intel_map_resource,
	.unmap_resource = intel_unmap_resource,
	.dma_supported = dma_direct_supported,
	.mmap = dma_common_mmap,
	.get_sgtable = dma_common_get_sgtable,
	.get_required_mask = intel_get_required_mask,
};
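/*
 * These ops cover devices using IOVA translation; untrusted devices whose
 * DMA must be bounced through swiotlb use the bounce_dma_ops variant
 * defined further down, which adds the sync callbacks.
 */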
3798 bounce_sync_single(struct device
*dev
, dma_addr_t addr
, size_t size
,
3799 enum dma_data_direction dir
, enum dma_sync_target target
)
3801 struct dmar_domain
*domain
;
3802 phys_addr_t tlb_addr
;
3804 domain
= find_domain(dev
);
3805 if (WARN_ON(!domain
))
3808 tlb_addr
= intel_iommu_iova_to_phys(&domain
->domain
, addr
);
3809 if (is_swiotlb_buffer(tlb_addr
))
3810 swiotlb_tbl_sync_single(dev
, tlb_addr
, size
, dir
, target
);
3814 bounce_map_single(struct device
*dev
, phys_addr_t paddr
, size_t size
,
3815 enum dma_data_direction dir
, unsigned long attrs
,
3818 size_t aligned_size
= ALIGN(size
, VTD_PAGE_SIZE
);
3819 struct dmar_domain
*domain
;
3820 struct intel_iommu
*iommu
;
3821 unsigned long iova_pfn
;
3822 unsigned long nrpages
;
3823 phys_addr_t tlb_addr
;
3827 domain
= find_domain(dev
);
3828 if (WARN_ON(dir
== DMA_NONE
|| !domain
))
3829 return DMA_MAPPING_ERROR
;
3831 iommu
= domain_get_iommu(domain
);
3832 if (WARN_ON(!iommu
))
3833 return DMA_MAPPING_ERROR
;
3835 nrpages
= aligned_nrpages(0, size
);
3836 iova_pfn
= intel_alloc_iova(dev
, domain
,
3837 dma_to_mm_pfn(nrpages
), dma_mask
);
3839 return DMA_MAPPING_ERROR
;
3842 * Check if DMAR supports zero-length reads on write only
3845 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
||
3846 !cap_zlr(iommu
->cap
))
3847 prot
|= DMA_PTE_READ
;
3848 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
3849 prot
|= DMA_PTE_WRITE
;
3852 * If both the physical buffer start address and size are
3853 * page aligned, we don't need to use a bounce page.
3855 if (!IS_ALIGNED(paddr
| size
, VTD_PAGE_SIZE
)) {
3856 tlb_addr
= swiotlb_tbl_map_single(dev
,
3857 __phys_to_dma(dev
, io_tlb_start
),
3858 paddr
, size
, aligned_size
, dir
, attrs
);
3859 if (tlb_addr
== DMA_MAPPING_ERROR
) {
3862 /* Cleanup the padding area. */
3863 void *padding_start
= phys_to_virt(tlb_addr
);
3864 size_t padding_size
= aligned_size
;
3866 if (!(attrs
& DMA_ATTR_SKIP_CPU_SYNC
) &&
3867 (dir
== DMA_TO_DEVICE
||
3868 dir
== DMA_BIDIRECTIONAL
)) {
3869 padding_start
+= size
;
3870 padding_size
-= size
;
3873 memset(padding_start
, 0, padding_size
);
3879 ret
= domain_pfn_mapping(domain
, mm_to_dma_pfn(iova_pfn
),
3880 tlb_addr
>> VTD_PAGE_SHIFT
, nrpages
, prot
);
3884 trace_bounce_map_single(dev
, iova_pfn
<< PAGE_SHIFT
, paddr
, size
);
3886 return (phys_addr_t
)iova_pfn
<< PAGE_SHIFT
;
3889 if (is_swiotlb_buffer(tlb_addr
))
3890 swiotlb_tbl_unmap_single(dev
, tlb_addr
, size
,
3891 aligned_size
, dir
, attrs
);
3893 free_iova_fast(&domain
->iovad
, iova_pfn
, dma_to_mm_pfn(nrpages
));
3894 dev_err(dev
, "Device bounce map: %zx@%llx dir %d --- failed\n",
3895 size
, (unsigned long long)paddr
, dir
);
3897 return DMA_MAPPING_ERROR
;
3901 bounce_unmap_single(struct device
*dev
, dma_addr_t dev_addr
, size_t size
,
3902 enum dma_data_direction dir
, unsigned long attrs
)
3904 size_t aligned_size
= ALIGN(size
, VTD_PAGE_SIZE
);
3905 struct dmar_domain
*domain
;
3906 phys_addr_t tlb_addr
;
3908 domain
= find_domain(dev
);
3909 if (WARN_ON(!domain
))
3912 tlb_addr
= intel_iommu_iova_to_phys(&domain
->domain
, dev_addr
);
3913 if (WARN_ON(!tlb_addr
))
3916 intel_unmap(dev
, dev_addr
, size
);
3917 if (is_swiotlb_buffer(tlb_addr
))
3918 swiotlb_tbl_unmap_single(dev
, tlb_addr
, size
,
3919 aligned_size
, dir
, attrs
);
3921 trace_bounce_unmap_single(dev
, dev_addr
, size
);
3925 bounce_map_page(struct device
*dev
, struct page
*page
, unsigned long offset
,
3926 size_t size
, enum dma_data_direction dir
, unsigned long attrs
)
3928 return bounce_map_single(dev
, page_to_phys(page
) + offset
,
3929 size
, dir
, attrs
, *dev
->dma_mask
);
3933 bounce_map_resource(struct device
*dev
, phys_addr_t phys_addr
, size_t size
,
3934 enum dma_data_direction dir
, unsigned long attrs
)
3936 return bounce_map_single(dev
, phys_addr
, size
,
3937 dir
, attrs
, *dev
->dma_mask
);
3941 bounce_unmap_page(struct device
*dev
, dma_addr_t dev_addr
, size_t size
,
3942 enum dma_data_direction dir
, unsigned long attrs
)
3944 bounce_unmap_single(dev
, dev_addr
, size
, dir
, attrs
);
3948 bounce_unmap_resource(struct device
*dev
, dma_addr_t dev_addr
, size_t size
,
3949 enum dma_data_direction dir
, unsigned long attrs
)
3951 bounce_unmap_single(dev
, dev_addr
, size
, dir
, attrs
);
3955 bounce_unmap_sg(struct device
*dev
, struct scatterlist
*sglist
, int nelems
,
3956 enum dma_data_direction dir
, unsigned long attrs
)
3958 struct scatterlist
*sg
;
3961 for_each_sg(sglist
, sg
, nelems
, i
)
3962 bounce_unmap_page(dev
, sg
->dma_address
,
3963 sg_dma_len(sg
), dir
, attrs
);
3967 bounce_map_sg(struct device
*dev
, struct scatterlist
*sglist
, int nelems
,
3968 enum dma_data_direction dir
, unsigned long attrs
)
3971 struct scatterlist
*sg
;
3973 for_each_sg(sglist
, sg
, nelems
, i
) {
3974 sg
->dma_address
= bounce_map_page(dev
, sg_page(sg
),
3975 sg
->offset
, sg
->length
,
3977 if (sg
->dma_address
== DMA_MAPPING_ERROR
)
3979 sg_dma_len(sg
) = sg
->length
;
3985 bounce_unmap_sg(dev
, sglist
, i
, dir
, attrs
| DMA_ATTR_SKIP_CPU_SYNC
);
3990 bounce_sync_single_for_cpu(struct device
*dev
, dma_addr_t addr
,
3991 size_t size
, enum dma_data_direction dir
)
3993 bounce_sync_single(dev
, addr
, size
, dir
, SYNC_FOR_CPU
);
3997 bounce_sync_single_for_device(struct device
*dev
, dma_addr_t addr
,
3998 size_t size
, enum dma_data_direction dir
)
4000 bounce_sync_single(dev
, addr
, size
, dir
, SYNC_FOR_DEVICE
);
4004 bounce_sync_sg_for_cpu(struct device
*dev
, struct scatterlist
*sglist
,
4005 int nelems
, enum dma_data_direction dir
)
4007 struct scatterlist
*sg
;
4010 for_each_sg(sglist
, sg
, nelems
, i
)
4011 bounce_sync_single(dev
, sg_dma_address(sg
),
4012 sg_dma_len(sg
), dir
, SYNC_FOR_CPU
);
4016 bounce_sync_sg_for_device(struct device
*dev
, struct scatterlist
*sglist
,
4017 int nelems
, enum dma_data_direction dir
)
4019 struct scatterlist
*sg
;
4022 for_each_sg(sglist
, sg
, nelems
, i
)
4023 bounce_sync_single(dev
, sg_dma_address(sg
),
4024 sg_dma_len(sg
), dir
, SYNC_FOR_DEVICE
);
static const struct dma_map_ops bounce_dma_ops = {
	.alloc			= intel_alloc_coherent,
	.free			= intel_free_coherent,
	.map_sg			= bounce_map_sg,
	.unmap_sg		= bounce_unmap_sg,
	.map_page		= bounce_map_page,
	.unmap_page		= bounce_unmap_page,
	.sync_single_for_cpu	= bounce_sync_single_for_cpu,
	.sync_single_for_device	= bounce_sync_single_for_device,
	.sync_sg_for_cpu	= bounce_sync_sg_for_cpu,
	.sync_sg_for_device	= bounce_sync_sg_for_device,
	.map_resource		= bounce_map_resource,
	.unmap_resource		= bounce_unmap_resource,
	.dma_supported		= dma_direct_supported,
};
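/*
 * Compared with intel_dma_ops, this table routes map/unmap through the
 * bounce_* helpers and adds sync_* callbacks so that data is copied to and
 * from the swiotlb bounce buffers around each transfer.
 */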
4043 static inline int iommu_domain_cache_init(void)
4047 iommu_domain_cache
= kmem_cache_create("iommu_domain",
4048 sizeof(struct dmar_domain
),
4053 if (!iommu_domain_cache
) {
4054 pr_err("Couldn't create iommu_domain cache\n");
4061 static inline int iommu_devinfo_cache_init(void)
4065 iommu_devinfo_cache
= kmem_cache_create("iommu_devinfo",
4066 sizeof(struct device_domain_info
),
4070 if (!iommu_devinfo_cache
) {
4071 pr_err("Couldn't create devinfo cache\n");
4078 static int __init
iommu_init_mempool(void)
4081 ret
= iova_cache_get();
4085 ret
= iommu_domain_cache_init();
4089 ret
= iommu_devinfo_cache_init();
4093 kmem_cache_destroy(iommu_domain_cache
);
static void __init iommu_exit_mempool(void)
{
	kmem_cache_destroy(iommu_devinfo_cache);
	kmem_cache_destroy(iommu_domain_cache);
	iova_cache_put();
}
4107 static void quirk_ioat_snb_local_iommu(struct pci_dev
*pdev
)
4109 struct dmar_drhd_unit
*drhd
;
4113 /* We know that this device on this chipset has its own IOMMU.
4114 * If we find it under a different IOMMU, then the BIOS is lying
4115 * to us. Hope that the IOMMU for this device is actually
4116 * disabled, and it needs no translation...
4118 rc
= pci_bus_read_config_dword(pdev
->bus
, PCI_DEVFN(0, 0), 0xb0, &vtbar
);
4120 /* "can't" happen */
4121 dev_info(&pdev
->dev
, "failed to run vt-d quirk\n");
4124 vtbar
&= 0xffff0000;
4126 /* we know that the this iommu should be at offset 0xa000 from vtbar */
4127 drhd
= dmar_find_matched_drhd_unit(pdev
);
4128 if (!drhd
|| drhd
->reg_base_addr
- vtbar
!= 0xa000) {
4129 pr_warn_once(FW_BUG
"BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
4130 add_taint(TAINT_FIRMWARE_WORKAROUND
, LOCKDEP_STILL_OK
);
4131 pdev
->dev
.archdata
.iommu
= DUMMY_DEVICE_DOMAIN_INFO
;
4134 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL
, PCI_DEVICE_ID_INTEL_IOAT_SNB
, quirk_ioat_snb_local_iommu
);
4136 static void __init
init_no_remapping_devices(void)
4138 struct dmar_drhd_unit
*drhd
;
4142 for_each_drhd_unit(drhd
) {
4143 if (!drhd
->include_all
) {
4144 for_each_active_dev_scope(drhd
->devices
,
4145 drhd
->devices_cnt
, i
, dev
)
4147 /* ignore DMAR unit if no devices exist */
4148 if (i
== drhd
->devices_cnt
)
4153 for_each_active_drhd_unit(drhd
) {
4154 if (drhd
->include_all
)
4157 for_each_active_dev_scope(drhd
->devices
,
4158 drhd
->devices_cnt
, i
, dev
)
4159 if (!dev_is_pci(dev
) || !IS_GFX_DEVICE(to_pci_dev(dev
)))
4161 if (i
< drhd
->devices_cnt
)
4164 /* This IOMMU has *only* gfx devices. Either bypass it or
4165 set the gfx_mapped flag, as appropriate */
4166 if (!dmar_map_gfx
) {
4168 for_each_active_dev_scope(drhd
->devices
,
4169 drhd
->devices_cnt
, i
, dev
)
4170 dev
->archdata
.iommu
= DUMMY_DEVICE_DOMAIN_INFO
;
4175 #ifdef CONFIG_SUSPEND
4176 static int init_iommu_hw(void)
4178 struct dmar_drhd_unit
*drhd
;
4179 struct intel_iommu
*iommu
= NULL
;
4181 for_each_active_iommu(iommu
, drhd
)
4183 dmar_reenable_qi(iommu
);
4185 for_each_iommu(iommu
, drhd
) {
4186 if (drhd
->ignored
) {
4188 * we always have to disable PMRs or DMA may fail on
4192 iommu_disable_protect_mem_regions(iommu
);
4196 iommu_flush_write_buffer(iommu
);
4198 iommu_set_root_entry(iommu
);
4200 iommu
->flush
.flush_context(iommu
, 0, 0, 0,
4201 DMA_CCMD_GLOBAL_INVL
);
4202 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH
);
4203 iommu_enable_translation(iommu
);
4204 iommu_disable_protect_mem_regions(iommu
);
4210 static void iommu_flush_all(void)
4212 struct dmar_drhd_unit
*drhd
;
4213 struct intel_iommu
*iommu
;
4215 for_each_active_iommu(iommu
, drhd
) {
4216 iommu
->flush
.flush_context(iommu
, 0, 0, 0,
4217 DMA_CCMD_GLOBAL_INVL
);
4218 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0,
4219 DMA_TLB_GLOBAL_FLUSH
);
4223 static int iommu_suspend(void)
4225 struct dmar_drhd_unit
*drhd
;
4226 struct intel_iommu
*iommu
= NULL
;
4229 for_each_active_iommu(iommu
, drhd
) {
4230 iommu
->iommu_state
= kcalloc(MAX_SR_DMAR_REGS
, sizeof(u32
),
4232 if (!iommu
->iommu_state
)
4238 for_each_active_iommu(iommu
, drhd
) {
4239 iommu_disable_translation(iommu
);
4241 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
4243 iommu
->iommu_state
[SR_DMAR_FECTL_REG
] =
4244 readl(iommu
->reg
+ DMAR_FECTL_REG
);
4245 iommu
->iommu_state
[SR_DMAR_FEDATA_REG
] =
4246 readl(iommu
->reg
+ DMAR_FEDATA_REG
);
4247 iommu
->iommu_state
[SR_DMAR_FEADDR_REG
] =
4248 readl(iommu
->reg
+ DMAR_FEADDR_REG
);
4249 iommu
->iommu_state
[SR_DMAR_FEUADDR_REG
] =
4250 readl(iommu
->reg
+ DMAR_FEUADDR_REG
);
4252 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
4257 for_each_active_iommu(iommu
, drhd
)
4258 kfree(iommu
->iommu_state
);
4263 static void iommu_resume(void)
4265 struct dmar_drhd_unit
*drhd
;
4266 struct intel_iommu
*iommu
= NULL
;
4269 if (init_iommu_hw()) {
4271 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4273 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4277 for_each_active_iommu(iommu
, drhd
) {
4279 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
4281 writel(iommu
->iommu_state
[SR_DMAR_FECTL_REG
],
4282 iommu
->reg
+ DMAR_FECTL_REG
);
4283 writel(iommu
->iommu_state
[SR_DMAR_FEDATA_REG
],
4284 iommu
->reg
+ DMAR_FEDATA_REG
);
4285 writel(iommu
->iommu_state
[SR_DMAR_FEADDR_REG
],
4286 iommu
->reg
+ DMAR_FEADDR_REG
);
4287 writel(iommu
->iommu_state
[SR_DMAR_FEUADDR_REG
],
4288 iommu
->reg
+ DMAR_FEUADDR_REG
);
4290 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
4293 for_each_active_iommu(iommu
, drhd
)
4294 kfree(iommu
->iommu_state
);
4297 static struct syscore_ops iommu_syscore_ops
= {
4298 .resume
= iommu_resume
,
4299 .suspend
= iommu_suspend
,
4302 static void __init
init_iommu_pm_ops(void)
4304 register_syscore_ops(&iommu_syscore_ops
);
4308 static inline void init_iommu_pm_ops(void) {}
4309 #endif /* CONFIG_PM */
4311 int __init
dmar_parse_one_rmrr(struct acpi_dmar_header
*header
, void *arg
)
4313 struct acpi_dmar_reserved_memory
*rmrr
;
4314 struct dmar_rmrr_unit
*rmrru
;
4316 rmrru
= kzalloc(sizeof(*rmrru
), GFP_KERNEL
);
4320 rmrru
->hdr
= header
;
4321 rmrr
= (struct acpi_dmar_reserved_memory
*)header
;
4322 rmrru
->base_address
= rmrr
->base_address
;
4323 rmrru
->end_address
= rmrr
->end_address
;
4325 rmrru
->devices
= dmar_alloc_dev_scope((void *)(rmrr
+ 1),
4326 ((void *)rmrr
) + rmrr
->header
.length
,
4327 &rmrru
->devices_cnt
);
4328 if (rmrru
->devices_cnt
&& rmrru
->devices
== NULL
)
4331 list_add(&rmrru
->list
, &dmar_rmrr_units
);
4340 static struct dmar_atsr_unit
*dmar_find_atsr(struct acpi_dmar_atsr
*atsr
)
4342 struct dmar_atsr_unit
*atsru
;
4343 struct acpi_dmar_atsr
*tmp
;
4345 list_for_each_entry_rcu(atsru
, &dmar_atsr_units
, list
,
4347 tmp
= (struct acpi_dmar_atsr
*)atsru
->hdr
;
4348 if (atsr
->segment
!= tmp
->segment
)
4350 if (atsr
->header
.length
!= tmp
->header
.length
)
4352 if (memcmp(atsr
, tmp
, atsr
->header
.length
) == 0)
4359 int dmar_parse_one_atsr(struct acpi_dmar_header
*hdr
, void *arg
)
4361 struct acpi_dmar_atsr
*atsr
;
4362 struct dmar_atsr_unit
*atsru
;
4364 if (system_state
>= SYSTEM_RUNNING
&& !intel_iommu_enabled
)
4367 atsr
= container_of(hdr
, struct acpi_dmar_atsr
, header
);
4368 atsru
= dmar_find_atsr(atsr
);
4372 atsru
= kzalloc(sizeof(*atsru
) + hdr
->length
, GFP_KERNEL
);
4377 * If memory is allocated from slab by ACPI _DSM method, we need to
4378 * copy the memory content because the memory buffer will be freed
4381 atsru
->hdr
= (void *)(atsru
+ 1);
4382 memcpy(atsru
->hdr
, hdr
, hdr
->length
);
4383 atsru
->include_all
= atsr
->flags
& 0x1;
4384 if (!atsru
->include_all
) {
4385 atsru
->devices
= dmar_alloc_dev_scope((void *)(atsr
+ 1),
4386 (void *)atsr
+ atsr
->header
.length
,
4387 &atsru
->devices_cnt
);
4388 if (atsru
->devices_cnt
&& atsru
->devices
== NULL
) {
4394 list_add_rcu(&atsru
->list
, &dmar_atsr_units
);
4399 static void intel_iommu_free_atsr(struct dmar_atsr_unit
*atsru
)
4401 dmar_free_dev_scope(&atsru
->devices
, &atsru
->devices_cnt
);
4405 int dmar_release_one_atsr(struct acpi_dmar_header
*hdr
, void *arg
)
4407 struct acpi_dmar_atsr
*atsr
;
4408 struct dmar_atsr_unit
*atsru
;
4410 atsr
= container_of(hdr
, struct acpi_dmar_atsr
, header
);
4411 atsru
= dmar_find_atsr(atsr
);
4413 list_del_rcu(&atsru
->list
);
4415 intel_iommu_free_atsr(atsru
);
4421 int dmar_check_one_atsr(struct acpi_dmar_header
*hdr
, void *arg
)
4425 struct acpi_dmar_atsr
*atsr
;
4426 struct dmar_atsr_unit
*atsru
;
4428 atsr
= container_of(hdr
, struct acpi_dmar_atsr
, header
);
4429 atsru
= dmar_find_atsr(atsr
);
4433 if (!atsru
->include_all
&& atsru
->devices
&& atsru
->devices_cnt
) {
4434 for_each_active_dev_scope(atsru
->devices
, atsru
->devices_cnt
,
4442 static int intel_iommu_add(struct dmar_drhd_unit
*dmaru
)
4445 struct intel_iommu
*iommu
= dmaru
->iommu
;
4447 if (g_iommus
[iommu
->seq_id
])
4450 if (hw_pass_through
&& !ecap_pass_through(iommu
->ecap
)) {
4451 pr_warn("%s: Doesn't support hardware pass through.\n",
4455 if (!ecap_sc_support(iommu
->ecap
) &&
4456 domain_update_iommu_snooping(iommu
)) {
4457 pr_warn("%s: Doesn't support snooping.\n",
4461 sp
= domain_update_iommu_superpage(iommu
) - 1;
4462 if (sp
>= 0 && !(cap_super_page_val(iommu
->cap
) & (1 << sp
))) {
4463 pr_warn("%s: Doesn't support large page.\n",
4469 * Disable translation if already enabled prior to OS handover.
4471 if (iommu
->gcmd
& DMA_GCMD_TE
)
4472 iommu_disable_translation(iommu
);
4474 g_iommus
[iommu
->seq_id
] = iommu
;
4475 ret
= iommu_init_domains(iommu
);
4477 ret
= iommu_alloc_root_entry(iommu
);
4481 #ifdef CONFIG_INTEL_IOMMU_SVM
4482 if (pasid_supported(iommu
))
4483 intel_svm_init(iommu
);
4486 if (dmaru
->ignored
) {
4488 * we always have to disable PMRs or DMA may fail on this device
4491 iommu_disable_protect_mem_regions(iommu
);
4495 intel_iommu_init_qi(iommu
);
4496 iommu_flush_write_buffer(iommu
);
4498 #ifdef CONFIG_INTEL_IOMMU_SVM
4499 if (pasid_supported(iommu
) && ecap_prs(iommu
->ecap
)) {
4500 ret
= intel_svm_enable_prq(iommu
);
4505 ret
= dmar_set_interrupt(iommu
);
4509 iommu_set_root_entry(iommu
);
4510 iommu
->flush
.flush_context(iommu
, 0, 0, 0, DMA_CCMD_GLOBAL_INVL
);
4511 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH
);
4512 iommu_enable_translation(iommu
);
4514 iommu_disable_protect_mem_regions(iommu
);
4518 disable_dmar_iommu(iommu
);
4520 free_dmar_iommu(iommu
);
4524 int dmar_iommu_hotplug(struct dmar_drhd_unit
*dmaru
, bool insert
)
4527 struct intel_iommu
*iommu
= dmaru
->iommu
;
4529 if (!intel_iommu_enabled
)
4535 ret
= intel_iommu_add(dmaru
);
4537 disable_dmar_iommu(iommu
);
4538 free_dmar_iommu(iommu
);
4544 static void intel_iommu_free_dmars(void)
4546 struct dmar_rmrr_unit
*rmrru
, *rmrr_n
;
4547 struct dmar_atsr_unit
*atsru
, *atsr_n
;
4549 list_for_each_entry_safe(rmrru
, rmrr_n
, &dmar_rmrr_units
, list
) {
4550 list_del(&rmrru
->list
);
4551 dmar_free_dev_scope(&rmrru
->devices
, &rmrru
->devices_cnt
);
4555 list_for_each_entry_safe(atsru
, atsr_n
, &dmar_atsr_units
, list
) {
4556 list_del(&atsru
->list
);
4557 intel_iommu_free_atsr(atsru
);
4561 int dmar_find_matched_atsr_unit(struct pci_dev
*dev
)
4564 struct pci_bus
*bus
;
4565 struct pci_dev
*bridge
= NULL
;
4567 struct acpi_dmar_atsr
*atsr
;
4568 struct dmar_atsr_unit
*atsru
;
4570 dev
= pci_physfn(dev
);
4571 for (bus
= dev
->bus
; bus
; bus
= bus
->parent
) {
4573 /* If it's an integrated device, allow ATS */
4576 /* Connected via non-PCIe: no ATS */
4577 if (!pci_is_pcie(bridge
) ||
4578 pci_pcie_type(bridge
) == PCI_EXP_TYPE_PCI_BRIDGE
)
4580 /* If we found the root port, look it up in the ATSR */
4581 if (pci_pcie_type(bridge
) == PCI_EXP_TYPE_ROOT_PORT
)
4586 list_for_each_entry_rcu(atsru
, &dmar_atsr_units
, list
) {
4587 atsr
= container_of(atsru
->hdr
, struct acpi_dmar_atsr
, header
);
4588 if (atsr
->segment
!= pci_domain_nr(dev
->bus
))
4591 for_each_dev_scope(atsru
->devices
, atsru
->devices_cnt
, i
, tmp
)
4592 if (tmp
== &bridge
->dev
)
4595 if (atsru
->include_all
)
4605 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info
*info
)
4608 struct dmar_rmrr_unit
*rmrru
;
4609 struct dmar_atsr_unit
*atsru
;
4610 struct acpi_dmar_atsr
*atsr
;
4611 struct acpi_dmar_reserved_memory
*rmrr
;
4613 if (!intel_iommu_enabled
&& system_state
>= SYSTEM_RUNNING
)
4616 list_for_each_entry(rmrru
, &dmar_rmrr_units
, list
) {
4617 rmrr
= container_of(rmrru
->hdr
,
4618 struct acpi_dmar_reserved_memory
, header
);
4619 if (info
->event
== BUS_NOTIFY_ADD_DEVICE
) {
4620 ret
= dmar_insert_dev_scope(info
, (void *)(rmrr
+ 1),
4621 ((void *)rmrr
) + rmrr
->header
.length
,
4622 rmrr
->segment
, rmrru
->devices
,
4623 rmrru
->devices_cnt
);
4626 } else if (info
->event
== BUS_NOTIFY_REMOVED_DEVICE
) {
4627 dmar_remove_dev_scope(info
, rmrr
->segment
,
4628 rmrru
->devices
, rmrru
->devices_cnt
);
4632 list_for_each_entry(atsru
, &dmar_atsr_units
, list
) {
4633 if (atsru
->include_all
)
4636 atsr
= container_of(atsru
->hdr
, struct acpi_dmar_atsr
, header
);
4637 if (info
->event
== BUS_NOTIFY_ADD_DEVICE
) {
4638 ret
= dmar_insert_dev_scope(info
, (void *)(atsr
+ 1),
4639 (void *)atsr
+ atsr
->header
.length
,
4640 atsr
->segment
, atsru
->devices
,
4641 atsru
->devices_cnt
);
4646 } else if (info
->event
== BUS_NOTIFY_REMOVED_DEVICE
) {
4647 if (dmar_remove_dev_scope(info
, atsr
->segment
,
4648 atsru
->devices
, atsru
->devices_cnt
))
static int intel_iommu_memory_notifier(struct notifier_block *nb,
				       unsigned long val, void *v)
{
	struct memory_notify *mhp = v;
	unsigned long long start, end;
	unsigned long start_vpfn, last_vpfn;

	switch (val) {
	case MEM_GOING_ONLINE:
		start = mhp->start_pfn << PAGE_SHIFT;
		end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
		if (iommu_domain_identity_map(si_domain, start, end)) {
			pr_warn("Failed to build identity map for [%llx-%llx]\n",
				start, end);
			return NOTIFY_BAD;
		}
		break;

	case MEM_OFFLINE:
	case MEM_CANCEL_ONLINE:
		start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
		last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
		while (start_vpfn <= last_vpfn) {
			struct iova *iova;
			struct dmar_drhd_unit *drhd;
			struct intel_iommu *iommu;
			struct page *freelist;

			iova = find_iova(&si_domain->iovad, start_vpfn);
			if (iova == NULL) {
				pr_debug("Failed get IOVA for PFN %lx\n",
					 start_vpfn);
				break;
			}

			iova = split_and_remove_iova(&si_domain->iovad, iova,
						     start_vpfn, last_vpfn);
			if (iova == NULL) {
				pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
					start_vpfn, last_vpfn);
				return NOTIFY_BAD;
			}

			freelist = domain_unmap(si_domain, iova->pfn_lo,
					       iova->pfn_hi);

			rcu_read_lock();
			for_each_active_iommu(iommu, drhd)
				iommu_flush_iotlb_psi(iommu, si_domain,
					iova->pfn_lo, iova_size(iova),
					!freelist, 0);
			rcu_read_unlock();
			dma_free_pagelist(freelist);

			start_vpfn = iova->pfn_hi + 1;
			free_iova_mem(iova);
		}
		break;
	}

	return NOTIFY_OK;
}
static struct notifier_block intel_iommu_memory_nb = {
	.notifier_call = intel_iommu_memory_notifier,
	.priority = 0
};
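
/*
 * Drop the per-CPU IOVA caches of every domain on every IOMMU for the given
 * CPU; used below as the CPUHP_IOMMU_INTEL_DEAD hot-unplug callback so a dead
 * CPU does not keep cached IOVA ranges around.
 */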
static void free_all_cpu_cached_iovas(unsigned int cpu)
{
	int i;

	for (i = 0; i < g_num_of_iommus; i++) {
		struct intel_iommu *iommu = g_iommus[i];
		struct dmar_domain *domain;
		int did;

		if (!iommu)
			continue;

		for (did = 0; did < cap_ndoms(iommu->cap); did++) {
			domain = get_iommu_domain(iommu, (u16)did);

			if (!domain)
				continue;
			free_cpu_cached_iovas(cpu, &domain->iovad);
		}
	}
}
static int intel_iommu_cpu_dead(unsigned int cpu)
{
	free_all_cpu_cached_iovas(cpu);
	return 0;
}
static void intel_disable_iommus(void)
{
	struct intel_iommu *iommu = NULL;
	struct dmar_drhd_unit *drhd;

	for_each_iommu(iommu, drhd)
		iommu_disable_translation(iommu);
}
static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
{
	struct iommu_device *iommu_dev = dev_to_iommu_device(dev);

	return container_of(iommu_dev, struct intel_iommu, iommu);
}
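
/*
 * Per-IOMMU sysfs attributes (version, register base, capabilities, domain
 * usage), grouped under "intel-iommu" and registered for each DMAR unit via
 * iommu_device_sysfs_add() in intel_iommu_init(); typically visible under
 * /sys/class/iommu/dmar<N>/intel-iommu/.
 */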
static ssize_t intel_iommu_show_version(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	u32 ver = readl(iommu->reg + DMAR_VER_REG);
	return sprintf(buf, "%d:%d\n",
		       DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
}
static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);

static ssize_t intel_iommu_show_address(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%llx\n", iommu->reg_phys);
}
static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);

static ssize_t intel_iommu_show_cap(struct device *dev,
				    struct device_attribute *attr,
				    char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%llx\n", iommu->cap);
}
static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);

static ssize_t intel_iommu_show_ecap(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%llx\n", iommu->ecap);
}
static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);

static ssize_t intel_iommu_show_ndoms(struct device *dev,
				      struct device_attribute *attr,
				      char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
}
static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);

static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
					   struct device_attribute *attr,
					   char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
						  cap_ndoms(iommu->cap)));
}
static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
static struct attribute *intel_iommu_attrs[] = {
	&dev_attr_version.attr,
	&dev_attr_address.attr,
	&dev_attr_cap.attr,
	&dev_attr_ecap.attr,
	&dev_attr_domains_supported.attr,
	&dev_attr_domains_used.attr,
	NULL,
};

static struct attribute_group intel_iommu_group = {
	.name = "intel-iommu",
	.attrs = intel_iommu_attrs,
};

const struct attribute_group *intel_iommu_groups[] = {
	&intel_iommu_group,
	NULL,
};
static inline bool has_untrusted_dev(void)
{
	struct pci_dev *pdev = NULL;

	for_each_pci_dev(pdev)
		if (pdev->untrusted)
			return true;

	return false;
}
static int __init platform_optin_force_iommu(void)
{
	if (!dmar_platform_optin() || no_platform_optin || !has_untrusted_dev())
		return 0;

	if (no_iommu || dmar_disabled)
		pr_info("Intel-IOMMU force enabled due to platform opt in\n");

	/*
	 * If Intel-IOMMU is disabled by default, we will apply identity
	 * map for all devices except those marked as being untrusted.
	 */
	if (dmar_disabled)
		iommu_identity_mapping |= IDENTMAP_ALL;

	dmar_disabled = 0;
	no_iommu = 0;

	return 1;
}
static int __init probe_acpi_namespace_devices(void)
{
	struct dmar_drhd_unit *drhd;
	/* To avoid a -Wunused-but-set-variable warning. */
	struct intel_iommu *iommu __maybe_unused;
	struct device *dev;
	int i, ret = 0;

	for_each_active_iommu(iommu, drhd) {
		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, dev) {
			struct acpi_device_physical_node *pn;
			struct iommu_group *group;
			struct acpi_device *adev;

			if (dev->bus != &acpi_bus_type)
				continue;

			adev = to_acpi_device(dev);
			mutex_lock(&adev->physical_node_lock);
			list_for_each_entry(pn,
					    &adev->physical_node_list, node) {
				group = iommu_group_get(pn->dev);
				if (group) {
					iommu_group_put(group);
					continue;
				}

				pn->dev->bus->iommu_ops = &intel_iommu_ops;
				ret = iommu_probe_device(pn->dev);
				if (ret)
					break;
			}
			mutex_unlock(&adev->physical_node_lock);

			if (ret)
				return ret;
		}
	}

	return 0;
}
int __init intel_iommu_init(void)
{
	int ret = -ENODEV;
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	/*
	 * Intel IOMMU is required for a TXT/tboot launch or platform
	 * opt in, so enforce that.
	 */
	force_on = tboot_force_iommu() || platform_optin_force_iommu();

	if (iommu_init_mempool()) {
		if (force_on)
			panic("tboot: Failed to initialize iommu memory\n");
		return -ENOMEM;
	}

	down_write(&dmar_global_lock);
	if (dmar_table_init()) {
		if (force_on)
			panic("tboot: Failed to initialize DMAR table\n");
		goto out_free_dmar;
	}

	if (dmar_dev_scope_init() < 0) {
		if (force_on)
			panic("tboot: Failed to initialize DMAR device scope\n");
		goto out_free_dmar;
	}

	up_write(&dmar_global_lock);

	/*
	 * The bus notifier takes the dmar_global_lock, so lockdep will
	 * complain later when we register it under the lock.
	 */
	dmar_register_bus_notifier();

	down_write(&dmar_global_lock);

	if (!no_iommu)
		intel_iommu_debugfs_init();

	if (no_iommu || dmar_disabled) {
		/*
		 * We exit the function here to ensure IOMMU's remapping and
		 * mempool aren't setup, which means that the IOMMU's PMRs
		 * won't be disabled via the call to init_dmars(). So disable
		 * it explicitly here. The PMRs were setup by tboot prior to
		 * calling SENTER, but the kernel is expected to reset/tear
		 * down the PMRs.
		 */
		if (intel_iommu_tboot_noforce) {
			for_each_iommu(iommu, drhd)
				iommu_disable_protect_mem_regions(iommu);
		}

		/*
		 * Make sure the IOMMUs are switched off, even when we
		 * boot into a kexec kernel and the previous kernel left
		 * them enabled
		 */
		intel_disable_iommus();
		goto out_free_dmar;
	}

	if (list_empty(&dmar_rmrr_units))
		pr_info("No RMRR found\n");

	if (list_empty(&dmar_atsr_units))
		pr_info("No ATSR found\n");

	if (dmar_init_reserved_ranges()) {
		if (force_on)
			panic("tboot: Failed to reserve iommu ranges\n");
		goto out_free_reserved_range;
	}

	if (dmar_map_gfx)
		intel_iommu_gfx_mapped = 1;

	init_no_remapping_devices();

	ret = init_dmars();
	if (ret) {
		if (force_on)
			panic("tboot: Failed to initialize DMARs\n");
		pr_err("Initialization failed\n");
		goto out_free_reserved_range;
	}
	up_write(&dmar_global_lock);

#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
	/*
	 * If the system has no untrusted device or the user has decided
	 * to disable the bounce page mechanisms, we don't need swiotlb.
	 * Mark this and the pre-allocated bounce pages will be released
	 * later.
	 */
	if (!has_untrusted_dev() || intel_no_bounce)
		swiotlb = 0;
#endif
	dma_ops = &intel_dma_ops;

	init_iommu_pm_ops();

	down_read(&dmar_global_lock);
	for_each_active_iommu(iommu, drhd) {
		iommu_device_sysfs_add(&iommu->iommu, NULL,
				       intel_iommu_groups,
				       "%s", iommu->name);
		iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
		iommu_device_register(&iommu->iommu);
	}
	up_read(&dmar_global_lock);

	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
	if (si_domain && !hw_pass_through)
		register_memory_notifier(&intel_iommu_memory_nb);
	cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
			  intel_iommu_cpu_dead);

	down_read(&dmar_global_lock);
	if (probe_acpi_namespace_devices())
		pr_warn("ACPI name space devices didn't probe correctly\n");

	/* Finally, we enable the DMA remapping hardware. */
	for_each_iommu(iommu, drhd) {
		if (!drhd->ignored && !translation_pre_enabled(iommu))
			iommu_enable_translation(iommu);

		iommu_disable_protect_mem_regions(iommu);
	}
	up_read(&dmar_global_lock);

	pr_info("Intel(R) Virtualization Technology for Directed I/O\n");

	intel_iommu_enabled = 1;

	return 0;

out_free_reserved_range:
	put_iova_domain(&reserved_iova_list);
out_free_dmar:
	intel_iommu_free_dmars();
	up_write(&dmar_global_lock);
	iommu_exit_mempool();
	return ret;
}
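
/* Callback for pci_for_each_dma_alias(): clear the context entry of one alias. */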
static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
{
	struct intel_iommu *iommu = opaque;

	domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
	return 0;
}
/*
 * NB - intel-iommu lacks any sort of reference counting for the users of
 * dependent devices. If multiple endpoints have intersecting dependent
 * devices, unbinding the driver from any one of them will possibly leave
 * the others unable to operate.
 */
static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
{
	if (!iommu || !dev || !dev_is_pci(dev))
		return;

	pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
}
static void __dmar_remove_one_dev_info(struct device_domain_info *info)
{
	struct dmar_domain *domain;
	struct intel_iommu *iommu;
	unsigned long flags;

	assert_spin_locked(&device_domain_lock);

	if (WARN_ON(!info))
		return;

	iommu = info->iommu;
	domain = info->domain;

	if (info->dev) {
		if (dev_is_pci(info->dev) && sm_supported(iommu))
			intel_pasid_tear_down_entry(iommu, info->dev,
					PASID_RID2PASID);

		iommu_disable_dev_iotlb(info);
		domain_context_clear(iommu, info->dev);
		intel_pasid_free_table(info->dev);
	}

	unlink_domain_info(info);

	spin_lock_irqsave(&iommu->lock, flags);
	domain_detach_iommu(domain, iommu);
	spin_unlock_irqrestore(&iommu->lock, flags);

	/* free the private domain */
	if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
	    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
	    list_empty(&domain->devices))
		domain_exit(info->domain);

	free_devinfo_mem(info);
}
static void dmar_remove_one_dev_info(struct device *dev)
{
	struct device_domain_info *info;
	unsigned long flags;

	spin_lock_irqsave(&device_domain_lock, flags);
	info = dev->archdata.iommu;
	if (info && info != DEFER_DEVICE_DOMAIN_INFO
	    && info != DUMMY_DEVICE_DOMAIN_INFO)
		__dmar_remove_one_dev_info(info);
	spin_unlock_irqrestore(&device_domain_lock, flags);
}
static int md_domain_init(struct dmar_domain *domain, int guest_width)
{
	int adjust_width;

	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	domain->agaw = width_to_agaw(adjust_width);

	domain->iommu_coherency = 0;
	domain->iommu_snooping = 0;
	domain->iommu_superpage = 0;
	domain->max_addr = 0;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
	if (!domain->pgd)
		return -ENOMEM;
	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
	return 0;
}
static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
{
	struct dmar_domain *dmar_domain;
	struct iommu_domain *domain;

	switch (type) {
	case IOMMU_DOMAIN_DMA:
	/* fallthrough */
	case IOMMU_DOMAIN_UNMANAGED:
		dmar_domain = alloc_domain(0);
		if (!dmar_domain) {
			pr_err("Can't allocate dmar_domain\n");
			return NULL;
		}
		if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
			pr_err("Domain initialization failed\n");
			domain_exit(dmar_domain);
			return NULL;
		}

		if (type == IOMMU_DOMAIN_DMA &&
		    init_iova_flush_queue(&dmar_domain->iovad,
					  iommu_flush_iova, iova_entry_free)) {
			pr_warn("iova flush queue initialization failed\n");
			intel_iommu_strict = 1;
		}

		domain_update_iommu_cap(dmar_domain);

		domain = &dmar_domain->domain;
		domain->geometry.aperture_start = 0;
		domain->geometry.aperture_end   =
				__DOMAIN_MAX_ADDR(dmar_domain->gaw);
		domain->geometry.force_aperture = true;

		return domain;
	case IOMMU_DOMAIN_IDENTITY:
		return &si_domain->domain;
	default:
		return NULL;
	}

	return NULL;
}

static void intel_iommu_domain_free(struct iommu_domain *domain)
{
	if (domain != &si_domain->domain)
		domain_exit(to_dmar_domain(domain));
}
/*
 * Check whether a @domain could be attached to the @dev through the
 * aux-domain attach/detach APIs.
 */
static inline bool
is_aux_domain(struct device *dev, struct iommu_domain *domain)
{
	struct device_domain_info *info = dev->archdata.iommu;

	return info && info->auxd_enabled &&
			domain->type == IOMMU_DOMAIN_UNMANAGED;
}
static void auxiliary_link_device(struct dmar_domain *domain,
				  struct device *dev)
{
	struct device_domain_info *info = dev->archdata.iommu;

	assert_spin_locked(&device_domain_lock);
	if (WARN_ON(!info))
		return;

	domain->auxd_refcnt++;
	list_add(&domain->auxd, &info->auxiliary_domains);
}

static void auxiliary_unlink_device(struct dmar_domain *domain,
				    struct device *dev)
{
	struct device_domain_info *info = dev->archdata.iommu;

	assert_spin_locked(&device_domain_lock);
	if (WARN_ON(!info))
		return;

	list_del(&domain->auxd);
	domain->auxd_refcnt--;

	if (!domain->auxd_refcnt && domain->default_pasid > 0)
		intel_pasid_free_id(domain->default_pasid);
}
static int aux_domain_add_dev(struct dmar_domain *domain,
			      struct device *dev)
{
	int ret;
	u8 bus, devfn;
	unsigned long flags;
	struct intel_iommu *iommu;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	if (domain->default_pasid <= 0) {
		int pasid;

		pasid = intel_pasid_alloc_id(domain, PASID_MIN,
					     pci_max_pasids(to_pci_dev(dev)),
					     GFP_KERNEL);
		if (pasid <= 0) {
			pr_err("Can't allocate default pasid\n");
			return -ENODEV;
		}
		domain->default_pasid = pasid;
	}

	spin_lock_irqsave(&device_domain_lock, flags);
	/*
	 * iommu->lock must be held to attach domain to iommu and setup the
	 * pasid entry for second level translation.
	 */
	spin_lock(&iommu->lock);
	ret = domain_attach_iommu(domain, iommu);
	if (ret)
		goto attach_failed;

	/* Setup the PASID entry for mediated devices: */
	ret = intel_pasid_setup_second_level(iommu, domain, dev,
					     domain->default_pasid);
	if (ret)
		goto table_failed;
	spin_unlock(&iommu->lock);

	auxiliary_link_device(domain, dev);

	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;

table_failed:
	domain_detach_iommu(domain, iommu);
attach_failed:
	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);
	if (!domain->auxd_refcnt && domain->default_pasid > 0)
		intel_pasid_free_id(domain->default_pasid);

	return ret;
}
static void aux_domain_remove_dev(struct dmar_domain *domain,
				  struct device *dev)
{
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags;

	if (!is_aux_domain(dev, &domain->domain))
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	info = dev->archdata.iommu;
	iommu = info->iommu;

	auxiliary_unlink_device(domain, dev);

	spin_lock(&iommu->lock);
	intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
	domain_detach_iommu(domain, iommu);
	spin_unlock(&iommu->lock);

	spin_unlock_irqrestore(&device_domain_lock, flags);
}
static int prepare_domain_attach_device(struct iommu_domain *domain,
					struct device *dev)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct intel_iommu *iommu;
	int addr_width;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	/* check if this iommu agaw is sufficient for max mapped address */
	addr_width = agaw_to_width(iommu->agaw);
	if (addr_width > cap_mgaw(iommu->cap))
		addr_width = cap_mgaw(iommu->cap);

	if (dmar_domain->max_addr > (1LL << addr_width)) {
		dev_err(dev, "%s: iommu width (%d) is not "
		        "sufficient for the mapped address (%llx)\n",
		        __func__, addr_width, dmar_domain->max_addr);
		return -EFAULT;
	}
	dmar_domain->gaw = addr_width;

	/*
	 * Knock out extra levels of page tables if necessary
	 */
	while (iommu->agaw < dmar_domain->agaw) {
		struct dma_pte *pte;

		pte = dmar_domain->pgd;
		if (dma_pte_present(pte)) {
			dmar_domain->pgd = (struct dma_pte *)
				phys_to_virt(dma_pte_addr(pte));
			free_pgtable_page(pte);
		}
		dmar_domain->agaw--;
	}

	return 0;
}
static int intel_iommu_attach_device(struct iommu_domain *domain,
				     struct device *dev)
{
	int ret;

	if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
	    device_is_rmrr_locked(dev)) {
		dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
		return -EPERM;
	}

	if (is_aux_domain(dev, domain))
		return -EPERM;

	/* normally dev is not mapped */
	if (unlikely(domain_context_mapped(dev))) {
		struct dmar_domain *old_domain;

		old_domain = find_domain(dev);
		if (old_domain)
			dmar_remove_one_dev_info(dev);
	}

	ret = prepare_domain_attach_device(domain, dev);
	if (ret)
		return ret;

	return domain_add_dev_info(to_dmar_domain(domain), dev);
}
static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
					 struct device *dev)
{
	int ret;

	if (!is_aux_domain(dev, domain))
		return -EPERM;

	ret = prepare_domain_attach_device(domain, dev);
	if (ret)
		return ret;

	return aux_domain_add_dev(to_dmar_domain(domain), dev);
}

static void intel_iommu_detach_device(struct iommu_domain *domain,
				      struct device *dev)
{
	dmar_remove_one_dev_info(dev);
}

static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
					  struct device *dev)
{
	aux_domain_remove_dev(to_dmar_domain(domain), dev);
}
static int intel_iommu_map(struct iommu_domain *domain,
			   unsigned long iova, phys_addr_t hpa,
			   size_t size, int iommu_prot)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	u64 max_addr;
	int prot = 0;
	int ret;

	if (iommu_prot & IOMMU_READ)
		prot |= DMA_PTE_READ;
	if (iommu_prot & IOMMU_WRITE)
		prot |= DMA_PTE_WRITE;
	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
		prot |= DMA_PTE_SNP;

	max_addr = iova + size;
	if (dmar_domain->max_addr < max_addr) {
		u64 end;

		/* check if minimum agaw is sufficient for mapped address */
		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
		if (end < max_addr) {
			pr_err("%s: iommu width (%d) is not "
			       "sufficient for the mapped address (%llx)\n",
			       __func__, dmar_domain->gaw, max_addr);
			return -EFAULT;
		}
		dmar_domain->max_addr = max_addr;
	}
	/* Round up size to next multiple of PAGE_SIZE, if it and
	   the low bits of hpa would take us onto the next page */
	size = aligned_nrpages(hpa, size);
	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
				 hpa >> VTD_PAGE_SHIFT, size, prot);
	return ret;
}
static size_t intel_iommu_unmap(struct iommu_domain *domain,
				unsigned long iova, size_t size,
				struct iommu_iotlb_gather *gather)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct page *freelist = NULL;
	unsigned long start_pfn, last_pfn;
	unsigned int npages;
	int iommu_id, level = 0;

	/* Cope with horrid API which requires us to unmap more than the
	   size argument if it happens to be a large-page mapping. */
	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));

	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
		size = VTD_PAGE_SIZE << level_to_offset_bits(level);

	start_pfn = iova >> VTD_PAGE_SHIFT;
	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;

	freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);

	npages = last_pfn - start_pfn + 1;

	for_each_domain_iommu(iommu_id, dmar_domain)
		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
				      start_pfn, npages, !freelist, 0);

	dma_free_pagelist(freelist);

	if (dmar_domain->max_addr == iova + size)
		dmar_domain->max_addr = iova;

	return size;
}
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
					    dma_addr_t iova)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct dma_pte *pte;
	int level = 0;
	u64 phys = 0;

	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
	if (pte && dma_pte_present(pte))
		phys = dma_pte_addr(pte) +
			(iova & (BIT_MASK(level_to_offset_bits(level) +
						VTD_PAGE_SHIFT) - 1));

	return phys;
}
static inline bool scalable_mode_support(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	bool ret = true;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (!sm_supported(iommu)) {
			ret = false;
			break;
		}
	}
	rcu_read_unlock();

	return ret;
}

static inline bool iommu_pasid_support(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	bool ret = true;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (!pasid_supported(iommu)) {
			ret = false;
			break;
		}
	}
	rcu_read_unlock();

	return ret;
}
static bool intel_iommu_capable(enum iommu_cap cap)
{
	if (cap == IOMMU_CAP_CACHE_COHERENCY)
		return domain_update_iommu_snooping(NULL) == 1;
	if (cap == IOMMU_CAP_INTR_REMAP)
		return irq_remapping_enabled == 1;

	return false;
}
static int intel_iommu_add_device(struct device *dev)
{
	struct dmar_domain *dmar_domain;
	struct iommu_domain *domain;
	struct intel_iommu *iommu;
	struct iommu_group *group;
	u8 bus, devfn;
	int ret;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	iommu_device_link(&iommu->iommu, dev);

	if (translation_pre_enabled(iommu))
		dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;

	group = iommu_group_get_for_dev(dev);

	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto unlink;
	}

	iommu_group_put(group);

	domain = iommu_get_domain_for_dev(dev);
	dmar_domain = to_dmar_domain(domain);
	if (domain->type == IOMMU_DOMAIN_DMA) {
		if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) {
			ret = iommu_request_dm_for_dev(dev);
			if (ret) {
				dmar_remove_one_dev_info(dev);
				dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
				domain_add_dev_info(si_domain, dev);
				dev_info(dev,
					 "Device uses a private identity domain.\n");
			}
		}
	} else {
		if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) {
			ret = iommu_request_dma_domain_for_dev(dev);
			if (ret) {
				dmar_remove_one_dev_info(dev);
				dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
				if (!get_private_domain_for_dev(dev)) {
					dev_warn(dev,
						 "Failed to get a private domain.\n");
					ret = -ENOMEM;
					goto unlink;
				}

				dev_info(dev,
					 "Device uses a private dma domain.\n");
			}
		}
	}

	if (device_needs_bounce(dev)) {
		dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n");
		set_dma_ops(dev, &bounce_dma_ops);
	}

	return 0;

unlink:
	iommu_device_unlink(&iommu->iommu, dev);
	return ret;
}
static void intel_iommu_remove_device(struct device *dev)
{
	struct intel_iommu *iommu;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return;

	dmar_remove_one_dev_info(dev);

	iommu_group_remove_device(dev);

	iommu_device_unlink(&iommu->iommu, dev);

	if (device_needs_bounce(dev))
		set_dma_ops(dev, NULL);
}
static void intel_iommu_get_resv_regions(struct device *device,
					 struct list_head *head)
{
	int prot = DMA_PTE_READ | DMA_PTE_WRITE;
	struct iommu_resv_region *reg;
	struct dmar_rmrr_unit *rmrr;
	struct device *i_dev;
	int i;

	down_read(&dmar_global_lock);
	for_each_rmrr_units(rmrr) {
		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
					  i, i_dev) {
			struct iommu_resv_region *resv;
			enum iommu_resv_type type;
			size_t length;

			if (i_dev != device &&
			    !is_downstream_to_pci_bridge(device, i_dev))
				continue;

			length = rmrr->end_address - rmrr->base_address + 1;

			type = device_rmrr_is_relaxable(device) ?
				IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;

			resv = iommu_alloc_resv_region(rmrr->base_address,
						       length, prot, type);
			if (!resv)
				break;

			list_add_tail(&resv->list, head);
		}
	}
	up_read(&dmar_global_lock);

#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
	if (dev_is_pci(device)) {
		struct pci_dev *pdev = to_pci_dev(device);

		if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
			reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
						   IOMMU_RESV_DIRECT_RELAXABLE);
			if (reg)
				list_add_tail(&reg->list, head);
		}
	}
#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */

	reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
				      IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
				      0, IOMMU_RESV_MSI);
	if (!reg)
		return;
	list_add_tail(&reg->list, head);
}
static void intel_iommu_put_resv_regions(struct device *dev,
					 struct list_head *head)
{
	struct iommu_resv_region *entry, *next;

	list_for_each_entry_safe(entry, next, head, list)
		kfree(entry);
}
int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
{
	struct device_domain_info *info;
	struct context_entry *context;
	struct dmar_domain *domain;
	unsigned long flags;
	u64 ctx_lo;
	int ret;

	domain = find_domain(dev);
	if (!domain)
		return -EINVAL;

	spin_lock_irqsave(&device_domain_lock, flags);
	spin_lock(&iommu->lock);

	ret = -EINVAL;
	info = dev->archdata.iommu;
	if (!info || !info->pasid_supported)
		goto out;

	context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
	if (WARN_ON(!context))
		goto out;

	ctx_lo = context[0].lo;

	if (!(ctx_lo & CONTEXT_PASIDE)) {
		ctx_lo |= CONTEXT_PASIDE;
		context[0].lo = ctx_lo;
		wmb();
		iommu->flush.flush_context(iommu,
					   domain->iommu_did[iommu->seq_id],
					   PCI_DEVID(info->bus, info->devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
	}

	/* Enable PASID support in the device, if it wasn't already */
	if (!info->pasid_enabled)
		iommu_enable_dev_iotlb(info);

	ret = 0;

 out:
	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return ret;
}
static void intel_iommu_apply_resv_region(struct device *dev,
					  struct iommu_domain *domain,
					  struct iommu_resv_region *region)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	unsigned long start, end;

	start = IOVA_PFN(region->start);
	end   = IOVA_PFN(region->start + region->length - 1);

	WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));
}
static struct iommu_group *intel_iommu_device_group(struct device *dev)
{
	if (dev_is_pci(dev))
		return pci_device_group(dev);
	return generic_device_group(dev);
}
#ifdef CONFIG_INTEL_IOMMU_SVM
struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
{
	struct intel_iommu *iommu;
	u8 bus, devfn;

	if (iommu_dummy(dev)) {
		dev_warn(dev,
			 "No IOMMU translation for device; cannot enable SVM\n");
		return NULL;
	}

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu) {
		dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
		return NULL;
	}

	return iommu;
}
#endif /* CONFIG_INTEL_IOMMU_SVM */
static int intel_iommu_enable_auxd(struct device *dev)
{
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags;
	u8 bus, devfn;
	int ret;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu || dmar_disabled)
		return -EINVAL;

	if (!sm_supported(iommu) || !pasid_supported(iommu))
		return -EINVAL;

	ret = intel_iommu_enable_pasid(iommu, dev);
	if (ret)
		return -ENODEV;

	spin_lock_irqsave(&device_domain_lock, flags);
	info = dev->archdata.iommu;
	info->auxd_enabled = 1;
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}
static int intel_iommu_disable_auxd(struct device *dev)
{
	struct device_domain_info *info;
	unsigned long flags;

	spin_lock_irqsave(&device_domain_lock, flags);
	info = dev->archdata.iommu;
	if (!WARN_ON(!info))
		info->auxd_enabled = 0;
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}
/*
 * A PCI Express designated vendor-specific extended capability is defined
 * in section 3.7 of the Intel Scalable I/O Virtualization technical spec
 * for system software and tools to detect endpoint devices supporting the
 * Intel scalable IO virtualization without host driver dependency.
 *
 * Returns the address of the matching extended capability structure within
 * the device's PCI configuration space or 0 if the device does not support
 * it.
 */
static int siov_find_pci_dvsec(struct pci_dev *pdev)
{
	int pos;
	u16 vendor, id;

	pos = pci_find_next_ext_capability(pdev, 0, 0x23);
	while (pos) {
		pci_read_config_word(pdev, pos + 4, &vendor);
		pci_read_config_word(pdev, pos + 8, &id);
		if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
			return pos;

		pos = pci_find_next_ext_capability(pdev, pos, 0x23);
	}

	return 0;
}
static bool
intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
{
	if (feat == IOMMU_DEV_FEAT_AUX) {
		int ret;

		if (!dev_is_pci(dev) || dmar_disabled ||
		    !scalable_mode_support() || !iommu_pasid_support())
			return false;

		ret = pci_pasid_features(to_pci_dev(dev));
		if (ret < 0)
			return false;

		return !!siov_find_pci_dvsec(to_pci_dev(dev));
	}

	return false;
}
static int
intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
{
	if (feat == IOMMU_DEV_FEAT_AUX)
		return intel_iommu_enable_auxd(dev);

	return -ENODEV;
}

static int
intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
{
	if (feat == IOMMU_DEV_FEAT_AUX)
		return intel_iommu_disable_auxd(dev);

	return -ENODEV;
}
static bool
intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
{
	struct device_domain_info *info = dev->archdata.iommu;

	if (feat == IOMMU_DEV_FEAT_AUX)
		return scalable_mode_support() && info && info->auxd_enabled;

	return false;
}
static int
intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);

	return dmar_domain->default_pasid > 0 ?
			dmar_domain->default_pasid : -EINVAL;
}
static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
					   struct device *dev)
{
	return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
}
/*
 * Check that the device does not live on an external-facing PCI port that is
 * marked as untrusted. Such devices should not be able to apply quirks and
 * thus not be able to bypass the IOMMU restrictions.
 */
static bool risky_device(struct pci_dev *pdev)
{
	if (pdev->untrusted) {
		pci_info(pdev,
			 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
			 pdev->vendor, pdev->device);
		pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
		return true;
	}

	return false;
}
const struct iommu_ops intel_iommu_ops = {
	.capable		= intel_iommu_capable,
	.domain_alloc		= intel_iommu_domain_alloc,
	.domain_free		= intel_iommu_domain_free,
	.attach_dev		= intel_iommu_attach_device,
	.detach_dev		= intel_iommu_detach_device,
	.aux_attach_dev		= intel_iommu_aux_attach_device,
	.aux_detach_dev		= intel_iommu_aux_detach_device,
	.aux_get_pasid		= intel_iommu_aux_get_pasid,
	.map			= intel_iommu_map,
	.unmap			= intel_iommu_unmap,
	.iova_to_phys		= intel_iommu_iova_to_phys,
	.add_device		= intel_iommu_add_device,
	.remove_device		= intel_iommu_remove_device,
	.get_resv_regions	= intel_iommu_get_resv_regions,
	.put_resv_regions	= intel_iommu_put_resv_regions,
	.apply_resv_region	= intel_iommu_apply_resv_region,
	.device_group		= intel_iommu_device_group,
	.dev_has_feat		= intel_iommu_dev_has_feat,
	.dev_feat_enabled	= intel_iommu_dev_feat_enabled,
	.dev_enable_feat	= intel_iommu_dev_enable_feat,
	.dev_disable_feat	= intel_iommu_dev_disable_feat,
	.is_attach_deferred	= intel_iommu_is_attach_deferred,
	.pgsize_bitmap		= INTEL_IOMMU_PGSIZES,
};
static void quirk_iommu_igfx(struct pci_dev *dev)
{
	if (risky_device(dev))
		return;

	pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
	dmar_map_gfx = 0;
}

/* G4x/GM45 integrated gfx dmar support is totally busted. */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);

/* Broadwell igfx malfunctions with dmar */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
static void quirk_iommu_rwbf(struct pci_dev *dev)
{
	if (risky_device(dev))
		return;

	/*
	 * Mobile 4 Series Chipset neglects to set RWBF capability,
	 * but needs it. Same seems to hold for the desktop versions.
	 */
	pci_info(dev, "Forcing write-buffer flush capability\n");
	rwbf_quirk = 1;
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
#define GGC 0x52
#define GGC_MEMORY_SIZE_MASK	(0xf << 8)
#define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
#define GGC_MEMORY_SIZE_1M	(0x1 << 8)
#define GGC_MEMORY_SIZE_2M	(0x3 << 8)
#define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)

static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
{
	unsigned short ggc;

	if (risky_device(dev))
		return;

	if (pci_read_config_word(dev, GGC, &ggc))
		return;

	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
		pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
		dmar_map_gfx = 0;
	} else if (dmar_map_gfx) {
		/* we have to ensure the gfx device is idle before we flush */
		pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
		intel_iommu_strict = 1;
	}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
/* On Tylersburg chipsets, some BIOSes have been known to enable the
   ISOCH DMAR unit for the Azalia sound device, but not give it any
   TLB entries, which causes it to deadlock. Check for that. We do
   this in a function called from init_dmars(), instead of in a PCI
   quirk, because we don't want to print the obnoxious "BIOS broken"
   message if VT-d is actually disabled.
*/
static void __init check_tylersburg_isoch(void)
{
	struct pci_dev *pdev;
	uint32_t vtisochctrl;

	/* If there's no Azalia in the system anyway, forget it. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
	if (!pdev)
		return;

	if (risky_device(pdev)) {
		pci_dev_put(pdev);
		return;
	}

	pci_dev_put(pdev);

	/* System Management Registers. Might be hidden, in which case
	   we can't do the sanity check. But that's OK, because the
	   known-broken BIOSes _don't_ actually hide it, so far. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
	if (!pdev)
		return;

	if (risky_device(pdev)) {
		pci_dev_put(pdev);
		return;
	}

	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
		pci_dev_put(pdev);
		return;
	}

	pci_dev_put(pdev);

	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
	if (vtisochctrl & 1)
		return;

	/* Drop all bits other than the number of TLB entries */
	vtisochctrl &= 0x1c;

	/* If we have the recommended number of TLB entries (16), fine. */
	if (vtisochctrl == 0x10)
		return;

	/* Zero TLB entries? You get to ride the short bus to school. */
	if (!vtisochctrl) {
		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		iommu_identity_mapping |= IDENTMAP_AZALIA;
		return;
	}

	pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
	       vtisochctrl);
}