2 * Copyright © 2006-2014 Intel Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
20 #include <linux/init.h>
21 #include <linux/bitmap.h>
22 #include <linux/debugfs.h>
23 #include <linux/export.h>
24 #include <linux/slab.h>
25 #include <linux/irq.h>
26 #include <linux/interrupt.h>
27 #include <linux/spinlock.h>
28 #include <linux/pci.h>
29 #include <linux/dmar.h>
30 #include <linux/dma-mapping.h>
31 #include <linux/mempool.h>
32 #include <linux/memory.h>
33 #include <linux/timer.h>
34 #include <linux/iova.h>
35 #include <linux/iommu.h>
36 #include <linux/intel-iommu.h>
37 #include <linux/syscore_ops.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/pci-ats.h>
41 #include <linux/memblock.h>
42 #include <linux/dma-contiguous.h>
43 #include <asm/irq_remapping.h>
44 #include <asm/cacheflush.h>
45 #include <asm/iommu.h>
47 #include "irq_remapping.h"
50 #define ROOT_SIZE VTD_PAGE_SIZE
51 #define CONTEXT_SIZE VTD_PAGE_SIZE
53 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
54 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
55 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
56 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
58 #define IOAPIC_RANGE_START (0xfee00000)
59 #define IOAPIC_RANGE_END (0xfeefffff)
60 #define IOVA_START_ADDR (0x1000)
62 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
64 #define MAX_AGAW_WIDTH 64
65 #define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
67 #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
68 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
70 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
71 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
72 #define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
73 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
74 #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
76 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
77 #define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
78 #define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
80 /* page table handling */
81 #define LEVEL_STRIDE (9)
82 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
85 * This bitmap is used to advertise the page sizes our hardware support
86 * to the IOMMU core, which will then use this information to split
87 * physically contiguous memory regions it is mapping into page sizes
90 * Traditionally the IOMMU core just handed us the mappings directly,
91 * after making sure the size is an order of a 4KiB page and that the
92 * mapping has natural alignment.
94 * To retain this behavior, we currently advertise that we support
95 * all page sizes that are an order of 4KiB.
97 * If at some point we'd like to utilize the IOMMU core's new behavior,
98 * we could change this to advertise the real page sizes we support.
100 #define INTEL_IOMMU_PGSIZES (~0xFFFUL)
102 static inline int agaw_to_level(int agaw
)
107 static inline int agaw_to_width(int agaw
)
109 return min_t(int, 30 + agaw
* LEVEL_STRIDE
, MAX_AGAW_WIDTH
);
112 static inline int width_to_agaw(int width
)
114 return DIV_ROUND_UP(width
- 30, LEVEL_STRIDE
);
117 static inline unsigned int level_to_offset_bits(int level
)
119 return (level
- 1) * LEVEL_STRIDE
;
122 static inline int pfn_level_offset(unsigned long pfn
, int level
)
124 return (pfn
>> level_to_offset_bits(level
)) & LEVEL_MASK
;
127 static inline unsigned long level_mask(int level
)
129 return -1UL << level_to_offset_bits(level
);
132 static inline unsigned long level_size(int level
)
134 return 1UL << level_to_offset_bits(level
);
137 static inline unsigned long align_to_level(unsigned long pfn
, int level
)
139 return (pfn
+ level_size(level
) - 1) & level_mask(level
);
142 static inline unsigned long lvl_to_nr_pages(unsigned int lvl
)
144 return 1 << min_t(int, (lvl
- 1) * LEVEL_STRIDE
, MAX_AGAW_PFN_WIDTH
);
147 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
148 are never going to work. */
149 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn
)
151 return dma_pfn
>> (PAGE_SHIFT
- VTD_PAGE_SHIFT
);
154 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn
)
156 return mm_pfn
<< (PAGE_SHIFT
- VTD_PAGE_SHIFT
);
158 static inline unsigned long page_to_dma_pfn(struct page
*pg
)
160 return mm_to_dma_pfn(page_to_pfn(pg
));
162 static inline unsigned long virt_to_dma_pfn(void *p
)
164 return page_to_dma_pfn(virt_to_page(p
));
167 /* global iommu list, set NULL for ignored DMAR units */
168 static struct intel_iommu
**g_iommus
;
170 static void __init
check_tylersburg_isoch(void);
171 static int rwbf_quirk
;
174 * set to 1 to panic kernel if can't successfully enable VT-d
175 * (used when kernel is launched w/ TXT)
177 static int force_on
= 0;
182 * 12-63: Context Ptr (12 - (haw-1))
189 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
190 static inline bool root_present(struct root_entry
*root
)
192 return (root
->val
& 1);
194 static inline void set_root_present(struct root_entry
*root
)
198 static inline void set_root_value(struct root_entry
*root
, unsigned long value
)
200 root
->val
|= value
& VTD_PAGE_MASK
;
203 static inline struct context_entry
*
204 get_context_addr_from_root(struct root_entry
*root
)
206 return (struct context_entry
*)
207 (root_present(root
)?phys_to_virt(
208 root
->val
& VTD_PAGE_MASK
) :
215 * 1: fault processing disable
216 * 2-3: translation type
217 * 12-63: address space root
223 struct context_entry
{
228 static inline bool context_present(struct context_entry
*context
)
230 return (context
->lo
& 1);
232 static inline void context_set_present(struct context_entry
*context
)
237 static inline void context_set_fault_enable(struct context_entry
*context
)
239 context
->lo
&= (((u64
)-1) << 2) | 1;
242 static inline void context_set_translation_type(struct context_entry
*context
,
245 context
->lo
&= (((u64
)-1) << 4) | 3;
246 context
->lo
|= (value
& 3) << 2;
249 static inline void context_set_address_root(struct context_entry
*context
,
252 context
->lo
|= value
& VTD_PAGE_MASK
;
255 static inline void context_set_address_width(struct context_entry
*context
,
258 context
->hi
|= value
& 7;
261 static inline void context_set_domain_id(struct context_entry
*context
,
264 context
->hi
|= (value
& ((1 << 16) - 1)) << 8;
267 static inline void context_clear_entry(struct context_entry
*context
)
280 * 12-63: Host physcial address
286 static inline void dma_clear_pte(struct dma_pte
*pte
)
291 static inline u64
dma_pte_addr(struct dma_pte
*pte
)
294 return pte
->val
& VTD_PAGE_MASK
;
296 /* Must have a full atomic 64-bit read */
297 return __cmpxchg64(&pte
->val
, 0ULL, 0ULL) & VTD_PAGE_MASK
;
301 static inline bool dma_pte_present(struct dma_pte
*pte
)
303 return (pte
->val
& 3) != 0;
306 static inline bool dma_pte_superpage(struct dma_pte
*pte
)
308 return (pte
->val
& (1 << 7));
311 static inline int first_pte_in_page(struct dma_pte
*pte
)
313 return !((unsigned long)pte
& ~VTD_PAGE_MASK
);
317 * This domain is a statically identity mapping domain.
318 * 1. This domain creats a static 1:1 mapping to all usable memory.
319 * 2. It maps to each iommu if successful.
320 * 3. Each iommu mapps to this domain if successful.
322 static struct dmar_domain
*si_domain
;
323 static int hw_pass_through
= 1;
325 /* devices under the same p2p bridge are owned in one domain */
326 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
328 /* domain represents a virtual machine, more than one devices
329 * across iommus may be owned in one domain, e.g. kvm guest.
331 #define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
333 /* si_domain contains mulitple devices */
334 #define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
336 /* define the limit of IOMMUs supported in each domain */
338 # define IOMMU_UNITS_SUPPORTED MAX_IO_APICS
340 # define IOMMU_UNITS_SUPPORTED 64
344 int id
; /* domain id */
345 int nid
; /* node id */
346 DECLARE_BITMAP(iommu_bmp
, IOMMU_UNITS_SUPPORTED
);
347 /* bitmap of iommus this domain uses*/
349 struct list_head devices
; /* all devices' list */
350 struct iova_domain iovad
; /* iova's that belong to this domain */
352 struct dma_pte
*pgd
; /* virtual address */
353 int gaw
; /* max guest address width */
355 /* adjusted guest address width, 0 is level 2 30-bit */
358 int flags
; /* flags to find out type of domain */
360 int iommu_coherency
;/* indicate coherency of iommu access */
361 int iommu_snooping
; /* indicate snooping control feature*/
362 int iommu_count
; /* reference count of iommu */
363 int iommu_superpage
;/* Level of superpages supported:
364 0 == 4KiB (no superpages), 1 == 2MiB,
365 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
366 spinlock_t iommu_lock
; /* protect iommu set in domain */
367 u64 max_addr
; /* maximum mapped address */
370 /* PCI domain-device relationship */
371 struct device_domain_info
{
372 struct list_head link
; /* link to domain siblings */
373 struct list_head global
; /* link to global list */
374 u8 bus
; /* PCI bus number */
375 u8 devfn
; /* PCI devfn number */
376 struct device
*dev
; /* it's NULL for PCIe-to-PCI bridge */
377 struct intel_iommu
*iommu
; /* IOMMU used by this device */
378 struct dmar_domain
*domain
; /* pointer to domain */
381 struct dmar_rmrr_unit
{
382 struct list_head list
; /* list of rmrr units */
383 struct acpi_dmar_header
*hdr
; /* ACPI header */
384 u64 base_address
; /* reserved base address*/
385 u64 end_address
; /* reserved end address */
386 struct dmar_dev_scope
*devices
; /* target devices */
387 int devices_cnt
; /* target device count */
390 struct dmar_atsr_unit
{
391 struct list_head list
; /* list of ATSR units */
392 struct acpi_dmar_header
*hdr
; /* ACPI header */
393 struct dmar_dev_scope
*devices
; /* target devices */
394 int devices_cnt
; /* target device count */
395 u8 include_all
:1; /* include all ports */
398 static LIST_HEAD(dmar_atsr_units
);
399 static LIST_HEAD(dmar_rmrr_units
);
401 #define for_each_rmrr_units(rmrr) \
402 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
404 static void flush_unmaps_timeout(unsigned long data
);
406 static DEFINE_TIMER(unmap_timer
, flush_unmaps_timeout
, 0, 0);
408 #define HIGH_WATER_MARK 250
409 struct deferred_flush_tables
{
411 struct iova
*iova
[HIGH_WATER_MARK
];
412 struct dmar_domain
*domain
[HIGH_WATER_MARK
];
413 struct page
*freelist
[HIGH_WATER_MARK
];
416 static struct deferred_flush_tables
*deferred_flush
;
418 /* bitmap for indexing intel_iommus */
419 static int g_num_of_iommus
;
421 static DEFINE_SPINLOCK(async_umap_flush_lock
);
422 static LIST_HEAD(unmaps_to_do
);
425 static long list_size
;
427 static void domain_exit(struct dmar_domain
*domain
);
428 static void domain_remove_dev_info(struct dmar_domain
*domain
);
429 static void domain_remove_one_dev_info(struct dmar_domain
*domain
,
431 static void iommu_detach_dependent_devices(struct intel_iommu
*iommu
,
434 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
435 int dmar_disabled
= 0;
437 int dmar_disabled
= 1;
438 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
440 int intel_iommu_enabled
= 0;
441 EXPORT_SYMBOL_GPL(intel_iommu_enabled
);
443 static int dmar_map_gfx
= 1;
444 static int dmar_forcedac
;
445 static int intel_iommu_strict
;
446 static int intel_iommu_superpage
= 1;
448 int intel_iommu_gfx_mapped
;
449 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped
);
451 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
452 static DEFINE_SPINLOCK(device_domain_lock
);
453 static LIST_HEAD(device_domain_list
);
455 static struct iommu_ops intel_iommu_ops
;
457 static int __init
intel_iommu_setup(char *str
)
462 if (!strncmp(str
, "on", 2)) {
464 printk(KERN_INFO
"Intel-IOMMU: enabled\n");
465 } else if (!strncmp(str
, "off", 3)) {
467 printk(KERN_INFO
"Intel-IOMMU: disabled\n");
468 } else if (!strncmp(str
, "igfx_off", 8)) {
471 "Intel-IOMMU: disable GFX device mapping\n");
472 } else if (!strncmp(str
, "forcedac", 8)) {
474 "Intel-IOMMU: Forcing DAC for PCI devices\n");
476 } else if (!strncmp(str
, "strict", 6)) {
478 "Intel-IOMMU: disable batched IOTLB flush\n");
479 intel_iommu_strict
= 1;
480 } else if (!strncmp(str
, "sp_off", 6)) {
482 "Intel-IOMMU: disable supported super page\n");
483 intel_iommu_superpage
= 0;
486 str
+= strcspn(str
, ",");
492 __setup("intel_iommu=", intel_iommu_setup
);
494 static struct kmem_cache
*iommu_domain_cache
;
495 static struct kmem_cache
*iommu_devinfo_cache
;
496 static struct kmem_cache
*iommu_iova_cache
;
498 static inline void *alloc_pgtable_page(int node
)
503 page
= alloc_pages_node(node
, GFP_ATOMIC
| __GFP_ZERO
, 0);
505 vaddr
= page_address(page
);
509 static inline void free_pgtable_page(void *vaddr
)
511 free_page((unsigned long)vaddr
);
514 static inline void *alloc_domain_mem(void)
516 return kmem_cache_alloc(iommu_domain_cache
, GFP_ATOMIC
);
519 static void free_domain_mem(void *vaddr
)
521 kmem_cache_free(iommu_domain_cache
, vaddr
);
524 static inline void * alloc_devinfo_mem(void)
526 return kmem_cache_alloc(iommu_devinfo_cache
, GFP_ATOMIC
);
529 static inline void free_devinfo_mem(void *vaddr
)
531 kmem_cache_free(iommu_devinfo_cache
, vaddr
);
534 struct iova
*alloc_iova_mem(void)
536 return kmem_cache_alloc(iommu_iova_cache
, GFP_ATOMIC
);
539 void free_iova_mem(struct iova
*iova
)
541 kmem_cache_free(iommu_iova_cache
, iova
);
545 static int __iommu_calculate_agaw(struct intel_iommu
*iommu
, int max_gaw
)
550 sagaw
= cap_sagaw(iommu
->cap
);
551 for (agaw
= width_to_agaw(max_gaw
);
553 if (test_bit(agaw
, &sagaw
))
561 * Calculate max SAGAW for each iommu.
563 int iommu_calculate_max_sagaw(struct intel_iommu
*iommu
)
565 return __iommu_calculate_agaw(iommu
, MAX_AGAW_WIDTH
);
569 * calculate agaw for each iommu.
570 * "SAGAW" may be different across iommus, use a default agaw, and
571 * get a supported less agaw for iommus that don't support the default agaw.
573 int iommu_calculate_agaw(struct intel_iommu
*iommu
)
575 return __iommu_calculate_agaw(iommu
, DEFAULT_DOMAIN_ADDRESS_WIDTH
);
578 /* This functionin only returns single iommu in a domain */
579 static struct intel_iommu
*domain_get_iommu(struct dmar_domain
*domain
)
583 /* si_domain and vm domain should not get here. */
584 BUG_ON(domain
->flags
& DOMAIN_FLAG_VIRTUAL_MACHINE
);
585 BUG_ON(domain
->flags
& DOMAIN_FLAG_STATIC_IDENTITY
);
587 iommu_id
= find_first_bit(domain
->iommu_bmp
, g_num_of_iommus
);
588 if (iommu_id
< 0 || iommu_id
>= g_num_of_iommus
)
591 return g_iommus
[iommu_id
];
594 static void domain_update_iommu_coherency(struct dmar_domain
*domain
)
596 struct dmar_drhd_unit
*drhd
;
597 struct intel_iommu
*iommu
;
600 domain
->iommu_coherency
= 1;
602 for_each_set_bit(i
, domain
->iommu_bmp
, g_num_of_iommus
) {
604 if (!ecap_coherent(g_iommus
[i
]->ecap
)) {
605 domain
->iommu_coherency
= 0;
612 /* No hardware attached; use lowest common denominator */
614 for_each_active_iommu(iommu
, drhd
) {
615 if (!ecap_coherent(iommu
->ecap
)) {
616 domain
->iommu_coherency
= 0;
623 static void domain_update_iommu_snooping(struct dmar_domain
*domain
)
627 domain
->iommu_snooping
= 1;
629 for_each_set_bit(i
, domain
->iommu_bmp
, g_num_of_iommus
) {
630 if (!ecap_sc_support(g_iommus
[i
]->ecap
)) {
631 domain
->iommu_snooping
= 0;
637 static void domain_update_iommu_superpage(struct dmar_domain
*domain
)
639 struct dmar_drhd_unit
*drhd
;
640 struct intel_iommu
*iommu
= NULL
;
643 if (!intel_iommu_superpage
) {
644 domain
->iommu_superpage
= 0;
648 /* set iommu_superpage to the smallest common denominator */
650 for_each_active_iommu(iommu
, drhd
) {
651 mask
&= cap_super_page_val(iommu
->cap
);
658 domain
->iommu_superpage
= fls(mask
);
661 /* Some capabilities may be different across iommus */
662 static void domain_update_iommu_cap(struct dmar_domain
*domain
)
664 domain_update_iommu_coherency(domain
);
665 domain_update_iommu_snooping(domain
);
666 domain_update_iommu_superpage(domain
);
669 static int iommu_dummy(struct device
*dev
)
671 return dev
->archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
;
674 static struct intel_iommu
*device_to_iommu(struct device
*dev
, u8
*bus
, u8
*devfn
)
676 struct dmar_drhd_unit
*drhd
= NULL
;
677 struct intel_iommu
*iommu
;
679 struct pci_dev
*ptmp
, *pdev
= NULL
;
683 if (iommu_dummy(dev
))
686 if (dev_is_pci(dev
)) {
687 pdev
= to_pci_dev(dev
);
688 segment
= pci_domain_nr(pdev
->bus
);
689 } else if (ACPI_COMPANION(dev
))
690 dev
= &ACPI_COMPANION(dev
)->dev
;
693 for_each_active_iommu(iommu
, drhd
) {
694 if (pdev
&& segment
!= drhd
->segment
)
697 for_each_active_dev_scope(drhd
->devices
,
698 drhd
->devices_cnt
, i
, tmp
) {
700 *bus
= drhd
->devices
[i
].bus
;
701 *devfn
= drhd
->devices
[i
].devfn
;
705 if (!pdev
|| !dev_is_pci(tmp
))
708 ptmp
= to_pci_dev(tmp
);
709 if (ptmp
->subordinate
&&
710 ptmp
->subordinate
->number
<= pdev
->bus
->number
&&
711 ptmp
->subordinate
->busn_res
.end
>= pdev
->bus
->number
)
715 if (pdev
&& drhd
->include_all
) {
717 *bus
= pdev
->bus
->number
;
718 *devfn
= pdev
->devfn
;
729 static void domain_flush_cache(struct dmar_domain
*domain
,
730 void *addr
, int size
)
732 if (!domain
->iommu_coherency
)
733 clflush_cache_range(addr
, size
);
736 /* Gets context entry for a given bus and devfn */
737 static struct context_entry
* device_to_context_entry(struct intel_iommu
*iommu
,
740 struct root_entry
*root
;
741 struct context_entry
*context
;
742 unsigned long phy_addr
;
745 spin_lock_irqsave(&iommu
->lock
, flags
);
746 root
= &iommu
->root_entry
[bus
];
747 context
= get_context_addr_from_root(root
);
749 context
= (struct context_entry
*)
750 alloc_pgtable_page(iommu
->node
);
752 spin_unlock_irqrestore(&iommu
->lock
, flags
);
755 __iommu_flush_cache(iommu
, (void *)context
, CONTEXT_SIZE
);
756 phy_addr
= virt_to_phys((void *)context
);
757 set_root_value(root
, phy_addr
);
758 set_root_present(root
);
759 __iommu_flush_cache(iommu
, root
, sizeof(*root
));
761 spin_unlock_irqrestore(&iommu
->lock
, flags
);
762 return &context
[devfn
];
765 static int device_context_mapped(struct intel_iommu
*iommu
, u8 bus
, u8 devfn
)
767 struct root_entry
*root
;
768 struct context_entry
*context
;
772 spin_lock_irqsave(&iommu
->lock
, flags
);
773 root
= &iommu
->root_entry
[bus
];
774 context
= get_context_addr_from_root(root
);
779 ret
= context_present(&context
[devfn
]);
781 spin_unlock_irqrestore(&iommu
->lock
, flags
);
785 static void clear_context_table(struct intel_iommu
*iommu
, u8 bus
, u8 devfn
)
787 struct root_entry
*root
;
788 struct context_entry
*context
;
791 spin_lock_irqsave(&iommu
->lock
, flags
);
792 root
= &iommu
->root_entry
[bus
];
793 context
= get_context_addr_from_root(root
);
795 context_clear_entry(&context
[devfn
]);
796 __iommu_flush_cache(iommu
, &context
[devfn
], \
799 spin_unlock_irqrestore(&iommu
->lock
, flags
);
802 static void free_context_table(struct intel_iommu
*iommu
)
804 struct root_entry
*root
;
807 struct context_entry
*context
;
809 spin_lock_irqsave(&iommu
->lock
, flags
);
810 if (!iommu
->root_entry
) {
813 for (i
= 0; i
< ROOT_ENTRY_NR
; i
++) {
814 root
= &iommu
->root_entry
[i
];
815 context
= get_context_addr_from_root(root
);
817 free_pgtable_page(context
);
819 free_pgtable_page(iommu
->root_entry
);
820 iommu
->root_entry
= NULL
;
822 spin_unlock_irqrestore(&iommu
->lock
, flags
);
825 static struct dma_pte
*pfn_to_dma_pte(struct dmar_domain
*domain
,
826 unsigned long pfn
, int *target_level
)
828 int addr_width
= agaw_to_width(domain
->agaw
) - VTD_PAGE_SHIFT
;
829 struct dma_pte
*parent
, *pte
= NULL
;
830 int level
= agaw_to_level(domain
->agaw
);
833 BUG_ON(!domain
->pgd
);
835 if (addr_width
< BITS_PER_LONG
&& pfn
>> addr_width
)
836 /* Address beyond IOMMU's addressing capabilities. */
839 parent
= domain
->pgd
;
844 offset
= pfn_level_offset(pfn
, level
);
845 pte
= &parent
[offset
];
846 if (!*target_level
&& (dma_pte_superpage(pte
) || !dma_pte_present(pte
)))
848 if (level
== *target_level
)
851 if (!dma_pte_present(pte
)) {
854 tmp_page
= alloc_pgtable_page(domain
->nid
);
859 domain_flush_cache(domain
, tmp_page
, VTD_PAGE_SIZE
);
860 pteval
= ((uint64_t)virt_to_dma_pfn(tmp_page
) << VTD_PAGE_SHIFT
) | DMA_PTE_READ
| DMA_PTE_WRITE
;
861 if (cmpxchg64(&pte
->val
, 0ULL, pteval
)) {
862 /* Someone else set it while we were thinking; use theirs. */
863 free_pgtable_page(tmp_page
);
866 domain_flush_cache(domain
, pte
, sizeof(*pte
));
872 parent
= phys_to_virt(dma_pte_addr(pte
));
877 *target_level
= level
;
883 /* return address's pte at specific level */
884 static struct dma_pte
*dma_pfn_level_pte(struct dmar_domain
*domain
,
886 int level
, int *large_page
)
888 struct dma_pte
*parent
, *pte
= NULL
;
889 int total
= agaw_to_level(domain
->agaw
);
892 parent
= domain
->pgd
;
893 while (level
<= total
) {
894 offset
= pfn_level_offset(pfn
, total
);
895 pte
= &parent
[offset
];
899 if (!dma_pte_present(pte
)) {
904 if (pte
->val
& DMA_PTE_LARGE_PAGE
) {
909 parent
= phys_to_virt(dma_pte_addr(pte
));
915 /* clear last level pte, a tlb flush should be followed */
916 static void dma_pte_clear_range(struct dmar_domain
*domain
,
917 unsigned long start_pfn
,
918 unsigned long last_pfn
)
920 int addr_width
= agaw_to_width(domain
->agaw
) - VTD_PAGE_SHIFT
;
921 unsigned int large_page
= 1;
922 struct dma_pte
*first_pte
, *pte
;
924 BUG_ON(addr_width
< BITS_PER_LONG
&& start_pfn
>> addr_width
);
925 BUG_ON(addr_width
< BITS_PER_LONG
&& last_pfn
>> addr_width
);
926 BUG_ON(start_pfn
> last_pfn
);
928 /* we don't need lock here; nobody else touches the iova range */
931 first_pte
= pte
= dma_pfn_level_pte(domain
, start_pfn
, 1, &large_page
);
933 start_pfn
= align_to_level(start_pfn
+ 1, large_page
+ 1);
938 start_pfn
+= lvl_to_nr_pages(large_page
);
940 } while (start_pfn
<= last_pfn
&& !first_pte_in_page(pte
));
942 domain_flush_cache(domain
, first_pte
,
943 (void *)pte
- (void *)first_pte
);
945 } while (start_pfn
&& start_pfn
<= last_pfn
);
948 static void dma_pte_free_level(struct dmar_domain
*domain
, int level
,
949 struct dma_pte
*pte
, unsigned long pfn
,
950 unsigned long start_pfn
, unsigned long last_pfn
)
952 pfn
= max(start_pfn
, pfn
);
953 pte
= &pte
[pfn_level_offset(pfn
, level
)];
956 unsigned long level_pfn
;
957 struct dma_pte
*level_pte
;
959 if (!dma_pte_present(pte
) || dma_pte_superpage(pte
))
962 level_pfn
= pfn
& level_mask(level
- 1);
963 level_pte
= phys_to_virt(dma_pte_addr(pte
));
966 dma_pte_free_level(domain
, level
- 1, level_pte
,
967 level_pfn
, start_pfn
, last_pfn
);
969 /* If range covers entire pagetable, free it */
970 if (!(start_pfn
> level_pfn
||
971 last_pfn
< level_pfn
+ level_size(level
) - 1)) {
973 domain_flush_cache(domain
, pte
, sizeof(*pte
));
974 free_pgtable_page(level_pte
);
977 pfn
+= level_size(level
);
978 } while (!first_pte_in_page(++pte
) && pfn
<= last_pfn
);
981 /* free page table pages. last level pte should already be cleared */
982 static void dma_pte_free_pagetable(struct dmar_domain
*domain
,
983 unsigned long start_pfn
,
984 unsigned long last_pfn
)
986 int addr_width
= agaw_to_width(domain
->agaw
) - VTD_PAGE_SHIFT
;
988 BUG_ON(addr_width
< BITS_PER_LONG
&& start_pfn
>> addr_width
);
989 BUG_ON(addr_width
< BITS_PER_LONG
&& last_pfn
>> addr_width
);
990 BUG_ON(start_pfn
> last_pfn
);
992 /* We don't need lock here; nobody else touches the iova range */
993 dma_pte_free_level(domain
, agaw_to_level(domain
->agaw
),
994 domain
->pgd
, 0, start_pfn
, last_pfn
);
997 if (start_pfn
== 0 && last_pfn
== DOMAIN_MAX_PFN(domain
->gaw
)) {
998 free_pgtable_page(domain
->pgd
);
1003 /* When a page at a given level is being unlinked from its parent, we don't
1004 need to *modify* it at all. All we need to do is make a list of all the
1005 pages which can be freed just as soon as we've flushed the IOTLB and we
1006 know the hardware page-walk will no longer touch them.
1007 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1009 static struct page
*dma_pte_list_pagetables(struct dmar_domain
*domain
,
1010 int level
, struct dma_pte
*pte
,
1011 struct page
*freelist
)
1015 pg
= pfn_to_page(dma_pte_addr(pte
) >> PAGE_SHIFT
);
1016 pg
->freelist
= freelist
;
1022 pte
= page_address(pg
);
1024 if (dma_pte_present(pte
) && !dma_pte_superpage(pte
))
1025 freelist
= dma_pte_list_pagetables(domain
, level
- 1,
1028 } while (!first_pte_in_page(pte
));
1033 static struct page
*dma_pte_clear_level(struct dmar_domain
*domain
, int level
,
1034 struct dma_pte
*pte
, unsigned long pfn
,
1035 unsigned long start_pfn
,
1036 unsigned long last_pfn
,
1037 struct page
*freelist
)
1039 struct dma_pte
*first_pte
= NULL
, *last_pte
= NULL
;
1041 pfn
= max(start_pfn
, pfn
);
1042 pte
= &pte
[pfn_level_offset(pfn
, level
)];
1045 unsigned long level_pfn
;
1047 if (!dma_pte_present(pte
))
1050 level_pfn
= pfn
& level_mask(level
);
1052 /* If range covers entire pagetable, free it */
1053 if (start_pfn
<= level_pfn
&&
1054 last_pfn
>= level_pfn
+ level_size(level
) - 1) {
1055 /* These suborbinate page tables are going away entirely. Don't
1056 bother to clear them; we're just going to *free* them. */
1057 if (level
> 1 && !dma_pte_superpage(pte
))
1058 freelist
= dma_pte_list_pagetables(domain
, level
- 1, pte
, freelist
);
1064 } else if (level
> 1) {
1065 /* Recurse down into a level that isn't *entirely* obsolete */
1066 freelist
= dma_pte_clear_level(domain
, level
- 1,
1067 phys_to_virt(dma_pte_addr(pte
)),
1068 level_pfn
, start_pfn
, last_pfn
,
1072 pfn
+= level_size(level
);
1073 } while (!first_pte_in_page(++pte
) && pfn
<= last_pfn
);
1076 domain_flush_cache(domain
, first_pte
,
1077 (void *)++last_pte
- (void *)first_pte
);
1082 /* We can't just free the pages because the IOMMU may still be walking
1083 the page tables, and may have cached the intermediate levels. The
1084 pages can only be freed after the IOTLB flush has been done. */
1085 struct page
*domain_unmap(struct dmar_domain
*domain
,
1086 unsigned long start_pfn
,
1087 unsigned long last_pfn
)
1089 int addr_width
= agaw_to_width(domain
->agaw
) - VTD_PAGE_SHIFT
;
1090 struct page
*freelist
= NULL
;
1092 BUG_ON(addr_width
< BITS_PER_LONG
&& start_pfn
>> addr_width
);
1093 BUG_ON(addr_width
< BITS_PER_LONG
&& last_pfn
>> addr_width
);
1094 BUG_ON(start_pfn
> last_pfn
);
1096 /* we don't need lock here; nobody else touches the iova range */
1097 freelist
= dma_pte_clear_level(domain
, agaw_to_level(domain
->agaw
),
1098 domain
->pgd
, 0, start_pfn
, last_pfn
, NULL
);
1101 if (start_pfn
== 0 && last_pfn
== DOMAIN_MAX_PFN(domain
->gaw
)) {
1102 struct page
*pgd_page
= virt_to_page(domain
->pgd
);
1103 pgd_page
->freelist
= freelist
;
1104 freelist
= pgd_page
;
1112 void dma_free_pagelist(struct page
*freelist
)
1116 while ((pg
= freelist
)) {
1117 freelist
= pg
->freelist
;
1118 free_pgtable_page(page_address(pg
));
1122 /* iommu handling */
1123 static int iommu_alloc_root_entry(struct intel_iommu
*iommu
)
1125 struct root_entry
*root
;
1126 unsigned long flags
;
1128 root
= (struct root_entry
*)alloc_pgtable_page(iommu
->node
);
1132 __iommu_flush_cache(iommu
, root
, ROOT_SIZE
);
1134 spin_lock_irqsave(&iommu
->lock
, flags
);
1135 iommu
->root_entry
= root
;
1136 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1141 static void iommu_set_root_entry(struct intel_iommu
*iommu
)
1147 addr
= iommu
->root_entry
;
1149 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
1150 dmar_writeq(iommu
->reg
+ DMAR_RTADDR_REG
, virt_to_phys(addr
));
1152 writel(iommu
->gcmd
| DMA_GCMD_SRTP
, iommu
->reg
+ DMAR_GCMD_REG
);
1154 /* Make sure hardware complete it */
1155 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
1156 readl
, (sts
& DMA_GSTS_RTPS
), sts
);
1158 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1161 static void iommu_flush_write_buffer(struct intel_iommu
*iommu
)
1166 if (!rwbf_quirk
&& !cap_rwbf(iommu
->cap
))
1169 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
1170 writel(iommu
->gcmd
| DMA_GCMD_WBF
, iommu
->reg
+ DMAR_GCMD_REG
);
1172 /* Make sure hardware complete it */
1173 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
1174 readl
, (!(val
& DMA_GSTS_WBFS
)), val
);
1176 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1179 /* return value determine if we need a write buffer flush */
1180 static void __iommu_flush_context(struct intel_iommu
*iommu
,
1181 u16 did
, u16 source_id
, u8 function_mask
,
1188 case DMA_CCMD_GLOBAL_INVL
:
1189 val
= DMA_CCMD_GLOBAL_INVL
;
1191 case DMA_CCMD_DOMAIN_INVL
:
1192 val
= DMA_CCMD_DOMAIN_INVL
|DMA_CCMD_DID(did
);
1194 case DMA_CCMD_DEVICE_INVL
:
1195 val
= DMA_CCMD_DEVICE_INVL
|DMA_CCMD_DID(did
)
1196 | DMA_CCMD_SID(source_id
) | DMA_CCMD_FM(function_mask
);
1201 val
|= DMA_CCMD_ICC
;
1203 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
1204 dmar_writeq(iommu
->reg
+ DMAR_CCMD_REG
, val
);
1206 /* Make sure hardware complete it */
1207 IOMMU_WAIT_OP(iommu
, DMAR_CCMD_REG
,
1208 dmar_readq
, (!(val
& DMA_CCMD_ICC
)), val
);
1210 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1213 /* return value determine if we need a write buffer flush */
1214 static void __iommu_flush_iotlb(struct intel_iommu
*iommu
, u16 did
,
1215 u64 addr
, unsigned int size_order
, u64 type
)
1217 int tlb_offset
= ecap_iotlb_offset(iommu
->ecap
);
1218 u64 val
= 0, val_iva
= 0;
1222 case DMA_TLB_GLOBAL_FLUSH
:
1223 /* global flush doesn't need set IVA_REG */
1224 val
= DMA_TLB_GLOBAL_FLUSH
|DMA_TLB_IVT
;
1226 case DMA_TLB_DSI_FLUSH
:
1227 val
= DMA_TLB_DSI_FLUSH
|DMA_TLB_IVT
|DMA_TLB_DID(did
);
1229 case DMA_TLB_PSI_FLUSH
:
1230 val
= DMA_TLB_PSI_FLUSH
|DMA_TLB_IVT
|DMA_TLB_DID(did
);
1231 /* IH bit is passed in as part of address */
1232 val_iva
= size_order
| addr
;
1237 /* Note: set drain read/write */
1240 * This is probably to be super secure.. Looks like we can
1241 * ignore it without any impact.
1243 if (cap_read_drain(iommu
->cap
))
1244 val
|= DMA_TLB_READ_DRAIN
;
1246 if (cap_write_drain(iommu
->cap
))
1247 val
|= DMA_TLB_WRITE_DRAIN
;
1249 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
1250 /* Note: Only uses first TLB reg currently */
1252 dmar_writeq(iommu
->reg
+ tlb_offset
, val_iva
);
1253 dmar_writeq(iommu
->reg
+ tlb_offset
+ 8, val
);
1255 /* Make sure hardware complete it */
1256 IOMMU_WAIT_OP(iommu
, tlb_offset
+ 8,
1257 dmar_readq
, (!(val
& DMA_TLB_IVT
)), val
);
1259 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1261 /* check IOTLB invalidation granularity */
1262 if (DMA_TLB_IAIG(val
) == 0)
1263 printk(KERN_ERR
"IOMMU: flush IOTLB failed\n");
1264 if (DMA_TLB_IAIG(val
) != DMA_TLB_IIRG(type
))
1265 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1266 (unsigned long long)DMA_TLB_IIRG(type
),
1267 (unsigned long long)DMA_TLB_IAIG(val
));
1270 static struct device_domain_info
*
1271 iommu_support_dev_iotlb (struct dmar_domain
*domain
, struct intel_iommu
*iommu
,
1275 unsigned long flags
;
1276 struct device_domain_info
*info
;
1277 struct pci_dev
*pdev
;
1279 if (!ecap_dev_iotlb_support(iommu
->ecap
))
1285 spin_lock_irqsave(&device_domain_lock
, flags
);
1286 list_for_each_entry(info
, &domain
->devices
, link
)
1287 if (info
->bus
== bus
&& info
->devfn
== devfn
) {
1291 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1293 if (!found
|| !info
->dev
|| !dev_is_pci(info
->dev
))
1296 pdev
= to_pci_dev(info
->dev
);
1298 if (!pci_find_ext_capability(pdev
, PCI_EXT_CAP_ID_ATS
))
1301 if (!dmar_find_matched_atsr_unit(pdev
))
1307 static void iommu_enable_dev_iotlb(struct device_domain_info
*info
)
1309 if (!info
|| !dev_is_pci(info
->dev
))
1312 pci_enable_ats(to_pci_dev(info
->dev
), VTD_PAGE_SHIFT
);
1315 static void iommu_disable_dev_iotlb(struct device_domain_info
*info
)
1317 if (!info
->dev
|| !dev_is_pci(info
->dev
) ||
1318 !pci_ats_enabled(to_pci_dev(info
->dev
)))
1321 pci_disable_ats(to_pci_dev(info
->dev
));
1324 static void iommu_flush_dev_iotlb(struct dmar_domain
*domain
,
1325 u64 addr
, unsigned mask
)
1328 unsigned long flags
;
1329 struct device_domain_info
*info
;
1331 spin_lock_irqsave(&device_domain_lock
, flags
);
1332 list_for_each_entry(info
, &domain
->devices
, link
) {
1333 struct pci_dev
*pdev
;
1334 if (!info
->dev
|| !dev_is_pci(info
->dev
))
1337 pdev
= to_pci_dev(info
->dev
);
1338 if (!pci_ats_enabled(pdev
))
1341 sid
= info
->bus
<< 8 | info
->devfn
;
1342 qdep
= pci_ats_queue_depth(pdev
);
1343 qi_flush_dev_iotlb(info
->iommu
, sid
, qdep
, addr
, mask
);
1345 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1348 static void iommu_flush_iotlb_psi(struct intel_iommu
*iommu
, u16 did
,
1349 unsigned long pfn
, unsigned int pages
, int ih
, int map
)
1351 unsigned int mask
= ilog2(__roundup_pow_of_two(pages
));
1352 uint64_t addr
= (uint64_t)pfn
<< VTD_PAGE_SHIFT
;
1359 * Fallback to domain selective flush if no PSI support or the size is
1361 * PSI requires page size to be 2 ^ x, and the base address is naturally
1362 * aligned to the size
1364 if (!cap_pgsel_inv(iommu
->cap
) || mask
> cap_max_amask_val(iommu
->cap
))
1365 iommu
->flush
.flush_iotlb(iommu
, did
, 0, 0,
1368 iommu
->flush
.flush_iotlb(iommu
, did
, addr
| ih
, mask
,
1372 * In caching mode, changes of pages from non-present to present require
1373 * flush. However, device IOTLB doesn't need to be flushed in this case.
1375 if (!cap_caching_mode(iommu
->cap
) || !map
)
1376 iommu_flush_dev_iotlb(iommu
->domains
[did
], addr
, mask
);
1379 static void iommu_disable_protect_mem_regions(struct intel_iommu
*iommu
)
1382 unsigned long flags
;
1384 raw_spin_lock_irqsave(&iommu
->register_lock
, flags
);
1385 pmen
= readl(iommu
->reg
+ DMAR_PMEN_REG
);
1386 pmen
&= ~DMA_PMEN_EPM
;
1387 writel(pmen
, iommu
->reg
+ DMAR_PMEN_REG
);
1389 /* wait for the protected region status bit to clear */
1390 IOMMU_WAIT_OP(iommu
, DMAR_PMEN_REG
,
1391 readl
, !(pmen
& DMA_PMEN_PRS
), pmen
);
1393 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flags
);
1396 static int iommu_enable_translation(struct intel_iommu
*iommu
)
1399 unsigned long flags
;
1401 raw_spin_lock_irqsave(&iommu
->register_lock
, flags
);
1402 iommu
->gcmd
|= DMA_GCMD_TE
;
1403 writel(iommu
->gcmd
, iommu
->reg
+ DMAR_GCMD_REG
);
1405 /* Make sure hardware complete it */
1406 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
1407 readl
, (sts
& DMA_GSTS_TES
), sts
);
1409 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flags
);
1413 static int iommu_disable_translation(struct intel_iommu
*iommu
)
1418 raw_spin_lock_irqsave(&iommu
->register_lock
, flag
);
1419 iommu
->gcmd
&= ~DMA_GCMD_TE
;
1420 writel(iommu
->gcmd
, iommu
->reg
+ DMAR_GCMD_REG
);
1422 /* Make sure hardware complete it */
1423 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
1424 readl
, (!(sts
& DMA_GSTS_TES
)), sts
);
1426 raw_spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
1431 static int iommu_init_domains(struct intel_iommu
*iommu
)
1433 unsigned long ndomains
;
1434 unsigned long nlongs
;
1436 ndomains
= cap_ndoms(iommu
->cap
);
1437 pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1438 iommu
->seq_id
, ndomains
);
1439 nlongs
= BITS_TO_LONGS(ndomains
);
1441 spin_lock_init(&iommu
->lock
);
1443 /* TBD: there might be 64K domains,
1444 * consider other allocation for future chip
1446 iommu
->domain_ids
= kcalloc(nlongs
, sizeof(unsigned long), GFP_KERNEL
);
1447 if (!iommu
->domain_ids
) {
1448 pr_err("IOMMU%d: allocating domain id array failed\n",
1452 iommu
->domains
= kcalloc(ndomains
, sizeof(struct dmar_domain
*),
1454 if (!iommu
->domains
) {
1455 pr_err("IOMMU%d: allocating domain array failed\n",
1457 kfree(iommu
->domain_ids
);
1458 iommu
->domain_ids
= NULL
;
1463 * if Caching mode is set, then invalid translations are tagged
1464 * with domainid 0. Hence we need to pre-allocate it.
1466 if (cap_caching_mode(iommu
->cap
))
1467 set_bit(0, iommu
->domain_ids
);
1471 static void free_dmar_iommu(struct intel_iommu
*iommu
)
1473 struct dmar_domain
*domain
;
1475 unsigned long flags
;
1477 if ((iommu
->domains
) && (iommu
->domain_ids
)) {
1478 for_each_set_bit(i
, iommu
->domain_ids
, cap_ndoms(iommu
->cap
)) {
1480 * Domain id 0 is reserved for invalid translation
1481 * if hardware supports caching mode.
1483 if (cap_caching_mode(iommu
->cap
) && i
== 0)
1486 domain
= iommu
->domains
[i
];
1487 clear_bit(i
, iommu
->domain_ids
);
1489 spin_lock_irqsave(&domain
->iommu_lock
, flags
);
1490 count
= --domain
->iommu_count
;
1491 spin_unlock_irqrestore(&domain
->iommu_lock
, flags
);
1493 domain_exit(domain
);
1497 if (iommu
->gcmd
& DMA_GCMD_TE
)
1498 iommu_disable_translation(iommu
);
1500 kfree(iommu
->domains
);
1501 kfree(iommu
->domain_ids
);
1502 iommu
->domains
= NULL
;
1503 iommu
->domain_ids
= NULL
;
1505 g_iommus
[iommu
->seq_id
] = NULL
;
1507 /* free context mapping */
1508 free_context_table(iommu
);
1511 static struct dmar_domain
*alloc_domain(bool vm
)
1513 /* domain id for virtual machine, it won't be set in context */
1514 static atomic_t vm_domid
= ATOMIC_INIT(0);
1515 struct dmar_domain
*domain
;
1517 domain
= alloc_domain_mem();
1522 domain
->iommu_count
= 0;
1523 memset(domain
->iommu_bmp
, 0, sizeof(domain
->iommu_bmp
));
1525 spin_lock_init(&domain
->iommu_lock
);
1526 INIT_LIST_HEAD(&domain
->devices
);
1528 domain
->id
= atomic_inc_return(&vm_domid
);
1529 domain
->flags
= DOMAIN_FLAG_VIRTUAL_MACHINE
;
1535 static int iommu_attach_domain(struct dmar_domain
*domain
,
1536 struct intel_iommu
*iommu
)
1539 unsigned long ndomains
;
1540 unsigned long flags
;
1542 ndomains
= cap_ndoms(iommu
->cap
);
1544 spin_lock_irqsave(&iommu
->lock
, flags
);
1546 num
= find_first_zero_bit(iommu
->domain_ids
, ndomains
);
1547 if (num
>= ndomains
) {
1548 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1549 printk(KERN_ERR
"IOMMU: no free domain ids\n");
1554 domain
->iommu_count
++;
1555 set_bit(num
, iommu
->domain_ids
);
1556 set_bit(iommu
->seq_id
, domain
->iommu_bmp
);
1557 iommu
->domains
[num
] = domain
;
1558 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1563 static void iommu_detach_domain(struct dmar_domain
*domain
,
1564 struct intel_iommu
*iommu
)
1566 unsigned long flags
;
1569 spin_lock_irqsave(&iommu
->lock
, flags
);
1570 ndomains
= cap_ndoms(iommu
->cap
);
1571 for_each_set_bit(num
, iommu
->domain_ids
, ndomains
) {
1572 if (iommu
->domains
[num
] == domain
) {
1573 clear_bit(num
, iommu
->domain_ids
);
1574 iommu
->domains
[num
] = NULL
;
1578 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1581 static struct iova_domain reserved_iova_list
;
1582 static struct lock_class_key reserved_rbtree_key
;
1584 static int dmar_init_reserved_ranges(void)
1586 struct pci_dev
*pdev
= NULL
;
1590 init_iova_domain(&reserved_iova_list
, DMA_32BIT_PFN
);
1592 lockdep_set_class(&reserved_iova_list
.iova_rbtree_lock
,
1593 &reserved_rbtree_key
);
1595 /* IOAPIC ranges shouldn't be accessed by DMA */
1596 iova
= reserve_iova(&reserved_iova_list
, IOVA_PFN(IOAPIC_RANGE_START
),
1597 IOVA_PFN(IOAPIC_RANGE_END
));
1599 printk(KERN_ERR
"Reserve IOAPIC range failed\n");
1603 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1604 for_each_pci_dev(pdev
) {
1607 for (i
= 0; i
< PCI_NUM_RESOURCES
; i
++) {
1608 r
= &pdev
->resource
[i
];
1609 if (!r
->flags
|| !(r
->flags
& IORESOURCE_MEM
))
1611 iova
= reserve_iova(&reserved_iova_list
,
1615 printk(KERN_ERR
"Reserve iova failed\n");
1623 static void domain_reserve_special_ranges(struct dmar_domain
*domain
)
1625 copy_reserved_iova(&reserved_iova_list
, &domain
->iovad
);
1628 static inline int guestwidth_to_adjustwidth(int gaw
)
1631 int r
= (gaw
- 12) % 9;
1642 static int domain_init(struct dmar_domain
*domain
, int guest_width
)
1644 struct intel_iommu
*iommu
;
1645 int adjust_width
, agaw
;
1646 unsigned long sagaw
;
1648 init_iova_domain(&domain
->iovad
, DMA_32BIT_PFN
);
1649 domain_reserve_special_ranges(domain
);
1651 /* calculate AGAW */
1652 iommu
= domain_get_iommu(domain
);
1653 if (guest_width
> cap_mgaw(iommu
->cap
))
1654 guest_width
= cap_mgaw(iommu
->cap
);
1655 domain
->gaw
= guest_width
;
1656 adjust_width
= guestwidth_to_adjustwidth(guest_width
);
1657 agaw
= width_to_agaw(adjust_width
);
1658 sagaw
= cap_sagaw(iommu
->cap
);
1659 if (!test_bit(agaw
, &sagaw
)) {
1660 /* hardware doesn't support it, choose a bigger one */
1661 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw
);
1662 agaw
= find_next_bit(&sagaw
, 5, agaw
);
1666 domain
->agaw
= agaw
;
1668 if (ecap_coherent(iommu
->ecap
))
1669 domain
->iommu_coherency
= 1;
1671 domain
->iommu_coherency
= 0;
1673 if (ecap_sc_support(iommu
->ecap
))
1674 domain
->iommu_snooping
= 1;
1676 domain
->iommu_snooping
= 0;
1678 if (intel_iommu_superpage
)
1679 domain
->iommu_superpage
= fls(cap_super_page_val(iommu
->cap
));
1681 domain
->iommu_superpage
= 0;
1683 domain
->nid
= iommu
->node
;
1685 /* always allocate the top pgd */
1686 domain
->pgd
= (struct dma_pte
*)alloc_pgtable_page(domain
->nid
);
1689 __iommu_flush_cache(iommu
, domain
->pgd
, PAGE_SIZE
);
1693 static void domain_exit(struct dmar_domain
*domain
)
1695 struct dmar_drhd_unit
*drhd
;
1696 struct intel_iommu
*iommu
;
1697 struct page
*freelist
= NULL
;
1699 /* Domain 0 is reserved, so dont process it */
1703 /* Flush any lazy unmaps that may reference this domain */
1704 if (!intel_iommu_strict
)
1705 flush_unmaps_timeout(0);
1707 /* remove associated devices */
1708 domain_remove_dev_info(domain
);
1711 put_iova_domain(&domain
->iovad
);
1713 freelist
= domain_unmap(domain
, 0, DOMAIN_MAX_PFN(domain
->gaw
));
1715 /* clear attached or cached domains */
1717 for_each_active_iommu(iommu
, drhd
)
1718 if (domain
->flags
& DOMAIN_FLAG_VIRTUAL_MACHINE
||
1719 test_bit(iommu
->seq_id
, domain
->iommu_bmp
))
1720 iommu_detach_domain(domain
, iommu
);
1723 dma_free_pagelist(freelist
);
1725 free_domain_mem(domain
);
1728 static int domain_context_mapping_one(struct dmar_domain
*domain
,
1729 struct intel_iommu
*iommu
,
1730 u8 bus
, u8 devfn
, int translation
)
1732 struct context_entry
*context
;
1733 unsigned long flags
;
1734 struct dma_pte
*pgd
;
1736 unsigned long ndomains
;
1739 struct device_domain_info
*info
= NULL
;
1741 pr_debug("Set context mapping for %02x:%02x.%d\n",
1742 bus
, PCI_SLOT(devfn
), PCI_FUNC(devfn
));
1744 BUG_ON(!domain
->pgd
);
1745 BUG_ON(translation
!= CONTEXT_TT_PASS_THROUGH
&&
1746 translation
!= CONTEXT_TT_MULTI_LEVEL
);
1748 context
= device_to_context_entry(iommu
, bus
, devfn
);
1751 spin_lock_irqsave(&iommu
->lock
, flags
);
1752 if (context_present(context
)) {
1753 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1760 if (domain
->flags
& DOMAIN_FLAG_VIRTUAL_MACHINE
||
1761 domain
->flags
& DOMAIN_FLAG_STATIC_IDENTITY
) {
1764 /* find an available domain id for this device in iommu */
1765 ndomains
= cap_ndoms(iommu
->cap
);
1766 for_each_set_bit(num
, iommu
->domain_ids
, ndomains
) {
1767 if (iommu
->domains
[num
] == domain
) {
1775 num
= find_first_zero_bit(iommu
->domain_ids
, ndomains
);
1776 if (num
>= ndomains
) {
1777 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1778 printk(KERN_ERR
"IOMMU: no free domain ids\n");
1782 set_bit(num
, iommu
->domain_ids
);
1783 iommu
->domains
[num
] = domain
;
1787 /* Skip top levels of page tables for
1788 * iommu which has less agaw than default.
1789 * Unnecessary for PT mode.
1791 if (translation
!= CONTEXT_TT_PASS_THROUGH
) {
1792 for (agaw
= domain
->agaw
; agaw
!= iommu
->agaw
; agaw
--) {
1793 pgd
= phys_to_virt(dma_pte_addr(pgd
));
1794 if (!dma_pte_present(pgd
)) {
1795 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1802 context_set_domain_id(context
, id
);
1804 if (translation
!= CONTEXT_TT_PASS_THROUGH
) {
1805 info
= iommu_support_dev_iotlb(domain
, iommu
, bus
, devfn
);
1806 translation
= info
? CONTEXT_TT_DEV_IOTLB
:
1807 CONTEXT_TT_MULTI_LEVEL
;
1810 * In pass through mode, AW must be programmed to indicate the largest
1811 * AGAW value supported by hardware. And ASR is ignored by hardware.
1813 if (unlikely(translation
== CONTEXT_TT_PASS_THROUGH
))
1814 context_set_address_width(context
, iommu
->msagaw
);
1816 context_set_address_root(context
, virt_to_phys(pgd
));
1817 context_set_address_width(context
, iommu
->agaw
);
1820 context_set_translation_type(context
, translation
);
1821 context_set_fault_enable(context
);
1822 context_set_present(context
);
1823 domain_flush_cache(domain
, context
, sizeof(*context
));
1826 * It's a non-present to present mapping. If hardware doesn't cache
1827 * non-present entry we only need to flush the write-buffer. If the
1828 * _does_ cache non-present entries, then it does so in the special
1829 * domain #0, which we have to flush:
1831 if (cap_caching_mode(iommu
->cap
)) {
1832 iommu
->flush
.flush_context(iommu
, 0,
1833 (((u16
)bus
) << 8) | devfn
,
1834 DMA_CCMD_MASK_NOBIT
,
1835 DMA_CCMD_DEVICE_INVL
);
1836 iommu
->flush
.flush_iotlb(iommu
, domain
->id
, 0, 0, DMA_TLB_DSI_FLUSH
);
1838 iommu_flush_write_buffer(iommu
);
1840 iommu_enable_dev_iotlb(info
);
1841 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1843 spin_lock_irqsave(&domain
->iommu_lock
, flags
);
1844 if (!test_and_set_bit(iommu
->seq_id
, domain
->iommu_bmp
)) {
1845 domain
->iommu_count
++;
1846 if (domain
->iommu_count
== 1)
1847 domain
->nid
= iommu
->node
;
1848 domain_update_iommu_cap(domain
);
1850 spin_unlock_irqrestore(&domain
->iommu_lock
, flags
);
1855 domain_context_mapping(struct dmar_domain
*domain
, struct device
*dev
,
1859 struct pci_dev
*pdev
, *tmp
, *parent
;
1860 struct intel_iommu
*iommu
;
1863 iommu
= device_to_iommu(dev
, &bus
, &devfn
);
1867 ret
= domain_context_mapping_one(domain
, iommu
, bus
, devfn
,
1869 if (ret
|| !dev_is_pci(dev
))
1872 /* dependent device mapping */
1873 pdev
= to_pci_dev(dev
);
1874 tmp
= pci_find_upstream_pcie_bridge(pdev
);
1877 /* Secondary interface's bus number and devfn 0 */
1878 parent
= pdev
->bus
->self
;
1879 while (parent
!= tmp
) {
1880 ret
= domain_context_mapping_one(domain
, iommu
,
1881 parent
->bus
->number
,
1882 parent
->devfn
, translation
);
1885 parent
= parent
->bus
->self
;
1887 if (pci_is_pcie(tmp
)) /* this is a PCIe-to-PCI bridge */
1888 return domain_context_mapping_one(domain
, iommu
,
1889 tmp
->subordinate
->number
, 0,
1891 else /* this is a legacy PCI bridge */
1892 return domain_context_mapping_one(domain
, iommu
,
1898 static int domain_context_mapped(struct device
*dev
)
1901 struct pci_dev
*pdev
, *tmp
, *parent
;
1902 struct intel_iommu
*iommu
;
1905 iommu
= device_to_iommu(dev
, &bus
, &devfn
);
1909 ret
= device_context_mapped(iommu
, bus
, devfn
);
1910 if (!ret
|| !dev_is_pci(dev
))
1913 /* dependent device mapping */
1914 pdev
= to_pci_dev(dev
);
1915 tmp
= pci_find_upstream_pcie_bridge(pdev
);
1918 /* Secondary interface's bus number and devfn 0 */
1919 parent
= pdev
->bus
->self
;
1920 while (parent
!= tmp
) {
1921 ret
= device_context_mapped(iommu
, parent
->bus
->number
,
1925 parent
= parent
->bus
->self
;
1927 if (pci_is_pcie(tmp
))
1928 return device_context_mapped(iommu
, tmp
->subordinate
->number
,
1931 return device_context_mapped(iommu
, tmp
->bus
->number
,
1935 /* Returns a number of VTD pages, but aligned to MM page size */
1936 static inline unsigned long aligned_nrpages(unsigned long host_addr
,
1939 host_addr
&= ~PAGE_MASK
;
1940 return PAGE_ALIGN(host_addr
+ size
) >> VTD_PAGE_SHIFT
;
1943 /* Return largest possible superpage level for a given mapping */
1944 static inline int hardware_largepage_caps(struct dmar_domain
*domain
,
1945 unsigned long iov_pfn
,
1946 unsigned long phy_pfn
,
1947 unsigned long pages
)
1949 int support
, level
= 1;
1950 unsigned long pfnmerge
;
1952 support
= domain
->iommu_superpage
;
1954 /* To use a large page, the virtual *and* physical addresses
1955 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1956 of them will mean we have to use smaller pages. So just
1957 merge them and check both at once. */
1958 pfnmerge
= iov_pfn
| phy_pfn
;
1960 while (support
&& !(pfnmerge
& ~VTD_STRIDE_MASK
)) {
1961 pages
>>= VTD_STRIDE_SHIFT
;
1964 pfnmerge
>>= VTD_STRIDE_SHIFT
;
1971 static int __domain_mapping(struct dmar_domain
*domain
, unsigned long iov_pfn
,
1972 struct scatterlist
*sg
, unsigned long phys_pfn
,
1973 unsigned long nr_pages
, int prot
)
1975 struct dma_pte
*first_pte
= NULL
, *pte
= NULL
;
1976 phys_addr_t
uninitialized_var(pteval
);
1977 int addr_width
= agaw_to_width(domain
->agaw
) - VTD_PAGE_SHIFT
;
1978 unsigned long sg_res
= 0;
1979 unsigned int largepage_lvl
= 0;
1980 unsigned long lvl_pages
= 0;
1982 BUG_ON(addr_width
< BITS_PER_LONG
&& (iov_pfn
+ nr_pages
- 1) >> addr_width
);
1984 if ((prot
& (DMA_PTE_READ
|DMA_PTE_WRITE
)) == 0)
1987 prot
&= DMA_PTE_READ
| DMA_PTE_WRITE
| DMA_PTE_SNP
;
1991 pteval
= ((phys_addr_t
)phys_pfn
<< VTD_PAGE_SHIFT
) | prot
;
1994 while (nr_pages
> 0) {
1998 sg_res
= aligned_nrpages(sg
->offset
, sg
->length
);
1999 sg
->dma_address
= ((dma_addr_t
)iov_pfn
<< VTD_PAGE_SHIFT
) + sg
->offset
;
2000 sg
->dma_length
= sg
->length
;
2001 pteval
= page_to_phys(sg_page(sg
)) | prot
;
2002 phys_pfn
= pteval
>> VTD_PAGE_SHIFT
;
2006 largepage_lvl
= hardware_largepage_caps(domain
, iov_pfn
, phys_pfn
, sg_res
);
2008 first_pte
= pte
= pfn_to_dma_pte(domain
, iov_pfn
, &largepage_lvl
);
2011 /* It is large page*/
2012 if (largepage_lvl
> 1) {
2013 pteval
|= DMA_PTE_LARGE_PAGE
;
2014 /* Ensure that old small page tables are removed to make room
2015 for superpage, if they exist. */
2016 dma_pte_clear_range(domain
, iov_pfn
,
2017 iov_pfn
+ lvl_to_nr_pages(largepage_lvl
) - 1);
2018 dma_pte_free_pagetable(domain
, iov_pfn
,
2019 iov_pfn
+ lvl_to_nr_pages(largepage_lvl
) - 1);
2021 pteval
&= ~(uint64_t)DMA_PTE_LARGE_PAGE
;
2025 /* We don't need lock here, nobody else
2026 * touches the iova range
2028 tmp
= cmpxchg64_local(&pte
->val
, 0ULL, pteval
);
2030 static int dumps
= 5;
2031 printk(KERN_CRIT
"ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2032 iov_pfn
, tmp
, (unsigned long long)pteval
);
2035 debug_dma_dump_mappings(NULL
);
2040 lvl_pages
= lvl_to_nr_pages(largepage_lvl
);
2042 BUG_ON(nr_pages
< lvl_pages
);
2043 BUG_ON(sg_res
< lvl_pages
);
2045 nr_pages
-= lvl_pages
;
2046 iov_pfn
+= lvl_pages
;
2047 phys_pfn
+= lvl_pages
;
2048 pteval
+= lvl_pages
* VTD_PAGE_SIZE
;
2049 sg_res
-= lvl_pages
;
2051 /* If the next PTE would be the first in a new page, then we
2052 need to flush the cache on the entries we've just written.
2053 And then we'll need to recalculate 'pte', so clear it and
2054 let it get set again in the if (!pte) block above.
2056 If we're done (!nr_pages) we need to flush the cache too.
2058 Also if we've been setting superpages, we may need to
2059 recalculate 'pte' and switch back to smaller pages for the
2060 end of the mapping, if the trailing size is not enough to
2061 use another superpage (i.e. sg_res < lvl_pages). */
2063 if (!nr_pages
|| first_pte_in_page(pte
) ||
2064 (largepage_lvl
> 1 && sg_res
< lvl_pages
)) {
2065 domain_flush_cache(domain
, first_pte
,
2066 (void *)pte
- (void *)first_pte
);
2070 if (!sg_res
&& nr_pages
)
2076 static inline int domain_sg_mapping(struct dmar_domain
*domain
, unsigned long iov_pfn
,
2077 struct scatterlist
*sg
, unsigned long nr_pages
,
2080 return __domain_mapping(domain
, iov_pfn
, sg
, 0, nr_pages
, prot
);
2083 static inline int domain_pfn_mapping(struct dmar_domain
*domain
, unsigned long iov_pfn
,
2084 unsigned long phys_pfn
, unsigned long nr_pages
,
2087 return __domain_mapping(domain
, iov_pfn
, NULL
, phys_pfn
, nr_pages
, prot
);
2090 static void iommu_detach_dev(struct intel_iommu
*iommu
, u8 bus
, u8 devfn
)
2095 clear_context_table(iommu
, bus
, devfn
);
2096 iommu
->flush
.flush_context(iommu
, 0, 0, 0,
2097 DMA_CCMD_GLOBAL_INVL
);
2098 iommu
->flush
.flush_iotlb(iommu
, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH
);
2101 static inline void unlink_domain_info(struct device_domain_info
*info
)
2103 assert_spin_locked(&device_domain_lock
);
2104 list_del(&info
->link
);
2105 list_del(&info
->global
);
2107 info
->dev
->archdata
.iommu
= NULL
;
2110 static void domain_remove_dev_info(struct dmar_domain
*domain
)
2112 struct device_domain_info
*info
;
2113 unsigned long flags
, flags2
;
2115 spin_lock_irqsave(&device_domain_lock
, flags
);
2116 while (!list_empty(&domain
->devices
)) {
2117 info
= list_entry(domain
->devices
.next
,
2118 struct device_domain_info
, link
);
2119 unlink_domain_info(info
);
2120 spin_unlock_irqrestore(&device_domain_lock
, flags
);
2122 iommu_disable_dev_iotlb(info
);
2123 iommu_detach_dev(info
->iommu
, info
->bus
, info
->devfn
);
2125 if (domain
->flags
& DOMAIN_FLAG_VIRTUAL_MACHINE
) {
2126 iommu_detach_dependent_devices(info
->iommu
, info
->dev
);
2127 /* clear this iommu in iommu_bmp, update iommu count
2130 spin_lock_irqsave(&domain
->iommu_lock
, flags2
);
2131 if (test_and_clear_bit(info
->iommu
->seq_id
,
2132 domain
->iommu_bmp
)) {
2133 domain
->iommu_count
--;
2134 domain_update_iommu_cap(domain
);
2136 spin_unlock_irqrestore(&domain
->iommu_lock
, flags2
);
2139 free_devinfo_mem(info
);
2140 spin_lock_irqsave(&device_domain_lock
, flags
);
2142 spin_unlock_irqrestore(&device_domain_lock
, flags
);
2147 * Note: we use struct device->archdata.iommu stores the info
2149 static struct dmar_domain
*find_domain(struct device
*dev
)
2151 struct device_domain_info
*info
;
2153 /* No lock here, assumes no domain exit in normal case */
2154 info
= dev
->archdata
.iommu
;
2156 return info
->domain
;
2160 static inline struct device_domain_info
*
2161 dmar_search_domain_by_dev_info(int segment
, int bus
, int devfn
)
2163 struct device_domain_info
*info
;
2165 list_for_each_entry(info
, &device_domain_list
, global
)
2166 if (info
->iommu
->segment
== segment
&& info
->bus
== bus
&&
2167 info
->devfn
== devfn
)
2173 static struct dmar_domain
*dmar_insert_dev_info(struct intel_iommu
*iommu
,
2176 struct dmar_domain
*domain
)
2178 struct dmar_domain
*found
= NULL
;
2179 struct device_domain_info
*info
;
2180 unsigned long flags
;
2182 info
= alloc_devinfo_mem();
2187 info
->devfn
= devfn
;
2189 info
->domain
= domain
;
2190 info
->iommu
= iommu
;
2192 domain
->flags
|= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES
;
2194 spin_lock_irqsave(&device_domain_lock
, flags
);
2196 found
= find_domain(dev
);
2198 struct device_domain_info
*info2
;
2199 info2
= dmar_search_domain_by_dev_info(iommu
->segment
, bus
, devfn
);
2201 found
= info2
->domain
;
2204 spin_unlock_irqrestore(&device_domain_lock
, flags
);
2205 free_devinfo_mem(info
);
2206 /* Caller must free the original domain */
2210 list_add(&info
->link
, &domain
->devices
);
2211 list_add(&info
->global
, &device_domain_list
);
2213 dev
->archdata
.iommu
= info
;
2214 spin_unlock_irqrestore(&device_domain_lock
, flags
);
2219 /* domain is initialized */
2220 static struct dmar_domain
*get_domain_for_dev(struct device
*dev
, int gaw
)
2222 struct dmar_domain
*domain
, *free
= NULL
;
2223 struct intel_iommu
*iommu
= NULL
;
2224 struct device_domain_info
*info
;
2225 struct pci_dev
*dev_tmp
= NULL
;
2226 unsigned long flags
;
2227 u8 bus
, devfn
, bridge_bus
, bridge_devfn
;
2229 domain
= find_domain(dev
);
2233 if (dev_is_pci(dev
)) {
2234 struct pci_dev
*pdev
= to_pci_dev(dev
);
2237 segment
= pci_domain_nr(pdev
->bus
);
2238 dev_tmp
= pci_find_upstream_pcie_bridge(pdev
);
2240 if (pci_is_pcie(dev_tmp
)) {
2241 bridge_bus
= dev_tmp
->subordinate
->number
;
2244 bridge_bus
= dev_tmp
->bus
->number
;
2245 bridge_devfn
= dev_tmp
->devfn
;
2247 spin_lock_irqsave(&device_domain_lock
, flags
);
2248 info
= dmar_search_domain_by_dev_info(segment
,
2252 iommu
= info
->iommu
;
2253 domain
= info
->domain
;
2255 spin_unlock_irqrestore(&device_domain_lock
, flags
);
2256 /* pcie-pci bridge already has a domain, uses it */
2262 iommu
= device_to_iommu(dev
, &bus
, &devfn
);
2266 /* Allocate and initialize new domain for the device */
2267 domain
= alloc_domain(false);
2270 if (iommu_attach_domain(domain
, iommu
)) {
2271 free_domain_mem(domain
);
2276 if (domain_init(domain
, gaw
))
2279 /* register pcie-to-pci device */
2281 domain
= dmar_insert_dev_info(iommu
, bridge_bus
, bridge_devfn
,
2288 domain
= dmar_insert_dev_info(iommu
, bus
, devfn
, dev
, domain
);
2296 static int iommu_identity_mapping
;
2297 #define IDENTMAP_ALL 1
2298 #define IDENTMAP_GFX 2
2299 #define IDENTMAP_AZALIA 4
static int iommu_domain_identity_map(struct dmar_domain *domain,
				     unsigned long long start,
				     unsigned long long end)
{
	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;

	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
			  dma_to_mm_pfn(last_vpfn))) {
		printk(KERN_ERR "IOMMU: reserve iova failed\n");
		return -ENOMEM;
	}

	pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
		 start, end, domain->id);
	/*
	 * RMRR range might have overlap with physical memory range,
	 * clear it first
	 */
	dma_pte_clear_range(domain, first_vpfn, last_vpfn);

	return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
				  last_vpfn - first_vpfn + 1,
				  DMA_PTE_READ|DMA_PTE_WRITE);
}
static int iommu_prepare_identity_map(struct device *dev,
				      unsigned long long start,
				      unsigned long long end)
{
	struct dmar_domain *domain;
	int ret;

	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
	if (!domain)
		return -ENOMEM;

	/* For _hardware_ passthrough, don't bother. But for software
	   passthrough, we do it anyway -- it may indicate a memory
	   range which is reserved in E820, so which didn't get set
	   up to start with in si_domain */
	if (domain == si_domain && hw_pass_through) {
		printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
		       dev_name(dev), start, end);
		return 0;
	}

	printk(KERN_INFO
	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
	       dev_name(dev), start, end);

	if (end < start) {
		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
			"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		ret = -EIO;
		goto error;
	}

	if (end >> agaw_to_width(domain->agaw)) {
		WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     agaw_to_width(domain->agaw),
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		ret = -EIO;
		goto error;
	}

	ret = iommu_domain_identity_map(domain, start, end);
	if (ret)
		goto error;

	/* context entry init */
	ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
	if (ret)
		goto error;

	return 0;

error:
	domain_exit(domain);
	return ret;
}
static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
					 struct device *dev)
{
	if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return 0;

	return iommu_prepare_identity_map(dev, rmrr->base_address,
					  rmrr->end_address);
}
#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *pdev;
	int ret;

	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (!pdev)
		return;

	printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);

	if (ret)
		printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
		       "floppy might not work\n");
}
#else
static inline void iommu_prepare_isa(void)
{
	return;
}
#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
static int md_domain_init(struct dmar_domain *domain, int guest_width);

static int __init si_domain_init(int hw)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int nid, ret = 0;

	si_domain = alloc_domain(false);
	if (!si_domain)
		return -EFAULT;

	si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;

	for_each_active_iommu(iommu, drhd) {
		ret = iommu_attach_domain(si_domain, iommu);
		if (ret) {
			domain_exit(si_domain);
			return -EFAULT;
		}
	}

	if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
		domain_exit(si_domain);
		return -EFAULT;
	}

	pr_debug("IOMMU: identity mapping domain is domain %d\n",
		 si_domain->id);

	if (hw)
		return 0;

	for_each_online_node(nid) {
		unsigned long start_pfn, end_pfn;
		int i;

		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
			ret = iommu_domain_identity_map(si_domain,
					PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
			if (ret)
				return ret;
		}
	}

	return 0;
}
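/*
 * si_domain is the single "static identity" domain shared by every device
 * running in passthrough/1:1 mode; identity_mapping() below simply checks
 * whether a device's archdata info points at it.
 */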
static int identity_mapping(struct device *dev)
{
	struct device_domain_info *info;

	if (likely(!iommu_identity_mapping))
		return 0;

	info = dev->archdata.iommu;
	if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
		return (info->domain == si_domain);

	return 0;
}
static int domain_add_dev_info(struct dmar_domain *domain,
			       struct device *dev, int translation)
{
	struct dmar_domain *ndomain;
	struct intel_iommu *iommu;
	u8 bus, devfn;
	int ret;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
	if (ndomain != domain)
		return -EBUSY;

	ret = domain_context_mapping(domain, dev, translation);
	if (ret) {
		domain_remove_one_dev_info(domain, dev);
		return ret;
	}

	return 0;
}
static bool device_has_rmrr(struct device *dev)
{
	struct dmar_rmrr_unit *rmrr;
	struct device *tmp;
	int i;

	rcu_read_lock();
	for_each_rmrr_units(rmrr) {
		/*
		 * Return TRUE if this RMRR contains the device that
		 * is passed in.
		 */
		for_each_active_dev_scope(rmrr->devices,
					  rmrr->devices_cnt, i, tmp)
			if (tmp == dev) {
				rcu_read_unlock();
				return true;
			}
	}
	rcu_read_unlock();
	return false;
}
/*
 * There are a couple of cases where we need to restrict the functionality of
 * devices associated with RMRRs.  The first is when evaluating a device for
 * identity mapping because problems exist when devices are moved in and out
 * of domains and their respective RMRR information is lost.  This means that
 * a device with associated RMRRs will never be in a "passthrough" domain.
 * The second is use of the device through the IOMMU API.  This interface
 * expects to have full control of the IOVA space for the device.  We cannot
 * satisfy both the requirement that RMRR access is maintained and have an
 * unencumbered IOVA space.  We also have no ability to quiesce the device's
 * use of the RMRR space or even inform the IOMMU API user of the restriction.
 * We therefore prevent devices associated with an RMRR from participating in
 * the IOMMU API, which eliminates them from device assignment.
 *
 * In both cases we assume that PCI USB devices with RMRRs have them largely
 * for historical reasons and that the RMRR space is not actively used post
 * boot.  This exclusion may change if vendors begin to abuse it.
 *
 * The same exception is made for graphics devices, with the requirement that
 * any use of the RMRR regions will be torn down before assigning the device
 * to a guest.
 */
static bool device_is_rmrr_locked(struct device *dev)
{
	if (!device_has_rmrr(dev))
		return false;

	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
			return false;
	}

	return true;
}
static int iommu_should_identity_map(struct device *dev, int startup)
{
	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if (device_is_rmrr_locked(dev))
			return 0;

		if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
			return 1;

		if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
			return 1;

		if (!(iommu_identity_mapping & IDENTMAP_ALL))
			return 0;

		/*
		 * We want to start off with all devices in the 1:1 domain, and
		 * take them out later if we find they can't access all of memory.
		 *
		 * However, we can't do this for PCI devices behind bridges,
		 * because all PCI devices behind the same bridge will end up
		 * with the same source-id on their transactions.
		 *
		 * Practically speaking, we can't change things around for these
		 * devices at run-time, because we can't be sure there'll be no
		 * DMA transactions in flight for any of their siblings.
		 *
		 * So PCI devices (unless they're on the root bus) as well as
		 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
		 * the 1:1 domain, just in _case_ one of their siblings turns out
		 * not to be able to map all of memory.
		 */
		if (!pci_is_pcie(pdev)) {
			if (!pci_is_root_bus(pdev->bus))
				return 0;
			if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
				return 0;
		} else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
			return 0;
	} else {
		if (device_has_rmrr(dev))
			return 0;
	}

	/*
	 * At boot time, we don't yet know if devices will be 64-bit capable.
	 * Assume that they will -- if they turn out not to be, then we can
	 * take them out of the 1:1 domain later.
	 */
	if (!startup) {
		/*
		 * If the device's dma_mask is less than the system's memory
		 * size then this is not a candidate for identity mapping.
		 */
		u64 dma_mask = *dev->dma_mask;

		if (dev->coherent_dma_mask &&
		    dev->coherent_dma_mask < dma_mask)
			dma_mask = dev->coherent_dma_mask;

		return dma_mask >= dma_get_required_mask(dev);
	}

	return 1;
}
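/*
 * Example of the !startup check above: a device whose driver called
 * dma_set_mask(dev, DMA_BIT_MASK(32)) on a machine with more than 4GiB of
 * RAM fails the dma_mask >= dma_get_required_mask(dev) test, so it is not
 * (re)admitted to the identity domain at run time.
 */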
static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
{
	int ret;

	if (!iommu_should_identity_map(dev, 1))
		return 0;

	ret = domain_add_dev_info(si_domain, dev,
				  hw ? CONTEXT_TT_PASS_THROUGH :
				       CONTEXT_TT_MULTI_LEVEL);
	if (!ret)
		pr_info("IOMMU: %s identity mapping for device %s\n",
			hw ? "hardware" : "software", dev_name(dev));
	else if (ret == -ENODEV)
		/* device not associated with an iommu */
		ret = 0;

	return ret;
}
static int __init iommu_prepare_static_identity_mapping(int hw)
{
	struct pci_dev *pdev = NULL;
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	struct device *dev;
	int i;
	int ret = 0;

	ret = si_domain_init(hw);
	if (ret)
		return -EFAULT;

	for_each_pci_dev(pdev) {
		ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
		if (ret)
			return ret;
	}

	for_each_active_iommu(iommu, drhd)
		for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
			struct acpi_device_physical_node *pn;
			struct acpi_device *adev;

			if (dev->bus != &acpi_bus_type)
				continue;

			adev = to_acpi_device(dev);
			mutex_lock(&adev->physical_node_lock);
			list_for_each_entry(pn, &adev->physical_node_list, node) {
				ret = dev_prepare_static_identity_mapping(pn->dev, hw);
				if (ret)
					break;
			}
			mutex_unlock(&adev->physical_node_lock);
			if (ret)
				return ret;
		}

	return 0;
}
static int __init init_dmars(void)
{
	struct dmar_drhd_unit *drhd;
	struct dmar_rmrr_unit *rmrr;
	struct device *dev;
	struct intel_iommu *iommu;
	int i, ret;

	/*
	 * for each drhd
	 *    allocate root
	 *    initialize and program root entry to not present
	 * endfor
	 */
	for_each_drhd_unit(drhd) {
		/*
		 * lock not needed as this is only incremented in the single
		 * threaded kernel __init code path; all other accesses are
		 * read only
		 */
		if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
			g_num_of_iommus++;
			continue;
		}
		printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
			    IOMMU_UNITS_SUPPORTED);
	}

	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
			   GFP_KERNEL);
	if (!g_iommus) {
		printk(KERN_ERR "Allocating global iommu array failed\n");
		ret = -ENOMEM;
		goto error;
	}

	deferred_flush = kzalloc(g_num_of_iommus *
		sizeof(struct deferred_flush_tables), GFP_KERNEL);
	if (!deferred_flush) {
		ret = -ENOMEM;
		goto free_g_iommus;
	}

	for_each_active_iommu(iommu, drhd) {
		g_iommus[iommu->seq_id] = iommu;

		ret = iommu_init_domains(iommu);
		if (ret)
			goto free_iommu;

		/*
		 * TBD:
		 * we could share the same root & context tables
		 * among all IOMMUs. Need to split it later.
		 */
		ret = iommu_alloc_root_entry(iommu);
		if (ret) {
			printk(KERN_ERR "IOMMU: allocate root entry failed\n");
			goto free_iommu;
		}
		if (!ecap_pass_through(iommu->ecap))
			hw_pass_through = 0;
	}

	/*
	 * Start from the sane iommu hardware state.
	 */
	for_each_active_iommu(iommu, drhd) {
		/*
		 * If the queued invalidation is already initialized by us
		 * (for example, while enabling interrupt-remapping) then
		 * we got the things already rolling from a sane state.
		 */
		if (iommu->qi)
			continue;

		/*
		 * Clear any previous faults.
		 */
		dmar_fault(-1, iommu);
		/*
		 * Disable queued invalidation if supported and already enabled
		 * before OS handover.
		 */
		dmar_disable_qi(iommu);
	}

	for_each_active_iommu(iommu, drhd) {
		if (dmar_enable_qi(iommu)) {
			/*
			 * Queued Invalidate not enabled, use Register Based
			 * Invalidate
			 */
			iommu->flush.flush_context = __iommu_flush_context;
			iommu->flush.flush_iotlb = __iommu_flush_iotlb;
			printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
			       "invalidation\n",
			       iommu->seq_id,
			       (unsigned long long)drhd->reg_base_addr);
		} else {
			iommu->flush.flush_context = qi_flush_context;
			iommu->flush.flush_iotlb = qi_flush_iotlb;
			printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
			       "invalidation\n",
			       iommu->seq_id,
			       (unsigned long long)drhd->reg_base_addr);
		}
	}

	if (iommu_pass_through)
		iommu_identity_mapping |= IDENTMAP_ALL;

#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
	iommu_identity_mapping |= IDENTMAP_GFX;
#endif

	check_tylersburg_isoch();

	/*
	 * If pass through is not set or not enabled, setup context entries for
	 * identity mappings for rmrr, gfx, and isa, and may fall back to static
	 * identity mapping if iommu_identity_mapping is set.
	 */
	if (iommu_identity_mapping) {
		ret = iommu_prepare_static_identity_mapping(hw_pass_through);
		if (ret) {
			printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
			goto free_iommu;
		}
	}
	/*
	 * For each rmrr
	 *   for each dev attached to rmrr
	 *   do
	 *     locate drhd for dev, alloc domain for dev
	 *     allocate free domain
	 *     allocate page table entries for rmrr
	 *     if context not allocated for bus
	 *           allocate and init context
	 *           set present in root table for this bus
	 *     init context with domain, translation etc
	 *    endfor
	 * endfor
	 */
	printk(KERN_INFO "IOMMU: Setting RMRR:\n");
	for_each_rmrr_units(rmrr) {
		/* some BIOS lists non-existent devices in DMAR table. */
		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
					  i, dev) {
			ret = iommu_prepare_rmrr_dev(rmrr, dev);
			if (ret)
				printk(KERN_ERR
				       "IOMMU: mapping reserved region failed\n");
		}
	}

	iommu_prepare_isa();

	/*
	 * for each drhd
	 *   enable fault log
	 *   global invalidate context cache
	 *   global invalidate iotlb
	 *   enable translation
	 */
	for_each_iommu(iommu, drhd) {
		if (drhd->ignored) {
			/*
			 * we always have to disable PMRs or DMA may fail on
			 * this device
			 */
			if (force_on)
				iommu_disable_protect_mem_regions(iommu);
			continue;
		}

		iommu_flush_write_buffer(iommu);

		ret = dmar_set_interrupt(iommu);
		if (ret)
			goto free_iommu;

		iommu_set_root_entry(iommu);

		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);

		ret = iommu_enable_translation(iommu);
		if (ret)
			goto free_iommu;

		iommu_disable_protect_mem_regions(iommu);
	}

	return 0;

free_iommu:
	for_each_active_iommu(iommu, drhd)
		free_dmar_iommu(iommu);
	kfree(deferred_flush);
free_g_iommus:
	kfree(g_iommus);
error:
	return ret;
}
/* This takes a number of _MM_ pages, not VTD pages */
static struct iova *intel_alloc_iova(struct device *dev,
				     struct dmar_domain *domain,
				     unsigned long nrpages, uint64_t dma_mask)
{
	struct iova *iova = NULL;

	/* Restrict dma_mask to the width that the iommu can handle */
	dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);

	if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
		/*
		 * First try to allocate an io virtual address in
		 * DMA_BIT_MASK(32) and if that fails then try allocating
		 * from higher range
		 */
		iova = alloc_iova(&domain->iovad, nrpages,
				  IOVA_PFN(DMA_BIT_MASK(32)), 1);
		if (iova)
			return iova;
	}
	iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
	if (unlikely(!iova)) {
		printk(KERN_ERR "Allocating %ld-page iova for %s failed",
		       nrpages, dev_name(dev));
		return NULL;
	}

	return iova;
}
static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
{
	struct dmar_domain *domain;
	int ret;

	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
	if (!domain) {
		printk(KERN_ERR "Allocating domain for %s failed",
		       dev_name(dev));
		return NULL;
	}

	/* make sure context mapping is ok */
	if (unlikely(!domain_context_mapped(dev))) {
		ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
		if (ret) {
			printk(KERN_ERR "Domain context map for %s failed",
			       dev_name(dev));
			return NULL;
		}
	}

	return domain;
}

static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
{
	struct device_domain_info *info;

	/* No lock here, assumes no domain exit in normal case */
	info = dev->archdata.iommu;
	if (info)
		return info->domain;

	return __get_valid_domain_for_dev(dev);
}
/* Check if the dev needs to go through non-identity map and unmap process.*/
static int iommu_no_mapping(struct device *dev)
{
	int found;

	if (iommu_dummy(dev))
		return 1;

	if (!iommu_identity_mapping)
		return 0;

	found = identity_mapping(dev);
	if (found) {
		if (iommu_should_identity_map(dev, 0))
			return 1;
		/*
		 * 32 bit DMA is removed from si_domain and fall back
		 * to non-identity mapping.
		 */
		domain_remove_one_dev_info(si_domain, dev);
		printk(KERN_INFO "32bit %s uses non-identity mapping\n",
		       dev_name(dev));
		return 0;
	} else {
		/*
		 * In case of a detached 64 bit DMA device from vm, the device
		 * is put into si_domain for identity mapping.
		 */
		if (iommu_should_identity_map(dev, 0)) {
			int ret;

			ret = domain_add_dev_info(si_domain, dev,
						  hw_pass_through ?
						  CONTEXT_TT_PASS_THROUGH :
						  CONTEXT_TT_MULTI_LEVEL);
			if (!ret) {
				printk(KERN_INFO "64bit %s uses identity mapping\n",
				       dev_name(dev));
				return 1;
			}
		}
	}

	return 0;
}
static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
				     size_t size, int dir, u64 dma_mask)
{
	struct dmar_domain *domain;
	phys_addr_t start_paddr;
	struct iova *iova;
	int prot = 0;
	int ret;
	struct intel_iommu *iommu;
	unsigned long paddr_pfn = paddr >> PAGE_SHIFT;

	BUG_ON(dir == DMA_NONE);

	if (iommu_no_mapping(dev))
		return paddr;

	domain = get_valid_domain_for_dev(dev);
	if (!domain)
		return 0;

	iommu = domain_get_iommu(domain);
	size = aligned_nrpages(paddr, size);

	iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
	if (!iova)
		goto error;

	/*
	 * Check if DMAR supports zero-length reads on write only
	 * mappings..
	 */
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
			!cap_zlr(iommu->cap))
		prot |= DMA_PTE_READ;
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		prot |= DMA_PTE_WRITE;
	/*
	 * paddr - (paddr + size) might be partial page, we should map the whole
	 * page.  Note: if two parts of one page are separately mapped, we
	 * might have two guest_addr mapping to the same host paddr, but this
	 * is not a big problem
	 */
	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
				 mm_to_dma_pfn(paddr_pfn), size, prot);
	if (ret)
		goto error;

	/* it's a non-present to present mapping. Only flush if caching mode */
	if (cap_caching_mode(iommu->cap))
		iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
	else
		iommu_flush_write_buffer(iommu);

	start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
	start_paddr += paddr & ~PAGE_MASK;
	return start_paddr;

error:
	if (iova)
		__free_iova(&domain->iovad, iova);
	printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
	       dev_name(dev), size, (unsigned long long)paddr, dir);
	return 0;
}
static dma_addr_t intel_map_page(struct device *dev, struct page *page,
				 unsigned long offset, size_t size,
				 enum dma_data_direction dir,
				 struct dma_attrs *attrs)
{
	return __intel_map_single(dev, page_to_phys(page) + offset, size,
				  dir, *dev->dma_mask);
}
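/*
 * Illustrative only (not part of this driver): once intel_dma_ops is
 * installed as dma_ops, an ordinary driver mapping ends up here via the
 * generic DMA API, e.g.:
 *
 *	dma_addr_t dma = dma_map_page(dev, page, 0, len, DMA_TO_DEVICE);
 *	if (dma_mapping_error(dev, dma))
 *		return -ENOMEM;
 */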
static void flush_unmaps(void)
{
	int i, j;

	timer_on = 0;

	/* just flush them all */
	for (i = 0; i < g_num_of_iommus; i++) {
		struct intel_iommu *iommu = g_iommus[i];
		if (!iommu)
			continue;

		if (!deferred_flush[i].next)
			continue;

		/* In caching mode, global flushes turn emulation expensive */
		if (!cap_caching_mode(iommu->cap))
			iommu->flush.flush_iotlb(iommu, 0, 0, 0,
					 DMA_TLB_GLOBAL_FLUSH);
		for (j = 0; j < deferred_flush[i].next; j++) {
			unsigned long mask;
			struct iova *iova = deferred_flush[i].iova[j];
			struct dmar_domain *domain = deferred_flush[i].domain[j];

			/* On real hardware multiple invalidations are expensive */
			if (cap_caching_mode(iommu->cap))
				iommu_flush_iotlb_psi(iommu, domain->id,
					iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1,
					!deferred_flush[i].freelist[j], 0);
			else {
				mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
				iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
						(uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
			}
			__free_iova(&deferred_flush[i].domain[j]->iovad, iova);
			if (deferred_flush[i].freelist[j])
				dma_free_pagelist(deferred_flush[i].freelist[j]);
		}
		deferred_flush[i].next = 0;
	}

	list_size = 0;
}
static void flush_unmaps_timeout(unsigned long data)
{
	unsigned long flags;

	spin_lock_irqsave(&async_umap_flush_lock, flags);
	flush_unmaps();
	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
{
	unsigned long flags;
	int next, iommu_id;
	struct intel_iommu *iommu;

	spin_lock_irqsave(&async_umap_flush_lock, flags);
	if (list_size == HIGH_WATER_MARK)
		flush_unmaps();

	iommu = domain_get_iommu(dom);
	iommu_id = iommu->seq_id;

	next = deferred_flush[iommu_id].next;
	deferred_flush[iommu_id].domain[next] = dom;
	deferred_flush[iommu_id].iova[next] = iova;
	deferred_flush[iommu_id].freelist[next] = freelist;
	deferred_flush[iommu_id].next++;

	if (!timer_on) {
		mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
		timer_on = 1;
	}
	list_size++;
	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
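/*
 * Design note: unless intel_iommu_strict is set, unmaps are batched here and
 * the IOTLB is only flushed once HIGH_WATER_MARK entries have accumulated or
 * the 10ms unmap_timer fires, trading a short window of stale TLB entries
 * for far fewer invalidation commands.
 */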
static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
			     size_t size, enum dma_data_direction dir,
			     struct dma_attrs *attrs)
{
	struct dmar_domain *domain;
	unsigned long start_pfn, last_pfn;
	struct iova *iova;
	struct intel_iommu *iommu;
	struct page *freelist;

	if (iommu_no_mapping(dev))
		return;

	domain = find_domain(dev);
	BUG_ON(!domain);

	iommu = domain_get_iommu(domain);

	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
	if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
		      (unsigned long long)dev_addr))
		return;

	start_pfn = mm_to_dma_pfn(iova->pfn_lo);
	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;

	pr_debug("Device %s unmapping: pfn %lx-%lx\n",
		 dev_name(dev), start_pfn, last_pfn);

	freelist = domain_unmap(domain, start_pfn, last_pfn);

	if (intel_iommu_strict) {
		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
				      last_pfn - start_pfn + 1, !freelist, 0);
		/* free iova */
		__free_iova(&domain->iovad, iova);
		dma_free_pagelist(freelist);
	} else {
		add_unmap(domain, iova, freelist);
		/*
		 * queue up the release of the unmap to save the 1/6th of the
		 * cpu used up by the iotlb flush operation...
		 */
	}
}
static void *intel_alloc_coherent(struct device *dev, size_t size,
				  dma_addr_t *dma_handle, gfp_t flags,
				  struct dma_attrs *attrs)
{
	struct page *page = NULL;
	int order;

	size = PAGE_ALIGN(size);
	order = get_order(size);

	if (!iommu_no_mapping(dev))
		flags &= ~(GFP_DMA | GFP_DMA32);
	else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
		if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
			flags |= GFP_DMA;
		else
			flags |= GFP_DMA32;
	}

	if (flags & __GFP_WAIT) {
		unsigned int count = size >> PAGE_SHIFT;

		page = dma_alloc_from_contiguous(dev, count, order);
		if (page && iommu_no_mapping(dev) &&
		    page_to_phys(page) + size > dev->coherent_dma_mask) {
			dma_release_from_contiguous(dev, page, count);
			page = NULL;
		}
	}

	if (!page)
		page = alloc_pages(flags, order);
	if (!page)
		return NULL;
	memset(page_address(page), 0, size);

	*dma_handle = __intel_map_single(dev, page_to_phys(page), size,
					 DMA_BIDIRECTIONAL,
					 dev->coherent_dma_mask);
	if (*dma_handle)
		return page_address(page);
	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
		__free_pages(page, order);

	return NULL;
}
static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
				dma_addr_t dma_handle, struct dma_attrs *attrs)
{
	int order;
	struct page *page = virt_to_page(vaddr);

	size = PAGE_ALIGN(size);
	order = get_order(size);

	intel_unmap_page(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
		__free_pages(page, order);
}
static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
			   int nelems, enum dma_data_direction dir,
			   struct dma_attrs *attrs)
{
	struct dmar_domain *domain;
	unsigned long start_pfn, last_pfn;
	struct iova *iova;
	struct intel_iommu *iommu;
	struct page *freelist;

	if (iommu_no_mapping(dev))
		return;

	domain = find_domain(dev);
	BUG_ON(!domain);

	iommu = domain_get_iommu(domain);

	iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
	if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
		      (unsigned long long)sglist[0].dma_address))
		return;

	start_pfn = mm_to_dma_pfn(iova->pfn_lo);
	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;

	freelist = domain_unmap(domain, start_pfn, last_pfn);

	if (intel_iommu_strict) {
		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
				      last_pfn - start_pfn + 1, !freelist, 0);
		/* free iova */
		__free_iova(&domain->iovad, iova);
		dma_free_pagelist(freelist);
	} else {
		add_unmap(domain, iova, freelist);
		/*
		 * queue up the release of the unmap to save the 1/6th of the
		 * cpu used up by the iotlb flush operation...
		 */
	}
}
static int intel_nontranslate_map_sg(struct device *hddev,
				     struct scatterlist *sglist, int nelems, int dir)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sglist, sg, nelems, i) {
		BUG_ON(!sg_page(sg));
		sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
		sg->dma_length = sg->length;
	}
	return nelems;
}
static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
			enum dma_data_direction dir, struct dma_attrs *attrs)
{
	int i;
	struct dmar_domain *domain;
	size_t size = 0;
	int prot = 0;
	struct iova *iova = NULL;
	int ret;
	struct scatterlist *sg;
	unsigned long start_vpfn;
	struct intel_iommu *iommu;

	BUG_ON(dir == DMA_NONE);
	if (iommu_no_mapping(dev))
		return intel_nontranslate_map_sg(dev, sglist, nelems, dir);

	domain = get_valid_domain_for_dev(dev);
	if (!domain)
		return 0;

	iommu = domain_get_iommu(domain);

	for_each_sg(sglist, sg, nelems, i)
		size += aligned_nrpages(sg->offset, sg->length);

	iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
				*dev->dma_mask);
	if (!iova) {
		sglist->dma_length = 0;
		return 0;
	}

	/*
	 * Check if DMAR supports zero-length reads on write only
	 * mappings..
	 */
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
			!cap_zlr(iommu->cap))
		prot |= DMA_PTE_READ;
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		prot |= DMA_PTE_WRITE;

	start_vpfn = mm_to_dma_pfn(iova->pfn_lo);

	ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
	if (unlikely(ret)) {
		/* clear the page */
		dma_pte_clear_range(domain, start_vpfn,
				    start_vpfn + size - 1);
		/* free page tables */
		dma_pte_free_pagetable(domain, start_vpfn,
				       start_vpfn + size - 1);
		/* free iova */
		__free_iova(&domain->iovad, iova);
		return 0;
	}

	/* it's a non-present to present mapping. Only flush if caching mode */
	if (cap_caching_mode(iommu->cap))
		iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
	else
		iommu_flush_write_buffer(iommu);

	return nelems;
}
static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
	return !dma_addr;
}

struct dma_map_ops intel_dma_ops = {
	.alloc = intel_alloc_coherent,
	.free = intel_free_coherent,
	.map_sg = intel_map_sg,
	.unmap_sg = intel_unmap_sg,
	.map_page = intel_map_page,
	.unmap_page = intel_unmap_page,
	.mapping_error = intel_mapping_error,
};
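/*
 * intel_iommu_init() installs this table as the global dma_ops, so once the
 * IOMMU is enabled, dma_map_page()/dma_map_sg()/dma_alloc_coherent() calls
 * are normally routed through the functions above.
 */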
static inline int iommu_domain_cache_init(void)
{
	int ret = 0;

	iommu_domain_cache = kmem_cache_create("iommu_domain",
					       sizeof(struct dmar_domain),
					       0, SLAB_HWCACHE_ALIGN, NULL);
	if (!iommu_domain_cache) {
		printk(KERN_ERR "Couldn't create iommu_domain cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static inline int iommu_devinfo_cache_init(void)
{
	int ret = 0;

	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
						sizeof(struct device_domain_info),
						0, SLAB_HWCACHE_ALIGN, NULL);
	if (!iommu_devinfo_cache) {
		printk(KERN_ERR "Couldn't create devinfo cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static inline int iommu_iova_cache_init(void)
{
	int ret = 0;

	iommu_iova_cache = kmem_cache_create("iommu_iova",
					     sizeof(struct iova),
					     0, SLAB_HWCACHE_ALIGN, NULL);
	if (!iommu_iova_cache) {
		printk(KERN_ERR "Couldn't create iova cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static int __init iommu_init_mempool(void)
{
	int ret;

	ret = iommu_iova_cache_init();
	if (ret)
		return ret;

	ret = iommu_domain_cache_init();
	if (ret)
		goto domain_error;

	ret = iommu_devinfo_cache_init();
	if (!ret)
		return ret;

	kmem_cache_destroy(iommu_domain_cache);
domain_error:
	kmem_cache_destroy(iommu_iova_cache);

	return -ENOMEM;
}

static void __init iommu_exit_mempool(void)
{
	kmem_cache_destroy(iommu_devinfo_cache);
	kmem_cache_destroy(iommu_domain_cache);
	kmem_cache_destroy(iommu_iova_cache);
}
static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
{
	struct dmar_drhd_unit *drhd;
	u32 vtbar;
	int rc;

	/* We know that this device on this chipset has its own IOMMU.
	 * If we find it under a different IOMMU, then the BIOS is lying
	 * to us. Hope that the IOMMU for this device is actually
	 * disabled, and it needs no translation...
	 */
	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
	if (rc) {
		/* "can't" happen */
		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
		return;
	}
	vtbar &= 0xffff0000;

	/* we know that this iommu should be at offset 0xa000 from vtbar */
	drhd = dmar_find_matched_drhd_unit(pdev);
	if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
			    TAINT_FIRMWARE_WORKAROUND,
			    "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
		pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
}
DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
static void __init init_no_remapping_devices(void)
{
	struct dmar_drhd_unit *drhd;
	struct device *dev;
	int i;

	for_each_drhd_unit(drhd) {
		if (!drhd->include_all) {
			for_each_active_dev_scope(drhd->devices,
						  drhd->devices_cnt, i, dev)
				break;
			/* ignore DMAR unit if no devices exist */
			if (i == drhd->devices_cnt)
				drhd->ignored = 1;
		}
	}

	for_each_active_drhd_unit(drhd) {
		if (drhd->include_all)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, dev)
			if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
				break;
		if (i < drhd->devices_cnt)
			continue;

		/* This IOMMU has *only* gfx devices. Either bypass it or
		   set the gfx_mapped flag, as appropriate */
		if (dmar_map_gfx) {
			intel_iommu_gfx_mapped = 1;
		} else {
			drhd->ignored = 1;
			for_each_active_dev_scope(drhd->devices,
						  drhd->devices_cnt, i, dev)
				dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
		}
	}
}
#ifdef CONFIG_SUSPEND
static int init_iommu_hw(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;

	for_each_active_iommu(iommu, drhd)
		if (iommu->qi)
			dmar_reenable_qi(iommu);

	for_each_iommu(iommu, drhd) {
		if (drhd->ignored) {
			/*
			 * we always have to disable PMRs or DMA may fail on
			 * this device
			 */
			if (force_on)
				iommu_disable_protect_mem_regions(iommu);
			continue;
		}

		iommu_flush_write_buffer(iommu);

		iommu_set_root_entry(iommu);

		iommu->flush.flush_context(iommu, 0, 0, 0,
					   DMA_CCMD_GLOBAL_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
					 DMA_TLB_GLOBAL_FLUSH);
		if (iommu_enable_translation(iommu))
			return 1;
		iommu_disable_protect_mem_regions(iommu);
	}

	return 0;
}
static void iommu_flush_all(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	for_each_active_iommu(iommu, drhd) {
		iommu->flush.flush_context(iommu, 0, 0, 0,
					   DMA_CCMD_GLOBAL_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
					 DMA_TLB_GLOBAL_FLUSH);
	}
}
static int iommu_suspend(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	unsigned long flag;

	for_each_active_iommu(iommu, drhd) {
		iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
					     GFP_ATOMIC);
		if (!iommu->iommu_state)
			goto nomem;
	}

	iommu_flush_all();

	for_each_active_iommu(iommu, drhd) {
		iommu_disable_translation(iommu);

		raw_spin_lock_irqsave(&iommu->register_lock, flag);

		iommu->iommu_state[SR_DMAR_FECTL_REG] =
			readl(iommu->reg + DMAR_FECTL_REG);
		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
			readl(iommu->reg + DMAR_FEDATA_REG);
		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
			readl(iommu->reg + DMAR_FEADDR_REG);
		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
			readl(iommu->reg + DMAR_FEUADDR_REG);

		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	}
	return 0;

nomem:
	for_each_active_iommu(iommu, drhd)
		kfree(iommu->iommu_state);

	return -ENOMEM;
}
static void iommu_resume(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	unsigned long flag;

	if (init_iommu_hw()) {
		if (force_on)
			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
		else
			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
		return;
	}

	for_each_active_iommu(iommu, drhd) {

		raw_spin_lock_irqsave(&iommu->register_lock, flag);

		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
		       iommu->reg + DMAR_FECTL_REG);
		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
		       iommu->reg + DMAR_FEDATA_REG);
		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
		       iommu->reg + DMAR_FEADDR_REG);
		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
		       iommu->reg + DMAR_FEUADDR_REG);

		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	}

	for_each_active_iommu(iommu, drhd)
		kfree(iommu->iommu_state);
}
static struct syscore_ops iommu_syscore_ops = {
	.resume		= iommu_resume,
	.suspend	= iommu_suspend,
};

static void __init init_iommu_pm_ops(void)
{
	register_syscore_ops(&iommu_syscore_ops);
}

#else
static inline void init_iommu_pm_ops(void) {}
#endif	/* CONFIG_PM */
int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
{
	struct acpi_dmar_reserved_memory *rmrr;
	struct dmar_rmrr_unit *rmrru;

	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
	if (!rmrru)
		return -ENOMEM;

	rmrru->hdr = header;
	rmrr = (struct acpi_dmar_reserved_memory *)header;
	rmrru->base_address = rmrr->base_address;
	rmrru->end_address = rmrr->end_address;
	rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
				((void *)rmrr) + rmrr->header.length,
				&rmrru->devices_cnt);
	if (rmrru->devices_cnt && rmrru->devices == NULL) {
		kfree(rmrru);
		return -ENOMEM;
	}

	list_add(&rmrru->list, &dmar_rmrr_units);

	return 0;
}
int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
{
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
	atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
	if (!atsru)
		return -ENOMEM;

	atsru->hdr = hdr;
	atsru->include_all = atsr->flags & 0x1;
	if (!atsru->include_all) {
		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
				(void *)atsr + atsr->header.length,
				&atsru->devices_cnt);
		if (atsru->devices_cnt && atsru->devices == NULL) {
			kfree(atsru);
			return -ENOMEM;
		}
	}

	list_add_rcu(&atsru->list, &dmar_atsr_units);

	return 0;
}
static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
{
	dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
	kfree(atsru);
}

static void intel_iommu_free_dmars(void)
{
	struct dmar_rmrr_unit *rmrru, *rmrr_n;
	struct dmar_atsr_unit *atsru, *atsr_n;

	list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
		list_del(&rmrru->list);
		dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
		kfree(rmrru);
	}

	list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
		list_del(&atsru->list);
		intel_iommu_free_atsr(atsru);
	}
}
int dmar_find_matched_atsr_unit(struct pci_dev *dev)
{
	int i, ret = 1;
	struct pci_bus *bus;
	struct pci_dev *bridge = NULL;
	struct device *tmp;
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	dev = pci_physfn(dev);
	for (bus = dev->bus; bus; bus = bus->parent) {
		bridge = bus->self;
		/* If it's an integrated device, allow ATS */
		if (!bridge)
			return 1;
		/* Connected via non-PCIe: no ATS */
		if (!pci_is_pcie(bridge) ||
		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
			return 0;
		/* If we found the root port, look it up in the ATSR */
		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
			break;
	}

	rcu_read_lock();
	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
		if (atsr->segment != pci_domain_nr(dev->bus))
			continue;

		for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
			if (tmp == &bridge->dev)
				goto out;

		if (atsru->include_all)
			goto out;
	}
	ret = 0;
out:
	rcu_read_unlock();

	return ret;
}
int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
{
	int ret = 0;
	struct dmar_rmrr_unit *rmrru;
	struct dmar_atsr_unit *atsru;
	struct acpi_dmar_atsr *atsr;
	struct acpi_dmar_reserved_memory *rmrr;

	if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
		return 0;

	list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
		rmrr = container_of(rmrru->hdr,
				    struct acpi_dmar_reserved_memory, header);
		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
			ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
				((void *)rmrr) + rmrr->header.length,
				rmrr->segment, rmrru->devices,
				rmrru->devices_cnt);
			if (ret < 0)
				return ret;
		} else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
			dmar_remove_dev_scope(info, rmrr->segment,
				rmrru->devices, rmrru->devices_cnt);
		}
	}

	list_for_each_entry(atsru, &dmar_atsr_units, list) {
		if (atsru->include_all)
			continue;

		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
			ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
					(void *)atsr + atsr->header.length,
					atsr->segment, atsru->devices,
					atsru->devices_cnt);
			if (ret > 0)
				break;
			else if (ret < 0)
				return ret;
		} else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
			if (dmar_remove_dev_scope(info, atsr->segment,
					atsru->devices, atsru->devices_cnt))
				break;
		}
	}

	return 0;
}
/*
 * Here we only respond to the unbinding of a device from its driver.
 *
 * An added device is not attached to its DMAR domain here yet. That will
 * happen when mapping the device to an iova.
 */
static int device_notifier(struct notifier_block *nb,
			   unsigned long action, void *data)
{
	struct device *dev = data;
	struct dmar_domain *domain;

	if (iommu_dummy(dev))
		return 0;

	if (action != BUS_NOTIFY_UNBOUND_DRIVER &&
	    action != BUS_NOTIFY_DEL_DEVICE)
		return 0;

	/*
	 * If the device is still attached to a device driver we can't
	 * tear down the domain yet as DMA mappings may still be in use.
	 * Wait for the BUS_NOTIFY_UNBOUND_DRIVER event to do that.
	 */
	if (action == BUS_NOTIFY_DEL_DEVICE && dev->driver != NULL)
		return 0;

	domain = find_domain(dev);
	if (!domain)
		return 0;

	down_read(&dmar_global_lock);
	domain_remove_one_dev_info(domain, dev);
	if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
	    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
	    list_empty(&domain->devices))
		domain_exit(domain);
	up_read(&dmar_global_lock);

	return 0;
}

static struct notifier_block device_nb = {
	.notifier_call = device_notifier,
};
static int intel_iommu_memory_notifier(struct notifier_block *nb,
				       unsigned long val, void *v)
{
	struct memory_notify *mhp = v;
	unsigned long long start, end;
	unsigned long start_vpfn, last_vpfn;

	switch (val) {
	case MEM_GOING_ONLINE:
		start = mhp->start_pfn << PAGE_SHIFT;
		end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
		if (iommu_domain_identity_map(si_domain, start, end)) {
			pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
				start, end);
			return NOTIFY_BAD;
		}
		break;

	case MEM_OFFLINE:
	case MEM_CANCEL_ONLINE:
		start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
		last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
		while (start_vpfn <= last_vpfn) {
			struct iova *iova;
			struct dmar_drhd_unit *drhd;
			struct intel_iommu *iommu;
			struct page *freelist;

			iova = find_iova(&si_domain->iovad, start_vpfn);
			if (iova == NULL) {
				pr_debug("dmar: failed get IOVA for PFN %lx\n",
					 start_vpfn);
				break;
			}

			iova = split_and_remove_iova(&si_domain->iovad, iova,
						     start_vpfn, last_vpfn);
			if (iova == NULL) {
				pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
					start_vpfn, last_vpfn);
				return NOTIFY_BAD;
			}

			freelist = domain_unmap(si_domain, iova->pfn_lo,
						iova->pfn_hi);

			rcu_read_lock();
			for_each_active_iommu(iommu, drhd)
				iommu_flush_iotlb_psi(iommu, si_domain->id,
					iova->pfn_lo,
					iova->pfn_hi - iova->pfn_lo + 1,
					!freelist, 0);
			rcu_read_unlock();
			dma_free_pagelist(freelist);

			start_vpfn = iova->pfn_hi + 1;
			free_iova_mem(iova);
		}
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block intel_iommu_memory_nb = {
	.notifier_call = intel_iommu_memory_notifier,
};
int __init intel_iommu_init(void)
{
	int ret = -ENODEV;
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	/* VT-d is required for a TXT/tboot launch, so enforce that */
	force_on = tboot_force_iommu();

	if (iommu_init_mempool()) {
		if (force_on)
			panic("tboot: Failed to initialize iommu memory\n");
		return -ENOMEM;
	}

	down_write(&dmar_global_lock);
	if (dmar_table_init()) {
		if (force_on)
			panic("tboot: Failed to initialize DMAR table\n");
		goto out_free_dmar;
	}

	/*
	 * Disable translation if already enabled prior to OS handover.
	 */
	for_each_active_iommu(iommu, drhd)
		if (iommu->gcmd & DMA_GCMD_TE)
			iommu_disable_translation(iommu);

	if (dmar_dev_scope_init() < 0) {
		if (force_on)
			panic("tboot: Failed to initialize DMAR device scope\n");
		goto out_free_dmar;
	}

	if (no_iommu || dmar_disabled)
		goto out_free_dmar;

	if (list_empty(&dmar_rmrr_units))
		printk(KERN_INFO "DMAR: No RMRR found\n");

	if (list_empty(&dmar_atsr_units))
		printk(KERN_INFO "DMAR: No ATSR found\n");

	if (dmar_init_reserved_ranges()) {
		if (force_on)
			panic("tboot: Failed to reserve iommu ranges\n");
		goto out_free_reserved_range;
	}

	init_no_remapping_devices();

	ret = init_dmars();
	if (ret) {
		if (force_on)
			panic("tboot: Failed to initialize DMARs\n");
		printk(KERN_ERR "IOMMU: dmar init failed\n");
		goto out_free_reserved_range;
	}
	up_write(&dmar_global_lock);
	printk(KERN_INFO
	       "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");

	init_timer(&unmap_timer);
#ifdef CONFIG_SWIOTLB
	swiotlb = 0;
#endif
	dma_ops = &intel_dma_ops;

	init_iommu_pm_ops();

	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
	bus_register_notifier(&pci_bus_type, &device_nb);
	if (si_domain && !hw_pass_through)
		register_memory_notifier(&intel_iommu_memory_nb);

	intel_iommu_enabled = 1;

	return 0;

out_free_reserved_range:
	put_iova_domain(&reserved_iova_list);
out_free_dmar:
	intel_iommu_free_dmars();
	up_write(&dmar_global_lock);
	iommu_exit_mempool();
	return ret;
}
static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
					   struct device *dev)
{
	struct pci_dev *tmp, *parent, *pdev;

	if (!iommu || !dev || !dev_is_pci(dev))
		return;

	pdev = to_pci_dev(dev);

	/* dependent device detach */
	tmp = pci_find_upstream_pcie_bridge(pdev);
	/* Secondary interface's bus number and devfn 0 */
	if (tmp) {
		parent = pdev->bus->self;
		while (parent != tmp) {
			iommu_detach_dev(iommu, parent->bus->number,
					 parent->devfn);
			parent = parent->bus->self;
		}
		if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
			iommu_detach_dev(iommu,
					 tmp->subordinate->number, 0);
		else /* this is a legacy PCI bridge */
			iommu_detach_dev(iommu, tmp->bus->number,
					 tmp->devfn);
	}
}
static void domain_remove_one_dev_info(struct dmar_domain *domain,
				       struct device *dev)
{
	struct device_domain_info *info, *tmp;
	struct intel_iommu *iommu;
	unsigned long flags;
	int found = 0;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry_safe(info, tmp, &domain->devices, link) {
		if (info->iommu == iommu && info->bus == bus &&
		    info->devfn == devfn) {
			unlink_domain_info(info);
			spin_unlock_irqrestore(&device_domain_lock, flags);

			iommu_disable_dev_iotlb(info);
			iommu_detach_dev(iommu, info->bus, info->devfn);
			iommu_detach_dependent_devices(iommu, dev);
			free_devinfo_mem(info);

			spin_lock_irqsave(&device_domain_lock, flags);

			if (found)
				break;
			else
				continue;
		}

		/* if there are no other devices under the same iommu
		 * owned by this domain, clear this iommu in iommu_bmp,
		 * update iommu count and coherency
		 */
		if (info->iommu == iommu)
			found = 1;
	}

	spin_unlock_irqrestore(&device_domain_lock, flags);

	if (found == 0) {
		unsigned long tmp_flags;
		spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
		clear_bit(iommu->seq_id, domain->iommu_bmp);
		domain->iommu_count--;
		domain_update_iommu_cap(domain);
		spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);

		if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
		    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
			spin_lock_irqsave(&iommu->lock, tmp_flags);
			clear_bit(domain->id, iommu->domain_ids);
			iommu->domains[domain->id] = NULL;
			spin_unlock_irqrestore(&iommu->lock, tmp_flags);
		}
	}
}
static int md_domain_init(struct dmar_domain *domain, int guest_width)
{
	int adjust_width;

	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	domain->agaw = width_to_agaw(adjust_width);

	domain->iommu_coherency = 0;
	domain->iommu_snooping = 0;
	domain->iommu_superpage = 0;
	domain->max_addr = 0;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
	if (!domain->pgd)
		return -ENOMEM;
	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
	return 0;
}
static int intel_iommu_domain_init(struct iommu_domain *domain)
{
	struct dmar_domain *dmar_domain;

	dmar_domain = alloc_domain(true);
	if (!dmar_domain) {
		printk(KERN_ERR
			"intel_iommu_domain_init: dmar_domain == NULL\n");
		return -ENOMEM;
	}
	if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
		printk(KERN_ERR
			"intel_iommu_domain_init() failed\n");
		domain_exit(dmar_domain);
		return -ENOMEM;
	}
	domain_update_iommu_cap(dmar_domain);
	domain->priv = dmar_domain;

	domain->geometry.aperture_start = 0;
	domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
	domain->geometry.force_aperture = true;

	return 0;
}

static void intel_iommu_domain_destroy(struct iommu_domain *domain)
{
	struct dmar_domain *dmar_domain = domain->priv;

	domain->priv = NULL;
	domain_exit(dmar_domain);
}
static int intel_iommu_attach_device(struct iommu_domain *domain,
				     struct device *dev)
{
	struct dmar_domain *dmar_domain = domain->priv;
	struct intel_iommu *iommu;
	int addr_width;
	u8 bus, devfn;

	if (device_is_rmrr_locked(dev)) {
		dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
		return -EPERM;
	}

	/* normally dev is not mapped */
	if (unlikely(domain_context_mapped(dev))) {
		struct dmar_domain *old_domain;

		old_domain = find_domain(dev);
		if (old_domain) {
			if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
			    dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
				domain_remove_one_dev_info(old_domain, dev);
			else
				domain_remove_dev_info(old_domain);
		}
	}

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	/* check if this iommu agaw is sufficient for max mapped address */
	addr_width = agaw_to_width(iommu->agaw);
	if (addr_width > cap_mgaw(iommu->cap))
		addr_width = cap_mgaw(iommu->cap);

	if (dmar_domain->max_addr > (1LL << addr_width)) {
		printk(KERN_ERR "%s: iommu width (%d) is not "
		       "sufficient for the mapped address (%llx)\n",
		       __func__, addr_width, dmar_domain->max_addr);
		return -EFAULT;
	}
	dmar_domain->gaw = addr_width;

	/*
	 * Knock out extra levels of page tables if necessary
	 */
	while (iommu->agaw < dmar_domain->agaw) {
		struct dma_pte *pte;

		pte = dmar_domain->pgd;
		if (dma_pte_present(pte)) {
			dmar_domain->pgd = (struct dma_pte *)
				phys_to_virt(dma_pte_addr(pte));
			free_pgtable_page(pte);
		}
		dmar_domain->agaw--;
	}

	return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
}

static void intel_iommu_detach_device(struct iommu_domain *domain,
				      struct device *dev)
{
	struct dmar_domain *dmar_domain = domain->priv;

	domain_remove_one_dev_info(dmar_domain, dev);
}
static int intel_iommu_map(struct iommu_domain *domain,
			   unsigned long iova, phys_addr_t hpa,
			   size_t size, int iommu_prot)
{
	struct dmar_domain *dmar_domain = domain->priv;
	u64 max_addr;
	int prot = 0;
	int ret;

	if (iommu_prot & IOMMU_READ)
		prot |= DMA_PTE_READ;
	if (iommu_prot & IOMMU_WRITE)
		prot |= DMA_PTE_WRITE;
	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
		prot |= DMA_PTE_SNP;

	max_addr = iova + size;
	if (dmar_domain->max_addr < max_addr) {
		u64 end;

		/* check if minimum agaw is sufficient for mapped address */
		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
		if (end < max_addr) {
			printk(KERN_ERR "%s: iommu width (%d) is not "
			       "sufficient for the mapped address (%llx)\n",
			       __func__, dmar_domain->gaw, max_addr);
			return -EFAULT;
		}
		dmar_domain->max_addr = max_addr;
	}
	/* Round up size to next multiple of PAGE_SIZE, if it and
	   the low bits of hpa would take us onto the next page */
	size = aligned_nrpages(hpa, size);
	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
				 hpa >> VTD_PAGE_SHIFT, size, prot);
	return ret;
}
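/*
 * Illustrative only (not part of this driver): an IOMMU API user such as
 * VFIO or KVM device assignment reaches intel_iommu_map() through the
 * generic layer registered in intel_iommu_init():
 *
 *	struct iommu_domain *d = iommu_domain_alloc(&pci_bus_type);
 *	iommu_attach_device(d, &pdev->dev);
 *	iommu_map(d, iova, phys, SZ_4K, IOMMU_READ | IOMMU_WRITE);
 */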
static size_t intel_iommu_unmap(struct iommu_domain *domain,
				unsigned long iova, size_t size)
{
	struct dmar_domain *dmar_domain = domain->priv;
	struct page *freelist = NULL;
	struct intel_iommu *iommu;
	unsigned long start_pfn, last_pfn;
	unsigned int npages;
	int iommu_id, num, ndomains, level = 0;

	/* Cope with horrid API which requires us to unmap more than the
	   size argument if it happens to be a large-page mapping. */
	if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
		BUG();

	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
		size = VTD_PAGE_SIZE << level_to_offset_bits(level);

	start_pfn = iova >> VTD_PAGE_SHIFT;
	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;

	freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);

	npages = last_pfn - start_pfn + 1;

	for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
		iommu = g_iommus[iommu_id];

		/*
		 * find bit position of dmar_domain
		 */
		ndomains = cap_ndoms(iommu->cap);
		for_each_set_bit(num, iommu->domain_ids, ndomains) {
			if (iommu->domains[num] == dmar_domain)
				iommu_flush_iotlb_psi(iommu, num, start_pfn,
						      npages, !freelist, 0);
		}
	}

	dma_free_pagelist(freelist);

	if (dmar_domain->max_addr == iova + size)
		dmar_domain->max_addr = iova;

	return size;
}
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
					    dma_addr_t iova)
{
	struct dmar_domain *dmar_domain = domain->priv;
	struct dma_pte *pte;
	int level = 0;
	u64 phys = 0;

	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
	if (pte)
		phys = dma_pte_addr(pte);

	return phys;
}

static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
				      unsigned long cap)
{
	struct dmar_domain *dmar_domain = domain->priv;

	if (cap == IOMMU_CAP_CACHE_COHERENCY)
		return dmar_domain->iommu_snooping;
	if (cap == IOMMU_CAP_INTR_REMAP)
		return irq_remapping_enabled;

	return 0;
}
#define REQ_ACS_FLAGS	(PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)

static int intel_iommu_add_device(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_dev *bridge, *dma_pdev = NULL;
	struct iommu_group *group;
	int ret;
	u8 bus, devfn;

	if (!device_to_iommu(dev, &bus, &devfn))
		return -ENODEV;

	bridge = pci_find_upstream_pcie_bridge(pdev);
	if (bridge) {
		if (pci_is_pcie(bridge))
			dma_pdev = pci_get_domain_bus_and_slot(
						pci_domain_nr(pdev->bus),
						bridge->subordinate->number, 0);
		if (!dma_pdev)
			dma_pdev = pci_dev_get(bridge);
	} else
		dma_pdev = pci_dev_get(pdev);

	/* Account for quirked devices */
	swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));

	/*
	 * If it's a multifunction device that does not support our
	 * required ACS flags, add to the same group as the lowest numbered
	 * function that also does not support the required ACS flags.
	 */
	if (dma_pdev->multifunction &&
	    !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) {
		u8 i, slot = PCI_SLOT(dma_pdev->devfn);

		for (i = 0; i < 8; i++) {
			struct pci_dev *tmp;

			tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i));
			if (!tmp)
				continue;

			if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) {
				swap_pci_ref(&dma_pdev, tmp);
				break;
			}
			pci_dev_put(tmp);
		}
	}

	/*
	 * Devices on the root bus go through the iommu.  If that's not us,
	 * find the next upstream device and test ACS up to the root bus.
	 * Finding the next device may require skipping virtual buses.
	 */
	while (!pci_is_root_bus(dma_pdev->bus)) {
		struct pci_bus *bus = dma_pdev->bus;

		while (!bus->self) {
			if (!pci_is_root_bus(bus))
				bus = bus->parent;
			else
				goto root_bus;
		}

		if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
			break;

		swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
	}

root_bus:
	group = iommu_group_get(&dma_pdev->dev);
	pci_dev_put(dma_pdev);
	if (!group) {
		group = iommu_group_alloc();
		if (IS_ERR(group))
			return PTR_ERR(group);
	}

	ret = iommu_group_add_device(group, dev);

	iommu_group_put(group);
	return ret;
}

static void intel_iommu_remove_device(struct device *dev)
{
	iommu_group_remove_device(dev);
}
static struct iommu_ops intel_iommu_ops = {
	.domain_init	= intel_iommu_domain_init,
	.domain_destroy = intel_iommu_domain_destroy,
	.attach_dev	= intel_iommu_attach_device,
	.detach_dev	= intel_iommu_detach_device,
	.map		= intel_iommu_map,
	.unmap		= intel_iommu_unmap,
	.iova_to_phys	= intel_iommu_iova_to_phys,
	.domain_has_cap = intel_iommu_domain_has_cap,
	.add_device	= intel_iommu_add_device,
	.remove_device	= intel_iommu_remove_device,
	.pgsize_bitmap	= INTEL_IOMMU_PGSIZES,
};
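/*
 * This ops table is hooked up with bus_set_iommu(&pci_bus_type,
 * &intel_iommu_ops) in intel_iommu_init(), which is how the generic IOMMU
 * API calls (attach, map, unmap, ...) reach this driver.
 */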
static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
{
	/* G4x/GM45 integrated gfx dmar support is totally busted. */
	printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
	dmar_map_gfx = 0;
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);

static void quirk_iommu_rwbf(struct pci_dev *dev)
{
	/*
	 * Mobile 4 Series Chipset neglects to set RWBF capability,
	 * but needs it. Same seems to hold for the desktop versions.
	 */
	printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
	rwbf_quirk = 1;
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
#define GGC 0x52
#define GGC_MEMORY_SIZE_MASK	(0xf << 8)
#define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
#define GGC_MEMORY_SIZE_1M	(0x1 << 8)
#define GGC_MEMORY_SIZE_2M	(0x3 << 8)
#define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)

static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
{
	unsigned short ggc;

	if (pci_read_config_word(dev, GGC, &ggc))
		return;

	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
		printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
		dmar_map_gfx = 0;
	} else if (dmar_map_gfx) {
		/* we have to ensure the gfx device is idle before we flush */
		printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
		intel_iommu_strict = 1;
	}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
/* On Tylersburg chipsets, some BIOSes have been known to enable the
   ISOCH DMAR unit for the Azalia sound device, but not give it any
   TLB entries, which causes it to deadlock. Check for that.  We do
   this in a function called from init_dmars(), instead of in a PCI
   quirk, because we don't want to print the obnoxious "BIOS broken"
   message if VT-d is actually disabled.
*/
static void __init check_tylersburg_isoch(void)
{
	struct pci_dev *pdev;
	uint32_t vtisochctrl;

	/* If there's no Azalia in the system anyway, forget it. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
	if (!pdev)
		return;
	pci_dev_put(pdev);

	/* System Management Registers. Might be hidden, in which case
	   we can't do the sanity check. But that's OK, because the
	   known-broken BIOSes _don't_ actually hide it, so far. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
	if (!pdev)
		return;

	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
		pci_dev_put(pdev);
		return;
	}

	pci_dev_put(pdev);

	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
	if (vtisochctrl & 1)
		return;

	/* Drop all bits other than the number of TLB entries */
	vtisochctrl &= 0x1c;

	/* If we have the recommended number of TLB entries (16), fine. */
	if (vtisochctrl == 0x10)
		return;

	/* Zero TLB entries? You get to ride the short bus to school. */
	if (!vtisochctrl) {
		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		iommu_identity_mapping |= IDENTMAP_AZALIA;
		return;
	}

	printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
	       vtisochctrl);
}