/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 */
#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_BRIDGE_HOST_DEVICE(pdev) \
			    ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define MAX_AGAW_WIDTH 64
#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
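/*
 * Example: with gaw == 48 and VTD_PAGE_SHIFT == 12, __DOMAIN_MAX_PFN(48)
 * is (1ULL << 36) - 1, i.e. the last of 2^36 4KiB page frames, and
 * DOMAIN_MAX_ADDR(48) is that PFN shifted back up (0xfffffffff000ULL, the
 * base of the last page below 256TiB). On 32-bit kernels the min_t()
 * above clamps DOMAIN_MAX_PFN to ULONG_MAX so PFNs still fit in a long.
 */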
#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
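/*
 * Each page-table level resolves LEVEL_STRIDE == 9 bits of the DMA address,
 * so one 4KiB table holds 512 entries. agaw_to_width() below gives
 * width = 30 + 9 * agaw: agaw 0 is a 30-bit 2-level table, agaw 2 is the
 * common 48-bit 4-level table, and agaw 4 is a 64-bit 6-level table.
 */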
static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;
}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << ((lvl - 1) * LEVEL_STRIDE);
}

/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}

static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}
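/*
 * Example: on x86 PAGE_SHIFT and VTD_PAGE_SHIFT are both 12, so the
 * dma<->mm pfn conversions above are no-ops. On an architecture with
 * 16KiB MM pages each MM pfn would correspond to four 4KiB VT-d pfns,
 * which is why VT-d pages must never be larger than MM pages.
 */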
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * set to 1 to panic kernel if can't successfully enable VT-d
 * (used when kernel is launched w/ TXT)
 */
static int force_on = 0;
/*
 * 12-63: Context Ptr (12 - (haw-1))
 */
struct root_entry {
	u64	val;
	u64	rsvd1;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
static inline bool root_present(struct root_entry *root)
{
	return (root->val & 1);
}
static inline void set_root_present(struct root_entry *root)
{
	root->val |= 1;
}
static inline void set_root_value(struct root_entry *root, unsigned long value)
{
	root->val |= value & VTD_PAGE_MASK;
}

static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
	return (struct context_entry *)
		(root_present(root)?phys_to_virt(
		root->val & VTD_PAGE_MASK) :
		NULL);
}
/*
 * 1: fault processing disable
 * 2-3: translation type
 * 12-63: address space root
 */
struct context_entry {
	u64 lo;
	u64 hi;
};

static inline bool context_present(struct context_entry *context)
{
	return (context->lo & 1);
}
static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}
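/*
 * As the setters above imply, the low word of a context entry carries the
 * present bit (bit 0), fault-processing disable (bit 1), translation type
 * (bits 2-3) and the address-space root (bits 12-63); the high word carries
 * the address width in bits 0-2 and the 16-bit domain id in bits 8-23.
 */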
/*
 * 12-63: Host physical address
 */
struct dma_pte {
	u64 val;
};

static inline void dma_clear_pte(struct dma_pte *pte)
{
	pte->val = 0;
}

static inline void dma_set_pte_readable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_READ;
}

static inline void dma_set_pte_writable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_WRITE;
}

static inline void dma_set_pte_snp(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_SNP;
}

static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
{
	pte->val = (pte->val & ~3) | (prot & 3);
}

static inline u64 dma_pte_addr(struct dma_pte *pte)
{
#ifdef CONFIG_64BIT
	return pte->val & VTD_PAGE_MASK;
#else
	/* Must have a full atomic 64-bit read */
	return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
#endif
}

static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
{
	pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
}

static inline bool dma_pte_present(struct dma_pte *pte)
{
	return (pte->val & 3) != 0;
}

static inline int first_pte_in_page(struct dma_pte *pte)
{
	return !((unsigned long)pte & ~VTD_PAGE_MASK);
}
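/*
 * A page-table page is VTD_PAGE_SIZE bytes and holds 512 struct dma_pte
 * entries, so first_pte_in_page() is simply "is this pointer 4KiB-aligned":
 * the pte at offset 0x0 within a table is the first one, the pte at offset
 * 0xff8 is the last.
 */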
/*
 * This domain is a static identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/* devices under the same p2p bridge are owned in one domain */
#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)

/* domain represents a virtual machine, more than one devices
 * across iommus may be owned in one domain, e.g. kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 1)

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 2)

struct dmar_domain {
	int	id;			/* domain id */
	int	nid;			/* node id */
	unsigned long iommu_bmp;	/* bitmap of iommus this domain uses*/

	struct list_head devices; 	/* all devices' list */
	struct iova_domain iovad;	/* iova's that belong to this domain */

	struct dma_pte	*pgd;		/* virtual address */
	int		gaw;		/* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int		agaw;

	int		flags;		/* flags to find out type of domain */

	int		iommu_coherency;/* indicate coherency of iommu access */
	int		iommu_snooping; /* indicate snooping control feature*/
	int		iommu_count;	/* reference count of iommu */
	int		iommu_superpage;/* Level of superpages supported:
					   0 == 4KiB (no superpages), 1 == 2MiB,
					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
	spinlock_t	iommu_lock;	/* protect iommu set in domain */
	u64		max_addr;	/* maximum mapped address */
};
/* PCI domain-device relationship */
struct device_domain_info {
	struct list_head link;	/* link to domain siblings */
	struct list_head global; /* link to global list */
	int segment;		/* PCI domain */
	u8 bus;			/* PCI bus number */
	u8 devfn;		/* PCI devfn number */
	struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
	struct intel_iommu *iommu; /* IOMMU used by this device */
	struct dmar_domain *domain; /* pointer to domain */
};

static void flush_unmaps_timeout(unsigned long data);

DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
	int next;
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

static long list_size;

static void domain_remove_dev_info(struct dmar_domain *domain);

#ifdef CONFIG_DMAR_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif /*CONFIG_DMAR_DEFAULT_ON*/

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

static struct iommu_ops intel_iommu_ops;
static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			printk(KERN_INFO "Intel-IOMMU: enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			printk(KERN_INFO "Intel-IOMMU: disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			printk(KERN_INFO
				"Intel-IOMMU: disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			printk(KERN_INFO
				"Intel-IOMMU: Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			printk(KERN_INFO
				"Intel-IOMMU: disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		} else if (!strncmp(str, "sp_off", 6)) {
			printk(KERN_INFO
				"Intel-IOMMU: disable supported super page\n");
			intel_iommu_superpage = 0;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
static struct kmem_cache *iommu_iova_cache;

static inline void *alloc_pgtable_page(int node)
{
	struct page *page;
	void *vaddr = NULL;

	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (page)
		vaddr = page_address(page);
	return vaddr;
}

static inline void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void * alloc_devinfo_mem(void)
{
	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

struct iova *alloc_iova_mem(void)
{
	return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
}

void free_iova_mem(struct iova *iova)
{
	kmem_cache_free(iommu_iova_cache, iova);
}
static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw = -1;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(max_gaw);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}

/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * calculate agaw for each iommu.
 * "SAGAW" may be different across iommus, use a default agaw, and
 * fall back to a smaller supported agaw for iommus that don't support
 * the default agaw.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}
/* This function only returns a single iommu in a domain */
static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	/* si_domain and vm domain should not get here. */
	BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
	BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);

	iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}
static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	int i;

	domain->iommu_coherency = 1;

	for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
}

static void domain_update_iommu_snooping(struct dmar_domain *domain)
{
	int i;

	domain->iommu_snooping = 1;

	for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
		if (!ecap_sc_support(g_iommus[i]->ecap)) {
			domain->iommu_snooping = 0;
			break;
		}
	}
}

static void domain_update_iommu_superpage(struct dmar_domain *domain)
{
	int i, mask = 0xf;

	if (!intel_iommu_superpage) {
		domain->iommu_superpage = 0;
		return;
	}

	domain->iommu_superpage = 4; /* 1TiB */

	for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
		mask |= cap_super_page_val(g_iommus[i]->cap);
	}
	domain->iommu_superpage = fls(mask);
}

/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);
	domain_update_iommu_snooping(domain);
	domain_update_iommu_superpage(domain);
}
static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	int i;

	for_each_drhd_unit(drhd) {
		if (segment != drhd->segment)
			continue;

		for (i = 0; i < drhd->devices_cnt; i++) {
			if (drhd->devices[i] &&
			    drhd->devices[i]->bus->number == bus &&
			    drhd->devices[i]->devfn == devfn)
				return drhd->iommu;
			if (drhd->devices[i] &&
			    drhd->devices[i]->subordinate &&
			    drhd->devices[i]->subordinate->number <= bus &&
			    drhd->devices[i]->subordinate->subordinate >= bus)
				return drhd->iommu;
		}

		if (drhd->include_all)
			return drhd->iommu;
	}

	return NULL;
}

static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}
/* Gets context entry for a given bus and devfn */
static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
		u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long phy_addr;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		context = (struct context_entry *)
				alloc_pgtable_page(iommu->node);
		if (!context) {
			spin_unlock_irqrestore(&iommu->lock, flags);
			return NULL;
		}
		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		set_root_value(root, phy_addr);
		set_root_present(root);
		__iommu_flush_cache(iommu, root, sizeof(*root));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
	return &context[devfn];
}
static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		ret = 0;
		goto out;
	}
	ret = context_present(&context[devfn]);
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}

static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (context) {
		context_clear_entry(&context[devfn]);
		__iommu_flush_cache(iommu, &context[devfn], \
			sizeof(*context));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
}
static void free_context_table(struct intel_iommu *iommu)
{
	struct root_entry *root;
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry) {
		goto out;
	}
	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		root = &iommu->root_entry[i];
		context = get_context_addr_from_root(root);
		if (context)
			free_pgtable_page(context);
	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int large_level)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	struct dma_pte *parent, *pte = NULL;
	int level = agaw_to_level(domain->agaw);
	int offset, target_level;

	BUG_ON(!domain->pgd);
	BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
	parent = domain->pgd;

	/* Search pte */
	if (!large_level)
		target_level = 1;
	else
		target_level = large_level;

	while (level > 0) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
			break;
		if (level == target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);
			if (!tmp_page)
				return NULL;

			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			if (cmpxchg64(&pte->val, 0ULL, pteval)) {
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			} else {
				domain_flush_cache(domain, pte, sizeof(*pte));
			}
		}
		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	return pte;
}
/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
					 unsigned long pfn,
					 int level, int *large_page)
{
	struct dma_pte *parent, *pte = NULL;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = pfn_level_offset(pfn, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte)) {
			*large_page = total;
			break;
		}

		if (pte->val & DMA_PTE_LARGE_PAGE) {
			*large_page = total;
			return pte;
		}

		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}
/* clear last level pte, a tlb flush should be followed */
static void dma_pte_clear_range(struct dmar_domain *domain,
				unsigned long start_pfn,
				unsigned long last_pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	unsigned int large_page = 1;
	struct dma_pte *first_pte, *pte;

	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	do {
		large_page = 1;
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
		if (!pte) {
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
			continue;
		}
		do {
			dma_clear_pte(pte);
			start_pfn += lvl_to_nr_pages(large_page);
			pte++;
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	} while (start_pfn && start_pfn <= last_pfn);
}
/* free page table pages. last level pte should already be cleared */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
				   unsigned long start_pfn,
				   unsigned long last_pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	struct dma_pte *first_pte, *pte;
	int total = agaw_to_level(domain->agaw);
	int level;
	unsigned long tmp;
	int large_page = 2;

	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
	BUG_ON(start_pfn > last_pfn);

	/* We don't need lock here; nobody else touches the iova range */
	level = 2;
	while (level <= total) {
		tmp = align_to_level(start_pfn, level);

		/* If we can't even clear one PTE at this level, we're done */
		if (tmp + level_size(level) - 1 > last_pfn)
			return;

		do {
			large_page = level;
			first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
			if (large_page > level)
				level = large_page + 1;
			if (!pte) {
				tmp = align_to_level(tmp + 1, level + 1);
				continue;
			}
			do {
				if (dma_pte_present(pte)) {
					free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
					dma_clear_pte(pte);
				}
				pte++;
				tmp += level_size(level);
			} while (!first_pte_in_page(pte) &&
				 tmp + level_size(level) - 1 <= last_pfn);

			domain_flush_cache(domain, first_pte,
					   (void *)pte - (void *)first_pte);

		} while (tmp && tmp + level_size(level) - 1 <= last_pfn);
		level++;
	}
	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
	if (!root)
		return -ENOMEM;

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}
static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	void *addr;
	u32 sts;
	unsigned long flag;

	addr = iommu->root_entry;

	spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
		return;

	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
}
/* return value determines whether we need a write buffer flush */
static void __iommu_flush_context(struct intel_iommu *iommu,
				  u16 did, u16 source_id, u8 function_mask,
				  u64 type)
{
	u64 val = 0;
	unsigned long flag;

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		      dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
}
/* return value determines whether we need a write buffer flush */
static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
				u64 addr, unsigned int size_order, u64 type)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* Note: always flush non-leaf currently */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: set drain read/write */
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		      dmar_readq, (!(val & DMA_TLB_IVT)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		printk(KERN_ERR "IOMMU: flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
			(unsigned long long)DMA_TLB_IIRG(type),
			(unsigned long long)DMA_TLB_IAIG(val));
}
static struct device_domain_info *iommu_support_dev_iotlb(
	struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
{
	int found = 0;
	unsigned long flags;
	struct device_domain_info *info;
	struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);

	if (!ecap_dev_iotlb_support(iommu->ecap))
		return NULL;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link)
		if (info->bus == bus && info->devfn == devfn) {
			found = 1;
			break;
		}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	if (!found || !info->dev)
		return NULL;

	if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
		return NULL;

	if (!dmar_find_matched_atsr_unit(info->dev))
		return NULL;

	info->iommu = iommu;

	return info;
}
static void iommu_enable_dev_iotlb(struct device_domain_info *info)
{
	pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
}

static void iommu_disable_dev_iotlb(struct device_domain_info *info)
{
	if (!info->dev || !pci_ats_enabled(info->dev))
		return;

	pci_disable_ats(info->dev);
}
static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
				  u64 addr, unsigned mask)
{
	u16 sid, qdep;
	unsigned long flags;
	struct device_domain_info *info;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link) {
		if (!info->dev || !pci_ats_enabled(info->dev))
			continue;

		sid = info->bus << 8 | info->devfn;
		qdep = pci_ats_queue_depth(info->dev);
		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}
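/*
 * The source-id passed to the device-IOTLB invalidation above is the
 * standard PCI requester id: bus in the high byte, devfn in the low byte.
 * For example, a function at 03:00.1 (bus 0x03, devfn 0x01) yields
 * sid == 0x0301.
 */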
static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
				  unsigned long pfn, unsigned int pages, int map)
{
	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;

	/*
	 * Fallback to domain selective flush if no PSI support or the size is
	 * too big.
	 * PSI requires page size to be 2 ^ x, and the base address is naturally
	 * aligned to the size
	 */
	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
		iommu->flush.flush_iotlb(iommu, did, 0, 0,
						DMA_TLB_DSI_FLUSH);
	else
		iommu->flush.flush_iotlb(iommu, did, addr, mask,
						DMA_TLB_PSI_FLUSH);

	/*
	 * In caching mode, changes of pages from non-present to present require
	 * flush. However, device IOTLB doesn't need to be flushed in this case.
	 */
	if (!cap_caching_mode(iommu->cap) || !map)
		iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
}
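/*
 * Example of the mask arithmetic above: flushing 9 pages rounds up to 16,
 * so mask == ilog2(16) == 4 and the hardware invalidates a 16-page (64KiB)
 * naturally-aligned region around addr in a single PSI operation.
 */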
static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
	unsigned long flags;

	spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		readl, !(pmen & DMA_PMEN_PRS), pmen);

	spin_unlock_irqrestore(&iommu->register_lock, flags);
}
static int iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	spin_lock_irqsave(&iommu->register_lock, flags);
	iommu->gcmd |= DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_TES), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flags);
	return 0;
}

static int iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(sts & DMA_GSTS_TES)), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
	return 0;
}
static int iommu_init_domains(struct intel_iommu *iommu)
{
	unsigned long ndomains;
	unsigned long nlongs;

	ndomains = cap_ndoms(iommu->cap);
	pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
			ndomains);
	nlongs = BITS_TO_LONGS(ndomains);

	spin_lock_init(&iommu->lock);

	/* TBD: there might be 64K domains,
	 * consider other allocation for future chip
	 */
	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
	if (!iommu->domain_ids) {
		printk(KERN_ERR "Allocating domain id array failed\n");
		return -ENOMEM;
	}
	iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
			GFP_KERNEL);
	if (!iommu->domains) {
		printk(KERN_ERR "Allocating domain array failed\n");
		return -ENOMEM;
	}

	/*
	 * if Caching mode is set, then invalid translations are tagged
	 * with domainid 0. Hence we need to pre-allocate it.
	 */
	if (cap_caching_mode(iommu->cap))
		set_bit(0, iommu->domain_ids);
	return 0;
}
static void domain_exit(struct dmar_domain *domain);
static void vm_domain_exit(struct dmar_domain *domain);

void free_dmar_iommu(struct intel_iommu *iommu)
{
	struct dmar_domain *domain;
	int i;
	unsigned long flags;

	if ((iommu->domains) && (iommu->domain_ids)) {
		for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
			domain = iommu->domains[i];
			clear_bit(i, iommu->domain_ids);

			spin_lock_irqsave(&domain->iommu_lock, flags);
			if (--domain->iommu_count == 0) {
				if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
					vm_domain_exit(domain);
				else
					domain_exit(domain);
			}
			spin_unlock_irqrestore(&domain->iommu_lock, flags);
		}
	}

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);

	if (iommu->irq) {
		irq_set_handler_data(iommu->irq, NULL);
		/* This will mask the irq */
		free_irq(iommu->irq, iommu);
		destroy_irq(iommu->irq);
	}

	kfree(iommu->domains);
	kfree(iommu->domain_ids);

	g_iommus[iommu->seq_id] = NULL;

	/* if all iommus are freed, free g_iommus */
	for (i = 0; i < g_num_of_iommus; i++) {
		if (g_iommus[i])
			break;
	}

	if (i == g_num_of_iommus)
		kfree(g_iommus);

	/* free context mapping */
	free_context_table(iommu);
}
static struct dmar_domain *alloc_domain(void)
{
	struct dmar_domain *domain;

	domain = alloc_domain_mem();
	if (!domain)
		return NULL;

	domain->nid = -1;
	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
	domain->flags = 0;

	return domain;
}
static int iommu_attach_domain(struct dmar_domain *domain,
			       struct intel_iommu *iommu)
{
	int num;
	unsigned long ndomains;
	unsigned long flags;

	ndomains = cap_ndoms(iommu->cap);

	spin_lock_irqsave(&iommu->lock, flags);

	num = find_first_zero_bit(iommu->domain_ids, ndomains);
	if (num >= ndomains) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		printk(KERN_ERR "IOMMU: no free domain ids\n");
		return -ENOMEM;
	}

	domain->id = num;
	set_bit(num, iommu->domain_ids);
	set_bit(iommu->seq_id, &domain->iommu_bmp);
	iommu->domains[num] = domain;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}
static void iommu_detach_domain(struct dmar_domain *domain,
				struct intel_iommu *iommu)
{
	unsigned long flags;
	int num, ndomains;
	int found = 0;

	spin_lock_irqsave(&iommu->lock, flags);
	ndomains = cap_ndoms(iommu->cap);
	for_each_set_bit(num, iommu->domain_ids, ndomains) {
		if (iommu->domains[num] == domain) {
			found = 1;
			break;
		}
	}

	if (found) {
		clear_bit(num, iommu->domain_ids);
		clear_bit(iommu->seq_id, &domain->iommu_bmp);
		iommu->domains[num] = NULL;
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
}
static struct iova_domain reserved_iova_list;
static struct lock_class_key reserved_rbtree_key;

static int dmar_init_reserved_ranges(void)
{
	struct pci_dev *pdev = NULL;
	struct iova *iova;
	int i;

	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);

	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
		&reserved_rbtree_key);

	/* IOAPIC ranges shouldn't be accessed by DMA */
	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
		IOVA_PFN(IOAPIC_RANGE_END));
	if (!iova) {
		printk(KERN_ERR "Reserve IOAPIC range failed\n");
		return -ENODEV;
	}

	/* Reserve all PCI MMIO to avoid peer-to-peer access */
	for_each_pci_dev(pdev) {
		struct resource *r;

		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
			r = &pdev->resource[i];
			if (!r->flags || !(r->flags & IORESOURCE_MEM))
				continue;
			iova = reserve_iova(&reserved_iova_list,
					    IOVA_PFN(r->start),
					    IOVA_PFN(r->end));
			if (!iova) {
				printk(KERN_ERR "Reserve iova failed\n");
				return -ENODEV;
			}
		}
	}
	return 0;
}
static void domain_reserve_special_ranges(struct dmar_domain *domain)
{
	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
}

static inline int guestwidth_to_adjustwidth(int gaw)
{
	int agaw;
	int r = (gaw - 12) % 9;

	if (r == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - r;
	if (agaw > 64)
		agaw = 64;
	return agaw;
}
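/*
 * Example of the width adjustment: a guest width of 48 bits gives
 * r == (48 - 12) % 9 == 0, so the adjusted width stays 48 (an exact number
 * of 9-bit levels above the 12-bit page offset); 39 likewise stays 39,
 * while e.g. 44 bits rounds up to the next level boundary, 48.
 */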
static int domain_init(struct dmar_domain *domain, int guest_width)
{
	struct intel_iommu *iommu;
	int adjust_width, agaw;
	unsigned long sagaw;

	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
	spin_lock_init(&domain->iommu_lock);

	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	iommu = domain_get_iommu(domain);
	if (guest_width > cap_mgaw(iommu->cap))
		guest_width = cap_mgaw(iommu->cap);
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	agaw = width_to_agaw(adjust_width);
	sagaw = cap_sagaw(iommu->cap);
	if (!test_bit(agaw, &sagaw)) {
		/* hardware doesn't support it, choose a bigger one */
		pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
		agaw = find_next_bit(&sagaw, 5, agaw);
		if (agaw >= 5)
			return -ENODEV;
	}
	domain->agaw = agaw;
	INIT_LIST_HEAD(&domain->devices);

	if (ecap_coherent(iommu->ecap))
		domain->iommu_coherency = 1;
	else
		domain->iommu_coherency = 0;

	if (ecap_sc_support(iommu->ecap))
		domain->iommu_snooping = 1;
	else
		domain->iommu_snooping = 0;

	domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
	domain->iommu_count = 1;
	domain->nid = iommu->node;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
	if (!domain->pgd)
		return -ENOMEM;
	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
	return 0;
}
static void domain_exit(struct dmar_domain *domain)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	/* Domain 0 is reserved, so dont process it */
	if (!domain)
		return;

	/* Flush any lazy unmaps that may reference this domain */
	if (!intel_iommu_strict)
		flush_unmaps_timeout(0);

	domain_remove_dev_info(domain);

	put_iova_domain(&domain->iovad);

	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

	/* free page tables */
	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

	for_each_active_iommu(iommu, drhd)
		if (test_bit(iommu->seq_id, &domain->iommu_bmp))
			iommu_detach_domain(domain, iommu);

	free_domain_mem(domain);
}
static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
				      u8 bus, u8 devfn, int translation)
{
	struct context_entry *context;
	unsigned long flags;
	struct intel_iommu *iommu;
	struct dma_pte *pgd;
	unsigned long num;
	unsigned long ndomains;
	int id;
	int agaw;
	struct device_domain_info *info = NULL;

	pr_debug("Set context mapping for %02x:%02x.%d\n",
		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));

	BUG_ON(!domain->pgd);
	BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
	       translation != CONTEXT_TT_MULTI_LEVEL);

	iommu = device_to_iommu(segment, bus, devfn);
	if (!iommu)
		return -ENODEV;

	context = device_to_context_entry(iommu, bus, devfn);
	if (!context)
		return -ENOMEM;
	spin_lock_irqsave(&iommu->lock, flags);
	if (context_present(context)) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		return 0;
	}

	id = domain->id;
	pgd = domain->pgd;

	if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
	    domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
		int found = 0;

		/* find an available domain id for this device in iommu */
		ndomains = cap_ndoms(iommu->cap);
		for_each_set_bit(num, iommu->domain_ids, ndomains) {
			if (iommu->domains[num] == domain) {
				id = num;
				found = 1;
				break;
			}
		}

		if (found == 0) {
			num = find_first_zero_bit(iommu->domain_ids, ndomains);
			if (num >= ndomains) {
				spin_unlock_irqrestore(&iommu->lock, flags);
				printk(KERN_ERR "IOMMU: no free domain ids\n");
				return -EFAULT;
			}

			set_bit(num, iommu->domain_ids);
			iommu->domains[num] = domain;
			id = num;
		}

		/* Skip top levels of page tables for
		 * iommu which has less agaw than default.
		 * Unnecessary for PT mode.
		 */
		if (translation != CONTEXT_TT_PASS_THROUGH) {
			for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
				pgd = phys_to_virt(dma_pte_addr(pgd));
				if (!dma_pte_present(pgd)) {
					spin_unlock_irqrestore(&iommu->lock, flags);
					return -ENOMEM;
				}
			}
		}
	}

	context_set_domain_id(context, id);

	if (translation != CONTEXT_TT_PASS_THROUGH) {
		info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
		translation = info ? CONTEXT_TT_DEV_IOTLB :
				     CONTEXT_TT_MULTI_LEVEL;
	}
	/*
	 * In pass through mode, AW must be programmed to indicate the largest
	 * AGAW value supported by hardware. And ASR is ignored by hardware.
	 */
	if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
		context_set_address_width(context, iommu->msagaw);
	else {
		context_set_address_root(context, virt_to_phys(pgd));
		context_set_address_width(context, iommu->agaw);
	}

	context_set_translation_type(context, translation);
	context_set_fault_enable(context);
	context_set_present(context);
	domain_flush_cache(domain, context, sizeof(*context));

	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entry we only need to flush the write-buffer. If it
	 * _does_ cache non-present entries, then it does so in the special
	 * domain #0, which we have to flush:
	 */
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   (((u16)bus) << 8) | devfn,
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
	} else {
		iommu_flush_write_buffer(iommu);
	}
	iommu_enable_dev_iotlb(info);
	spin_unlock_irqrestore(&iommu->lock, flags);

	spin_lock_irqsave(&domain->iommu_lock, flags);
	if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
		domain->iommu_count++;
		if (domain->iommu_count == 1)
			domain->nid = iommu->node;
		domain_update_iommu_cap(domain);
	}
	spin_unlock_irqrestore(&domain->iommu_lock, flags);
	return 0;
}
static int
domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
		       int translation)
{
	int ret;
	struct pci_dev *tmp, *parent;

	ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
					 pdev->bus->number, pdev->devfn,
					 translation);
	if (ret)
		return ret;

	/* dependent device mapping */
	tmp = pci_find_upstream_pcie_bridge(pdev);
	if (!tmp)
		return 0;
	/* Secondary interface's bus number and devfn 0 */
	parent = pdev->bus->self;
	while (parent != tmp) {
		ret = domain_context_mapping_one(domain,
						 pci_domain_nr(parent->bus),
						 parent->bus->number,
						 parent->devfn, translation);
		if (ret)
			return ret;
		parent = parent->bus->self;
	}
	if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
		return domain_context_mapping_one(domain,
					pci_domain_nr(tmp->subordinate),
					tmp->subordinate->number, 0,
					translation);
	else /* this is a legacy PCI bridge */
		return domain_context_mapping_one(domain,
						  pci_domain_nr(tmp->bus),
						  tmp->bus->number,
						  tmp->devfn,
						  translation);
}
static int domain_context_mapped(struct pci_dev *pdev)
{
	int ret;
	struct pci_dev *tmp, *parent;
	struct intel_iommu *iommu;

	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
				pdev->devfn);
	if (!iommu)
		return -ENODEV;

	ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
	if (!ret)
		return ret;
	/* dependent device mapping */
	tmp = pci_find_upstream_pcie_bridge(pdev);
	if (!tmp)
		return ret;
	/* Secondary interface's bus number and devfn 0 */
	parent = pdev->bus->self;
	while (parent != tmp) {
		ret = device_context_mapped(iommu, parent->bus->number,
					    parent->devfn);
		if (!ret)
			return ret;
		parent = parent->bus->self;
	}
	if (pci_is_pcie(tmp))
		return device_context_mapped(iommu, tmp->subordinate->number,
					     0);
	else
		return device_context_mapped(iommu, tmp->bus->number,
					     tmp->devfn);
}
/* Returns a number of VTD pages, but aligned to MM page size */
static inline unsigned long aligned_nrpages(unsigned long host_addr,
					    size_t size)
{
	host_addr &= ~PAGE_MASK;
	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
}
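/*
 * Example: a 100-byte buffer at host offset 0xff0 within a 4KiB page
 * straddles a page boundary, so aligned_nrpages(0xff0, 100) returns
 * PAGE_ALIGN(0xff0 + 100) >> 12 == 2 VT-d pages even though the length
 * itself is far less than one page.
 */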
/* Return largest possible superpage level for a given mapping */
static inline int hardware_largepage_caps(struct dmar_domain *domain,
					  unsigned long iov_pfn,
					  unsigned long phy_pfn,
					  unsigned long pages)
{
	int support, level = 1;
	unsigned long pfnmerge;

	support = domain->iommu_superpage;

	/* To use a large page, the virtual *and* physical addresses
	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
	   of them will mean we have to use smaller pages. So just
	   merge them and check both at once. */
	pfnmerge = iov_pfn | phy_pfn;

	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
		pages >>= VTD_STRIDE_SHIFT;
		if (!pages)
			break;
		pfnmerge >>= VTD_STRIDE_SHIFT;
		level++;
		support--;
	}
	return level;
}
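/*
 * Example: mapping 2048 pages at iov_pfn 0x200 to phy_pfn 0x1200. Both
 * pfns are 512-aligned and pages >= 512, so the loop advances one level
 * and returns 2, i.e. the range can start with a 2MiB superpage. A 1GiB
 * (level 3) page would additionally need 0x40000-aligned pfns, at least
 * 0x40000 pages, and hardware support for that level.
 */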
static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
			    struct scatterlist *sg, unsigned long phys_pfn,
			    unsigned long nr_pages, int prot)
{
	struct dma_pte *first_pte = NULL, *pte = NULL;
	phys_addr_t uninitialized_var(pteval);
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	unsigned long sg_res;
	unsigned int largepage_lvl = 0;
	unsigned long lvl_pages = 0;

	BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);

	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
		return -EINVAL;

	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;

	if (sg)
		sg_res = 0;
	else {
		sg_res = nr_pages + 1;
		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
	}

	while (nr_pages > 0) {
		uint64_t tmp;

		if (!sg_res) {
			sg_res = aligned_nrpages(sg->offset, sg->length);
			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
			sg->dma_length = sg->length;
			pteval = page_to_phys(sg_page(sg)) | prot;
			phys_pfn = pteval >> VTD_PAGE_SHIFT;
		}

		if (!pte) {
			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);

			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
			if (!pte)
				return -ENOMEM;
			/* It is large page*/
			if (largepage_lvl > 1)
				pteval |= DMA_PTE_LARGE_PAGE;
			else
				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
		}
		/* We don't need lock here, nobody else
		 * touches the iova range
		 */
		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
		if (tmp) {
			static int dumps = 5;
			printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
			       iov_pfn, tmp, (unsigned long long)pteval);
			if (dumps) {
				dumps--;
				debug_dma_dump_mappings(NULL);
			}
			WARN_ON(1);
		}

		lvl_pages = lvl_to_nr_pages(largepage_lvl);

		BUG_ON(nr_pages < lvl_pages);
		BUG_ON(sg_res < lvl_pages);

		nr_pages -= lvl_pages;
		iov_pfn += lvl_pages;
		phys_pfn += lvl_pages;
		pteval += lvl_pages * VTD_PAGE_SIZE;
		sg_res -= lvl_pages;

		/* If the next PTE would be the first in a new page, then we
		   need to flush the cache on the entries we've just written.
		   And then we'll need to recalculate 'pte', so clear it and
		   let it get set again in the if (!pte) block above.

		   If we're done (!nr_pages) we need to flush the cache too.

		   Also if we've been setting superpages, we may need to
		   recalculate 'pte' and switch back to smaller pages for the
		   end of the mapping, if the trailing size is not enough to
		   use another superpage (i.e. sg_res < lvl_pages). */
		pte++;
		if (!nr_pages || first_pte_in_page(pte) ||
		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
			domain_flush_cache(domain, first_pte,
					   (void *)pte - (void *)first_pte);
			pte = NULL;
		}

		if (!sg_res && nr_pages)
			sg = sg_next(sg);
	}
	return 0;
}
static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				    struct scatterlist *sg, unsigned long nr_pages,
				    int prot)
{
	return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
}

static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				     unsigned long phys_pfn, unsigned long nr_pages,
				     int prot)
{
	return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
}
static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	if (!iommu)
		return;

	clear_context_table(iommu, bus, devfn);
	iommu->flush.flush_context(iommu, 0, 0, 0,
					   DMA_CCMD_GLOBAL_INVL);
	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
}
static void domain_remove_dev_info(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	unsigned long flags;
	struct intel_iommu *iommu;

	spin_lock_irqsave(&device_domain_lock, flags);
	while (!list_empty(&domain->devices)) {
		info = list_entry(domain->devices.next,
			struct device_domain_info, link);
		list_del(&info->link);
		list_del(&info->global);
		if (info->dev)
			info->dev->dev.archdata.iommu = NULL;
		spin_unlock_irqrestore(&device_domain_lock, flags);

		iommu_disable_dev_iotlb(info);
		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
		iommu_detach_dev(iommu, info->bus, info->devfn);
		free_devinfo_mem(info);

		spin_lock_irqsave(&device_domain_lock, flags);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}
/*
 * find_domain
 * Note: we use struct pci_dev->dev.archdata.iommu to store the device's
 * domain info.
 */
static struct dmar_domain *
find_domain(struct pci_dev *pdev)
{
	struct device_domain_info *info;

	/* No lock here, assumes no domain exit in normal case */
	info = pdev->dev.archdata.iommu;
	if (info)
		return info->domain;
	return NULL;
}
/* domain is initialized */
static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
{
	struct dmar_domain *domain, *found = NULL;
	struct intel_iommu *iommu;
	struct dmar_drhd_unit *drhd;
	struct device_domain_info *info, *tmp;
	struct pci_dev *dev_tmp;
	unsigned long flags;
	int bus = 0, devfn = 0;
	int segment;
	int ret;

	domain = find_domain(pdev);
	if (domain)
		return domain;

	segment = pci_domain_nr(pdev->bus);

	dev_tmp = pci_find_upstream_pcie_bridge(pdev);
	if (dev_tmp) {
		if (pci_is_pcie(dev_tmp)) {
			bus = dev_tmp->subordinate->number;
			devfn = 0;
		} else {
			bus = dev_tmp->bus->number;
			devfn = dev_tmp->devfn;
		}
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(info, &device_domain_list, global) {
			if (info->segment == segment &&
			    info->bus == bus && info->devfn == devfn) {
				found = info->domain;
				break;
			}
		}
		spin_unlock_irqrestore(&device_domain_lock, flags);
		/* pcie-pci bridge already has a domain, uses it */
		if (found) {
			domain = found;
			return domain;
		}
	}

	domain = alloc_domain();
	if (!domain)
		goto error;

	/* Allocate new domain for the device */
	drhd = dmar_find_matched_drhd_unit(pdev);
	if (!drhd) {
		printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
			pci_name(pdev));
		return NULL;
	}
	iommu = drhd->iommu;

	ret = iommu_attach_domain(domain, iommu);
	if (ret) {
		free_domain_mem(domain);
		goto error;
	}

	if (domain_init(domain, gaw)) {
		domain_exit(domain);
		goto error;
	}

	/* register pcie-to-pci device */
	if (dev_tmp) {
		info = alloc_devinfo_mem();
		if (!info) {
			domain_exit(domain);
			goto error;
		}
		info->segment = segment;
		info->bus = bus;
		info->devfn = devfn;
		info->dev = NULL;
		info->domain = domain;
		/* This domain is shared by devices under p2p bridge */
		domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;

		/* pcie-to-pci bridge already has a domain, uses it */
		found = NULL;
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(tmp, &device_domain_list, global) {
			if (tmp->segment == segment &&
			    tmp->bus == bus && tmp->devfn == devfn) {
				found = tmp->domain;
				break;
			}
		}
		if (found) {
			spin_unlock_irqrestore(&device_domain_lock, flags);
			free_devinfo_mem(info);
			domain_exit(domain);
			domain = found;
			goto found_domain;
		}
		list_add(&info->link, &domain->devices);
		list_add(&info->global, &device_domain_list);
		spin_unlock_irqrestore(&device_domain_lock, flags);
	}

found_domain:
	info = alloc_devinfo_mem();
	if (!info)
		goto error;
	info->segment = segment;
	info->bus = pdev->bus->number;
	info->devfn = pdev->devfn;
	info->dev = pdev;
	info->domain = domain;
	spin_lock_irqsave(&device_domain_lock, flags);
	/* somebody is fast */
	found = find_domain(pdev);
	if (found != NULL) {
		spin_unlock_irqrestore(&device_domain_lock, flags);
		if (found != domain) {
			domain_exit(domain);
			domain = found;
		}
		free_devinfo_mem(info);
		return domain;
	}
	list_add(&info->link, &domain->devices);
	list_add(&info->global, &device_domain_list);
	pdev->dev.archdata.iommu = info;
	spin_unlock_irqrestore(&device_domain_lock, flags);
	return domain;
error:
	/* recheck it here, maybe others set it */
	return find_domain(pdev);
}
static int iommu_identity_mapping;
#define IDENTMAP_ALL		1
#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

static int iommu_domain_identity_map(struct dmar_domain *domain,
				     unsigned long long start,
				     unsigned long long end)
{
	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;

	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
			  dma_to_mm_pfn(last_vpfn))) {
		printk(KERN_ERR "IOMMU: reserve iova failed\n");
		return -ENOMEM;
	}

	pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
		 start, end, domain->id);
	/*
	 * RMRR range might have overlap with physical memory range,
	 * clear it first
	 */
	dma_pte_clear_range(domain, first_vpfn, last_vpfn);

	return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
				  last_vpfn - first_vpfn + 1,
				  DMA_PTE_READ|DMA_PTE_WRITE);
}
static int iommu_prepare_identity_map(struct pci_dev *pdev,
				      unsigned long long start,
				      unsigned long long end)
{
	struct dmar_domain *domain;
	int ret;

	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
	if (!domain)
		return -ENOMEM;

	/* For _hardware_ passthrough, don't bother. But for software
	   passthrough, we do it anyway -- it may indicate a memory
	   range which is reserved in E820, so which didn't get set
	   up to start with in si_domain */
	if (domain == si_domain && hw_pass_through) {
		printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
		       pci_name(pdev), start, end);
		return 0;
	}

	printk(KERN_INFO
	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
	       pci_name(pdev), start, end);

	if (end < start) {
		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
			"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
			dmi_get_system_info(DMI_BIOS_VENDOR),
			dmi_get_system_info(DMI_BIOS_VERSION),
			dmi_get_system_info(DMI_PRODUCT_VERSION));
		ret = -EIO;
		goto error;
	}

	if (end >> agaw_to_width(domain->agaw)) {
		WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     agaw_to_width(domain->agaw),
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		ret = -EIO;
		goto error;
	}

	ret = iommu_domain_identity_map(domain, start, end);
	if (ret)
		goto error;

	/* context entry init */
	ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
	if (ret)
		goto error;

	return 0;

 error:
	domain_exit(domain);
	return ret;
}
static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
	struct pci_dev *pdev)
{
	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return 0;
	return iommu_prepare_identity_map(pdev, rmrr->base_address,
		rmrr->end_address);
}

#ifdef CONFIG_DMAR_FLOPPY_WA
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *pdev;
	int ret;

	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (!pdev)
		return;

	printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);

	if (ret)
		printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
		       "floppy might not work\n");

}
#else
static inline void iommu_prepare_isa(void)
{
	return;
}
#endif /* !CONFIG_DMAR_FLOPPY_WA */
static int md_domain_init(struct dmar_domain *domain, int guest_width);

static int __init si_domain_work_fn(unsigned long start_pfn,
				    unsigned long end_pfn, void *datax)
{
	int *ret = datax;

	*ret = iommu_domain_identity_map(si_domain,
					 (uint64_t)start_pfn << PAGE_SHIFT,
					 (uint64_t)end_pfn << PAGE_SHIFT);
	return *ret;

}

static int __init si_domain_init(int hw)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int nid, ret = 0;

	si_domain = alloc_domain();
	if (!si_domain)
		return -EFAULT;

	pr_debug("Identity mapping domain is domain %d\n", si_domain->id);

	for_each_active_iommu(iommu, drhd) {
		ret = iommu_attach_domain(si_domain, iommu);
		if (ret) {
			domain_exit(si_domain);
			return -EFAULT;
		}
	}

	if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
		domain_exit(si_domain);
		return -EFAULT;
	}

	si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;

	if (hw)
		return 0;

	for_each_online_node(nid) {
		work_with_active_regions(nid, si_domain_work_fn, &ret);
		if (ret)
			return ret;
	}

	return 0;
}
static void domain_remove_one_dev_info(struct dmar_domain *domain,
				       struct pci_dev *pdev);
static int identity_mapping(struct pci_dev *pdev)
{
	struct device_domain_info *info;

	if (likely(!iommu_identity_mapping))
		return 0;

	info = pdev->dev.archdata.iommu;
	if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
		return (info->domain == si_domain);

	return 0;
}
static int domain_add_dev_info(struct dmar_domain *domain,
			       struct pci_dev *pdev,
			       int translation)
{
	struct device_domain_info *info;
	unsigned long flags;
	int ret;

	info = alloc_devinfo_mem();
	if (!info)
		return -ENOMEM;

	ret = domain_context_mapping(domain, pdev, translation);
	if (ret) {
		free_devinfo_mem(info);
		return ret;
	}

	info->segment = pci_domain_nr(pdev->bus);
	info->bus = pdev->bus->number;
	info->devfn = pdev->devfn;
	info->dev = pdev;
	info->domain = domain;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_add(&info->link, &domain->devices);
	list_add(&info->global, &device_domain_list);
	pdev->dev.archdata.iommu = info;
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}
static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
{
	if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
		return 1;

	if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
		return 1;

	if (!(iommu_identity_mapping & IDENTMAP_ALL))
		return 0;

	/*
	 * We want to start off with all devices in the 1:1 domain, and
	 * take them out later if we find they can't access all of memory.
	 *
	 * However, we can't do this for PCI devices behind bridges,
	 * because all PCI devices behind the same bridge will end up
	 * with the same source-id on their transactions.
	 *
	 * Practically speaking, we can't change things around for these
	 * devices at run-time, because we can't be sure there'll be no
	 * DMA transactions in flight for any of their siblings.
	 *
	 * So PCI devices (unless they're on the root bus) as well as
	 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
	 * the 1:1 domain, just in _case_ one of their siblings turns out
	 * not to be able to map all of memory.
	 */
	if (!pci_is_pcie(pdev)) {
		if (!pci_is_root_bus(pdev->bus))
			return 0;
		if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
			return 0;
	} else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
		return 0;

	/*
	 * At boot time, we don't yet know if devices will be 64-bit capable.
	 * Assume that they will -- if they turn out not to be, then we can
	 * take them out of the 1:1 domain later.
	 */
	if (!startup) {
		/*
		 * If the device's dma_mask is less than the system's memory
		 * size then this is not a candidate for identity mapping.
		 */
		u64 dma_mask = pdev->dma_mask;

		if (pdev->dev.coherent_dma_mask &&
		    pdev->dev.coherent_dma_mask < dma_mask)
			dma_mask = pdev->dev.coherent_dma_mask;

		return dma_mask >= dma_get_required_mask(&pdev->dev);
	}

	return 1;
}
static int __init iommu_prepare_static_identity_mapping(int hw)
{
	struct pci_dev *pdev = NULL;
	int ret;

	ret = si_domain_init(hw);
	if (ret)
		return -EFAULT;

	for_each_pci_dev(pdev) {
		/* Skip Host/PCI Bridge devices */
		if (IS_BRIDGE_HOST_DEVICE(pdev))
			continue;
		if (iommu_should_identity_map(pdev, 1)) {
			printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
			       hw ? "hardware" : "software", pci_name(pdev));

			ret = domain_add_dev_info(si_domain, pdev,
						  hw ? CONTEXT_TT_PASS_THROUGH :
						       CONTEXT_TT_MULTI_LEVEL);
			if (ret)
				return ret;
		}
	}

	return 0;
}
static int __init init_dmars(void)
{
    struct dmar_drhd_unit *drhd;
    struct dmar_rmrr_unit *rmrr;
    struct pci_dev *pdev;
    struct intel_iommu *iommu;
    int i, ret;

    /*
     * for each drhd
     *    allocate root
     *    initialize and program root entry to not present
     * endfor
     */
    for_each_drhd_unit(drhd) {
        /*
         * lock not needed as this is only incremented in the single
         * threaded kernel __init code path all other access are read
         * only
         */
        g_num_of_iommus++;
    }

    g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
                       GFP_KERNEL);
    if (!g_iommus) {
        printk(KERN_ERR "Allocating global iommu array failed\n");
        ret = -ENOMEM;
        goto error;
    }

    deferred_flush = kzalloc(g_num_of_iommus *
                             sizeof(struct deferred_flush_tables), GFP_KERNEL);
    if (!deferred_flush) {
        ret = -ENOMEM;
        goto error;
    }

    for_each_drhd_unit(drhd) {
        if (drhd->ignored)
            continue;

        iommu = drhd->iommu;
        g_iommus[iommu->seq_id] = iommu;

        ret = iommu_init_domains(iommu);
        if (ret)
            goto error;

        /*
         * TBD:
         * we could share the same root & context tables
         * among all IOMMU's. Need to Split it later.
         */
        ret = iommu_alloc_root_entry(iommu);
        if (ret) {
            printk(KERN_ERR "IOMMU: allocate root entry failed\n");
            goto error;
        }
        if (!ecap_pass_through(iommu->ecap))
            hw_pass_through = 0;
    }

    /*
     * Start from the sane iommu hardware state.
     */
    for_each_drhd_unit(drhd) {
        if (drhd->ignored)
            continue;

        iommu = drhd->iommu;

        /*
         * If the queued invalidation is already initialized by us
         * (for example, while enabling interrupt-remapping) then
         * we got the things already rolling from a sane state.
         */
        if (iommu->qi)
            continue;

        /*
         * Clear any previous faults.
         */
        dmar_fault(-1, iommu);
        /*
         * Disable queued invalidation if supported and already enabled
         * before OS handover.
         */
        dmar_disable_qi(iommu);
    }

    for_each_drhd_unit(drhd) {
        if (drhd->ignored)
            continue;

        iommu = drhd->iommu;

        if (dmar_enable_qi(iommu)) {
            /*
             * Queued Invalidate not enabled, use Register Based
             * Invalidate
             */
            iommu->flush.flush_context = __iommu_flush_context;
            iommu->flush.flush_iotlb = __iommu_flush_iotlb;
            printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
                   "invalidation\n",
                   iommu->seq_id,
                   (unsigned long long)drhd->reg_base_addr);
        } else {
            iommu->flush.flush_context = qi_flush_context;
            iommu->flush.flush_iotlb = qi_flush_iotlb;
            printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
                   "invalidation\n",
                   iommu->seq_id,
                   (unsigned long long)drhd->reg_base_addr);
        }
    }

    if (iommu_pass_through)
        iommu_identity_mapping |= IDENTMAP_ALL;

#ifdef CONFIG_DMAR_BROKEN_GFX_WA
    iommu_identity_mapping |= IDENTMAP_GFX;
#endif

    check_tylersburg_isoch();

    /*
     * If pass through is not set or not enabled, setup context entries for
     * identity mappings for rmrr, gfx, and isa and may fall back to static
     * identity mapping if iommu_identity_mapping is set.
     */
    if (iommu_identity_mapping) {
        ret = iommu_prepare_static_identity_mapping(hw_pass_through);
        if (ret) {
            printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
            goto error;
        }
    }
    /*
     * For each rmrr
     *   for each dev attached to rmrr
     *   do
     *     locate drhd for dev, alloc domain for dev
     *     allocate free domain
     *     allocate page table entries for rmrr
     *     if context not allocated for bus
     *           allocate and init context
     *           set present in root table for this bus
     *     init context with domain, translation etc
     *   endfor
     * endfor
     */
    printk(KERN_INFO "IOMMU: Setting RMRR:\n");
    for_each_rmrr_units(rmrr) {
        for (i = 0; i < rmrr->devices_cnt; i++) {
            pdev = rmrr->devices[i];
            /*
             * some BIOSes list non-existent devices in the DMAR
             * table, so skip them.
             */
            if (!pdev)
                continue;
            ret = iommu_prepare_rmrr_dev(rmrr, pdev);
            if (ret)
                printk(KERN_ERR
                       "IOMMU: mapping reserved region failed\n");
        }
    }

    iommu_prepare_isa();

    /*
     * for each drhd
     *   enable fault log
     *   global invalidate context cache
     *   global invalidate iotlb
     *   enable translation
     */
    for_each_drhd_unit(drhd) {
        if (drhd->ignored) {
            /*
             * we always have to disable PMRs or DMA may fail on
             * this device
             */
            if (force_on)
                iommu_disable_protect_mem_regions(drhd->iommu);
            continue;
        }
        iommu = drhd->iommu;

        iommu_flush_write_buffer(iommu);

        ret = dmar_set_interrupt(iommu);
        if (ret)
            goto error;

        iommu_set_root_entry(iommu);

        iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
        iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);

        ret = iommu_enable_translation(iommu);
        if (ret)
            goto error;

        iommu_disable_protect_mem_regions(iommu);
    }

    return 0;
error:
    for_each_drhd_unit(drhd) {
        if (drhd->ignored)
            continue;
        iommu = drhd->iommu;
        free_iommu(iommu);
    }
    kfree(g_iommus);
    return ret;
}
/* This takes a number of _MM_ pages, not VTD pages */
static struct iova *intel_alloc_iova(struct device *dev,
                                     struct dmar_domain *domain,
                                     unsigned long nrpages, uint64_t dma_mask)
{
    struct pci_dev *pdev = to_pci_dev(dev);
    struct iova *iova = NULL;

    /* Restrict dma_mask to the width that the iommu can handle */
    dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);

    if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
        /*
         * First try to allocate an io virtual address in
         * DMA_BIT_MASK(32) and if that fails then try allocating
         * from higher range
         */
        iova = alloc_iova(&domain->iovad, nrpages,
                          IOVA_PFN(DMA_BIT_MASK(32)), 1);
        if (iova)
            return iova;
    }
    iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
    if (unlikely(!iova)) {
        printk(KERN_ERR "Allocating %ld-page iova for %s failed",
               nrpages, pci_name(pdev));
        return NULL;
    }

    return iova;
}
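/*
 * Editor's note: callers pass the request size in mm pages.  The typical
 * pattern elsewhere in this file (see __intel_map_single() below) is:
 *
 *    size = aligned_nrpages(paddr, size);
 *    iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
 *
 * so the sub-4GiB fast path above is only attempted for devices whose
 * dma_mask actually extends beyond 32 bits and when dmar_forcedac is unset.
 */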
static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
{
    struct dmar_domain *domain;
    int ret;

    domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
    if (!domain) {
        printk(KERN_ERR "Allocating domain for %s failed",
               pci_name(pdev));
        return NULL;
    }

    /* make sure context mapping is ok */
    if (unlikely(!domain_context_mapped(pdev))) {
        ret = domain_context_mapping(domain, pdev,
                                     CONTEXT_TT_MULTI_LEVEL);
        if (ret) {
            printk(KERN_ERR "Domain context map for %s failed",
                   pci_name(pdev));
            return NULL;
        }
    }

    return domain;
}

static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
{
    struct device_domain_info *info;

    /* No lock here, assumes no domain exit in normal case */
    info = dev->dev.archdata.iommu;
    if (likely(info))
        return info->domain;

    return __get_valid_domain_for_dev(dev);
}

static int iommu_dummy(struct pci_dev *pdev)
{
    return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
}

/* Check if the pdev needs to go through non-identity map and unmap process.*/
static int iommu_no_mapping(struct device *dev)
{
    struct pci_dev *pdev;
    int found;

    if (unlikely(dev->bus != &pci_bus_type))
        return 1;

    pdev = to_pci_dev(dev);
    if (iommu_dummy(pdev))
        return 1;

    if (!iommu_identity_mapping)
        return 0;

    found = identity_mapping(pdev);
    if (found) {
        if (iommu_should_identity_map(pdev, 0))
            return 1;
        /*
         * The 32-bit device is removed from si_domain and falls back
         * to non-identity mapping.
         */
        domain_remove_one_dev_info(si_domain, pdev);
        printk(KERN_INFO "32bit %s uses non-identity mapping\n",
               pci_name(pdev));
        return 0;
    }

    /*
     * If a 64-bit DMA device was detached from a VM, the device is put
     * back into si_domain for identity mapping.
     */
    if (iommu_should_identity_map(pdev, 0)) {
        int ret;
        ret = domain_add_dev_info(si_domain, pdev,
                                  hw_pass_through ?
                                  CONTEXT_TT_PASS_THROUGH :
                                  CONTEXT_TT_MULTI_LEVEL);
        if (!ret) {
            printk(KERN_INFO "64bit %s uses identity mapping\n",
                   pci_name(pdev));
            return 1;
        }
    }

    return 0;
}
static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
                                     size_t size, int dir, u64 dma_mask)
{
    struct pci_dev *pdev = to_pci_dev(hwdev);
    struct dmar_domain *domain;
    phys_addr_t start_paddr;
    struct iova *iova;
    int prot = 0;
    int ret;
    struct intel_iommu *iommu;
    unsigned long paddr_pfn = paddr >> PAGE_SHIFT;

    BUG_ON(dir == DMA_NONE);

    if (iommu_no_mapping(hwdev))
        return paddr;

    domain = get_valid_domain_for_dev(pdev);
    if (!domain)
        return 0;

    iommu = domain_get_iommu(domain);
    size = aligned_nrpages(paddr, size);

    iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
    if (!iova)
        goto error;

    /*
     * Check if DMAR supports zero-length reads on write only
     * mappings..
     */
    if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
            !cap_zlr(iommu->cap))
        prot |= DMA_PTE_READ;
    if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
        prot |= DMA_PTE_WRITE;
    /*
     * paddr to (paddr + size) might cover a partial page, so map the whole
     * page.  Note: if two parts of one page are separately mapped, we
     * might have two guest_addr mappings to the same host paddr, but this
     * is not a big problem.
     */
    ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
                             mm_to_dma_pfn(paddr_pfn), size, prot);
    if (ret)
        goto error;

    /* it's a non-present to present mapping. Only flush if caching mode */
    if (cap_caching_mode(iommu->cap))
        iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
    else
        iommu_flush_write_buffer(iommu);

    start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
    start_paddr += paddr & ~PAGE_MASK;
    return start_paddr;

error:
    if (iova)
        __free_iova(&domain->iovad, iova);
    printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
           pci_name(pdev), size, (unsigned long long)paddr, dir);
    return 0;
}

static dma_addr_t intel_map_page(struct device *dev, struct page *page,
                                 unsigned long offset, size_t size,
                                 enum dma_data_direction dir,
                                 struct dma_attrs *attrs)
{
    return __intel_map_single(dev, page_to_phys(page) + offset, size,
                              dir, to_pci_dev(dev)->dma_mask);
}
static void flush_unmaps(void)
{
    int i, j;

    timer_on = 0;

    /* just flush them all */
    for (i = 0; i < g_num_of_iommus; i++) {
        struct intel_iommu *iommu = g_iommus[i];
        if (!iommu)
            continue;

        if (!deferred_flush[i].next)
            continue;

        /* In caching mode, global flushes turn emulation expensive */
        if (!cap_caching_mode(iommu->cap))
            iommu->flush.flush_iotlb(iommu, 0, 0, 0,
                                     DMA_TLB_GLOBAL_FLUSH);
        for (j = 0; j < deferred_flush[i].next; j++) {
            unsigned long mask;
            struct iova *iova = deferred_flush[i].iova[j];
            struct dmar_domain *domain = deferred_flush[i].domain[j];

            /* On real hardware multiple invalidations are expensive */
            if (cap_caching_mode(iommu->cap))
                iommu_flush_iotlb_psi(iommu, domain->id,
                    iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
            else {
                mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
                iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
                    (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
            }
            __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
        }
        deferred_flush[i].next = 0;
    }

    list_size = 0;
}
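/*
 * Editor's sketch of the bookkeeping consumed above (the real definition
 * lives earlier in this file; the shape shown here is an approximation):
 *
 *    struct deferred_flush_tables {
 *        int next;                                 // entries queued so far
 *        struct iova *iova[HIGH_WATER_MARK];       // IOVAs awaiting release
 *        struct dmar_domain *domain[HIGH_WATER_MARK];
 *    };
 *
 * deferred_flush[] is indexed by iommu->seq_id, so one global IOTLB flush
 * per IOMMU retires a whole batch of pending unmaps.
 */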
static void flush_unmaps_timeout(unsigned long data)
{
    unsigned long flags;

    spin_lock_irqsave(&async_umap_flush_lock, flags);
    flush_unmaps();
    spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}

static void add_unmap(struct dmar_domain *dom, struct iova *iova)
{
    unsigned long flags;
    int next, iommu_id;
    struct intel_iommu *iommu;

    spin_lock_irqsave(&async_umap_flush_lock, flags);
    if (list_size == HIGH_WATER_MARK)
        flush_unmaps();

    iommu = domain_get_iommu(dom);
    iommu_id = iommu->seq_id;

    next = deferred_flush[iommu_id].next;
    deferred_flush[iommu_id].domain[next] = dom;
    deferred_flush[iommu_id].iova[next] = iova;
    deferred_flush[iommu_id].next++;

    if (!timer_on) {
        mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
        timer_on = 1;
    }
    list_size++;
    spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
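/*
 * Editor's note on the batching above: an unmap is queued per-IOMMU and the
 * actual IOTLB flush is deferred until either the 10ms unmap_timer fires
 * (flush_unmaps_timeout) or list_size reaches HIGH_WATER_MARK, in which case
 * flush_unmaps() runs synchronously under the lock.  The trade-off is a
 * short window in which a freed IOVA may still be reachable through stale
 * TLB entries, in exchange for far fewer invalidation operations.
 */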
static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
                             size_t size, enum dma_data_direction dir,
                             struct dma_attrs *attrs)
{
    struct pci_dev *pdev = to_pci_dev(dev);
    struct dmar_domain *domain;
    unsigned long start_pfn, last_pfn;
    struct iova *iova;
    struct intel_iommu *iommu;

    if (iommu_no_mapping(dev))
        return;

    domain = find_domain(pdev);
    BUG_ON(!domain);

    iommu = domain_get_iommu(domain);

    iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
    if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
                  (unsigned long long)dev_addr))
        return;

    start_pfn = mm_to_dma_pfn(iova->pfn_lo);
    last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;

    pr_debug("Device %s unmapping: pfn %lx-%lx\n",
             pci_name(pdev), start_pfn, last_pfn);

    /* clear the whole page */
    dma_pte_clear_range(domain, start_pfn, last_pfn);

    /* free page tables */
    dma_pte_free_pagetable(domain, start_pfn, last_pfn);

    if (intel_iommu_strict) {
        iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
                              last_pfn - start_pfn + 1, 0);
        /* free iova */
        __free_iova(&domain->iovad, iova);
    } else {
        add_unmap(domain, iova);
        /*
         * queue up the release of the unmap to save the 1/6th of the
         * cpu used up by the iotlb flush operation...
         */
    }
}
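/*
 * Editor's note: the intel_iommu_strict path above trades throughput for an
 * immediate page-selective invalidation on every unmap; the default path
 * defers the flush via add_unmap().  On the kernel this file belongs to,
 * strict mode is selected with the "intel_iommu=strict" boot parameter
 * (see intel_iommu_setup() earlier in this file).
 */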
static void *intel_alloc_coherent(struct device *hwdev, size_t size,
                                  dma_addr_t *dma_handle, gfp_t flags)
{
    void *vaddr;
    int order;

    size = PAGE_ALIGN(size);
    order = get_order(size);

    if (!iommu_no_mapping(hwdev))
        flags &= ~(GFP_DMA | GFP_DMA32);
    else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
        if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
            flags |= GFP_DMA;
        else
            flags |= GFP_DMA32;
    }

    vaddr = (void *)__get_free_pages(flags, order);
    if (!vaddr)
        return NULL;
    memset(vaddr, 0, size);

    *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
                                     DMA_BIDIRECTIONAL,
                                     hwdev->coherent_dma_mask);
    if (*dma_handle)
        return vaddr;
    free_pages((unsigned long)vaddr, order);
    return NULL;
}

static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
                                dma_addr_t dma_handle)
{
    int order;

    size = PAGE_ALIGN(size);
    order = get_order(size);

    intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
    free_pages((unsigned long)vaddr, order);
}
static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
                           int nelems, enum dma_data_direction dir,
                           struct dma_attrs *attrs)
{
    struct pci_dev *pdev = to_pci_dev(hwdev);
    struct dmar_domain *domain;
    unsigned long start_pfn, last_pfn;
    struct iova *iova;
    struct intel_iommu *iommu;

    if (iommu_no_mapping(hwdev))
        return;

    domain = find_domain(pdev);
    BUG_ON(!domain);

    iommu = domain_get_iommu(domain);

    iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
    if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
                  (unsigned long long)sglist[0].dma_address))
        return;

    start_pfn = mm_to_dma_pfn(iova->pfn_lo);
    last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;

    /* clear the whole page */
    dma_pte_clear_range(domain, start_pfn, last_pfn);

    /* free page tables */
    dma_pte_free_pagetable(domain, start_pfn, last_pfn);

    if (intel_iommu_strict) {
        iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
                              last_pfn - start_pfn + 1, 0);
        /* free iova */
        __free_iova(&domain->iovad, iova);
    } else {
        add_unmap(domain, iova);
        /*
         * queue up the release of the unmap to save the 1/6th of the
         * cpu used up by the iotlb flush operation...
         */
    }
}

static int intel_nontranslate_map_sg(struct device *hddev,
    struct scatterlist *sglist, int nelems, int dir)
{
    int i;
    struct scatterlist *sg;

    for_each_sg(sglist, sg, nelems, i) {
        BUG_ON(!sg_page(sg));
        sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
        sg->dma_length = sg->length;
    }
    return nelems;
}

static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
                        enum dma_data_direction dir, struct dma_attrs *attrs)
{
    int i;
    struct pci_dev *pdev = to_pci_dev(hwdev);
    struct dmar_domain *domain;
    size_t size = 0;
    int prot = 0;
    struct iova *iova = NULL;
    int ret;
    struct scatterlist *sg;
    unsigned long start_vpfn;
    struct intel_iommu *iommu;

    BUG_ON(dir == DMA_NONE);
    if (iommu_no_mapping(hwdev))
        return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);

    domain = get_valid_domain_for_dev(pdev);
    if (!domain)
        return 0;

    iommu = domain_get_iommu(domain);

    for_each_sg(sglist, sg, nelems, i)
        size += aligned_nrpages(sg->offset, sg->length);

    iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
                            pdev->dma_mask);
    if (!iova) {
        sglist->dma_length = 0;
        return 0;
    }

    /*
     * Check if DMAR supports zero-length reads on write only
     * mappings..
     */
    if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
            !cap_zlr(iommu->cap))
        prot |= DMA_PTE_READ;
    if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
        prot |= DMA_PTE_WRITE;

    start_vpfn = mm_to_dma_pfn(iova->pfn_lo);

    ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
    if (unlikely(ret)) {
        /* clear the page */
        dma_pte_clear_range(domain, start_vpfn,
                            start_vpfn + size - 1);
        /* free page tables */
        dma_pte_free_pagetable(domain, start_vpfn,
                               start_vpfn + size - 1);
        /* free iova */
        __free_iova(&domain->iovad, iova);
        return 0;
    }

    /* it's a non-present to present mapping. Only flush if caching mode */
    if (cap_caching_mode(iommu->cap))
        iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
    else
        iommu_flush_write_buffer(iommu);

    return nelems;
}

static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
    return !dma_addr;
}
struct dma_map_ops intel_dma_ops = {
    .alloc_coherent = intel_alloc_coherent,
    .free_coherent = intel_free_coherent,
    .map_sg = intel_map_sg,
    .unmap_sg = intel_unmap_sg,
    .map_page = intel_map_page,
    .unmap_page = intel_unmap_page,
    .mapping_error = intel_mapping_error,
};
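/*
 * Editor's illustrative sketch (not part of the driver): once dma_ops points
 * at intel_dma_ops (set up in intel_iommu_init() below), an ordinary driver's
 * streaming DMA calls land in the functions above, roughly:
 *
 *    dma_addr_t h = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
 *    if (dma_mapping_error(&pdev->dev, h))                // -> intel_mapping_error()
 *        return -EIO;
 *    ...
 *    dma_unmap_single(&pdev->dev, h, len, DMA_TO_DEVICE); // -> intel_unmap_page()
 *
 * dma_map_single() resolves to .map_page and dma_alloc_coherent() to
 * .alloc_coherent through the generic dma-mapping wrappers of this era.
 */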
static inline int iommu_domain_cache_init(void)
{
    int ret = 0;

    iommu_domain_cache = kmem_cache_create("iommu_domain",
                                           sizeof(struct dmar_domain),
                                           0,
                                           SLAB_HWCACHE_ALIGN,
                                           NULL);
    if (!iommu_domain_cache) {
        printk(KERN_ERR "Couldn't create iommu_domain cache\n");
        ret = -ENOMEM;
    }

    return ret;
}

static inline int iommu_devinfo_cache_init(void)
{
    int ret = 0;

    iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
                                            sizeof(struct device_domain_info),
                                            0,
                                            SLAB_HWCACHE_ALIGN,
                                            NULL);
    if (!iommu_devinfo_cache) {
        printk(KERN_ERR "Couldn't create devinfo cache\n");
        ret = -ENOMEM;
    }

    return ret;
}

static inline int iommu_iova_cache_init(void)
{
    int ret = 0;

    iommu_iova_cache = kmem_cache_create("iommu_iova",
                                         sizeof(struct iova),
                                         0,
                                         SLAB_HWCACHE_ALIGN,
                                         NULL);
    if (!iommu_iova_cache) {
        printk(KERN_ERR "Couldn't create iova cache\n");
        ret = -ENOMEM;
    }

    return ret;
}

static int __init iommu_init_mempool(void)
{
    int ret;

    ret = iommu_iova_cache_init();
    if (ret)
        return ret;

    ret = iommu_domain_cache_init();
    if (ret)
        goto domain_error;

    ret = iommu_devinfo_cache_init();
    if (!ret)
        return ret;

    kmem_cache_destroy(iommu_domain_cache);
domain_error:
    kmem_cache_destroy(iommu_iova_cache);

    return -ENOMEM;
}

static void __init iommu_exit_mempool(void)
{
    kmem_cache_destroy(iommu_devinfo_cache);
    kmem_cache_destroy(iommu_domain_cache);
    kmem_cache_destroy(iommu_iova_cache);
}

static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
{
    struct dmar_drhd_unit *drhd;
    u32 vtbar;
    int rc;

    /* We know that this device on this chipset has its own IOMMU.
     * If we find it under a different IOMMU, then the BIOS is lying
     * to us. Hope that the IOMMU for this device is actually
     * disabled, and it needs no translation...
     */
    rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
    if (rc) {
        /* "can't" happen */
        dev_info(&pdev->dev, "failed to run vt-d quirk\n");
        return;
    }
    vtbar &= 0xffff0000;

    /* we know that this iommu should be at offset 0xa000 from vtbar */
    drhd = dmar_find_matched_drhd_unit(pdev);
    if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
                        TAINT_FIRMWARE_WORKAROUND,
                        "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
        pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
}
DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);

static void __init init_no_remapping_devices(void)
{
    struct dmar_drhd_unit *drhd;

    for_each_drhd_unit(drhd) {
        if (!drhd->include_all) {
            int i;
            for (i = 0; i < drhd->devices_cnt; i++)
                if (drhd->devices[i] != NULL)
                    break;
            /* ignore DMAR unit if no pci devices exist */
            if (i == drhd->devices_cnt)
                drhd->ignored = 1;
        }
    }

    for_each_drhd_unit(drhd) {
        int i;
        if (drhd->ignored || drhd->include_all)
            continue;

        for (i = 0; i < drhd->devices_cnt; i++)
            if (drhd->devices[i] &&
                !IS_GFX_DEVICE(drhd->devices[i]))
                break;

        if (i < drhd->devices_cnt)
            continue;

        /* bypass IOMMU if it is just for gfx devices */
        drhd->ignored = 1;
        for (i = 0; i < drhd->devices_cnt; i++) {
            if (!drhd->devices[i])
                continue;
            drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
        }
    }
}
#ifdef CONFIG_SUSPEND
static int init_iommu_hw(void)
{
    struct dmar_drhd_unit *drhd;
    struct intel_iommu *iommu = NULL;

    for_each_active_iommu(iommu, drhd)
        if (iommu->qi)
            dmar_reenable_qi(iommu);

    for_each_iommu(iommu, drhd) {
        if (drhd->ignored) {
            /*
             * we always have to disable PMRs or DMA may fail on
             * this device
             */
            if (force_on)
                iommu_disable_protect_mem_regions(iommu);
            continue;
        }

        iommu_flush_write_buffer(iommu);

        iommu_set_root_entry(iommu);

        iommu->flush.flush_context(iommu, 0, 0, 0,
                                   DMA_CCMD_GLOBAL_INVL);
        iommu->flush.flush_iotlb(iommu, 0, 0, 0,
                                 DMA_TLB_GLOBAL_FLUSH);
        if (iommu_enable_translation(iommu))
            return 1;
        iommu_disable_protect_mem_regions(iommu);
    }

    return 0;
}

static void iommu_flush_all(void)
{
    struct dmar_drhd_unit *drhd;
    struct intel_iommu *iommu;

    for_each_active_iommu(iommu, drhd) {
        iommu->flush.flush_context(iommu, 0, 0, 0,
                                   DMA_CCMD_GLOBAL_INVL);
        iommu->flush.flush_iotlb(iommu, 0, 0, 0,
                                 DMA_TLB_GLOBAL_FLUSH);
    }
}

static int iommu_suspend(void)
{
    struct dmar_drhd_unit *drhd;
    struct intel_iommu *iommu = NULL;
    unsigned long flag;

    for_each_active_iommu(iommu, drhd) {
        iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
                                     GFP_ATOMIC);
        if (!iommu->iommu_state)
            goto nomem;
    }

    iommu_flush_all();

    for_each_active_iommu(iommu, drhd) {
        iommu_disable_translation(iommu);

        spin_lock_irqsave(&iommu->register_lock, flag);

        iommu->iommu_state[SR_DMAR_FECTL_REG] =
            readl(iommu->reg + DMAR_FECTL_REG);
        iommu->iommu_state[SR_DMAR_FEDATA_REG] =
            readl(iommu->reg + DMAR_FEDATA_REG);
        iommu->iommu_state[SR_DMAR_FEADDR_REG] =
            readl(iommu->reg + DMAR_FEADDR_REG);
        iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
            readl(iommu->reg + DMAR_FEUADDR_REG);

        spin_unlock_irqrestore(&iommu->register_lock, flag);
    }
    return 0;

nomem:
    for_each_active_iommu(iommu, drhd)
        kfree(iommu->iommu_state);

    return -ENOMEM;
}

static void iommu_resume(void)
{
    struct dmar_drhd_unit *drhd;
    struct intel_iommu *iommu = NULL;
    unsigned long flag;

    if (init_iommu_hw()) {
        if (force_on)
            panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
        else
            WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
        return;
    }

    for_each_active_iommu(iommu, drhd) {

        spin_lock_irqsave(&iommu->register_lock, flag);

        writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
               iommu->reg + DMAR_FECTL_REG);
        writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
               iommu->reg + DMAR_FEDATA_REG);
        writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
               iommu->reg + DMAR_FEADDR_REG);
        writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
               iommu->reg + DMAR_FEUADDR_REG);

        spin_unlock_irqrestore(&iommu->register_lock, flag);
    }

    for_each_active_iommu(iommu, drhd)
        kfree(iommu->iommu_state);
}

static struct syscore_ops iommu_syscore_ops = {
    .resume = iommu_resume,
    .suspend = iommu_suspend,
};

static void __init init_iommu_pm_ops(void)
{
    register_syscore_ops(&iommu_syscore_ops);
}

#else
static inline void init_iommu_pm_ops(void) {}
#endif /* CONFIG_SUSPEND */
/*
 * Here we only respond to the action of a device being unbound from its
 * driver.
 *
 * An added device is not attached to its DMAR domain here yet. That will
 * happen when mapping the device to an iova.
 */
static int device_notifier(struct notifier_block *nb,
                           unsigned long action, void *data)
{
    struct device *dev = data;
    struct pci_dev *pdev = to_pci_dev(dev);
    struct dmar_domain *domain;

    if (iommu_no_mapping(dev))
        return 0;

    domain = find_domain(pdev);
    if (!domain)
        return 0;

    if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
        domain_remove_one_dev_info(domain, pdev);

        if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
            !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
            list_empty(&domain->devices))
            domain_exit(domain);
    }

    return 0;
}

static struct notifier_block device_nb = {
    .notifier_call = device_notifier,
};

int __init intel_iommu_init(void)
{
    int ret = 0;

    /* VT-d is required for a TXT/tboot launch, so enforce that */
    force_on = tboot_force_iommu();

    if (dmar_table_init()) {
        if (force_on)
            panic("tboot: Failed to initialize DMAR table\n");
        return -ENODEV;
    }

    if (dmar_dev_scope_init()) {
        if (force_on)
            panic("tboot: Failed to initialize DMAR device scope\n");
        return -ENODEV;
    }

    /*
     * Check the need for DMA-remapping initialization now.
     * Above initialization will also be used by Interrupt-remapping.
     */
    if (no_iommu || dmar_disabled)
        return -ENODEV;

    if (iommu_init_mempool()) {
        if (force_on)
            panic("tboot: Failed to initialize iommu memory\n");
        return -ENODEV;
    }

    if (dmar_init_reserved_ranges()) {
        if (force_on)
            panic("tboot: Failed to reserve iommu ranges\n");
        return -ENODEV;
    }

    init_no_remapping_devices();

    ret = init_dmars();
    if (ret) {
        if (force_on)
            panic("tboot: Failed to initialize DMARs\n");
        printk(KERN_ERR "IOMMU: dmar init failed\n");
        put_iova_domain(&reserved_iova_list);
        iommu_exit_mempool();
        return ret;
    }
    printk(KERN_INFO
           "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");

    init_timer(&unmap_timer);
#ifdef CONFIG_SWIOTLB
    swiotlb = 0;
#endif
    dma_ops = &intel_dma_ops;

    init_iommu_pm_ops();

    register_iommu(&intel_iommu_ops);

    bus_register_notifier(&pci_bus_type, &device_nb);

    return 0;
}
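/*
 * Editor's note: the bus_register_notifier() call above is what ties
 * device_notifier() into the PCI core.  When a driver is unbound, the core
 * emits BUS_NOTIFY_UNBOUND_DRIVER for that device, and device_notifier()
 * then detaches the device from its dmar_domain and frees the domain if it
 * was the last (non-VM, non-identity) user.
 */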
static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
                                           struct pci_dev *pdev)
{
    struct pci_dev *tmp, *parent;

    if (!iommu || !pdev)
        return;

    /* dependent device detach */
    tmp = pci_find_upstream_pcie_bridge(pdev);
    /* Secondary interface's bus number and devfn 0 */
    if (tmp) {
        parent = pdev->bus->self;
        while (parent != tmp) {
            iommu_detach_dev(iommu, parent->bus->number,
                             parent->devfn);
            parent = parent->bus->self;
        }
        if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
            iommu_detach_dev(iommu,
                             tmp->subordinate->number, 0);
        else /* this is a legacy PCI bridge */
            iommu_detach_dev(iommu, tmp->bus->number,
                             tmp->devfn);
    }
}

static void domain_remove_one_dev_info(struct dmar_domain *domain,
                                       struct pci_dev *pdev)
{
    struct device_domain_info *info;
    struct intel_iommu *iommu;
    unsigned long flags;
    int found = 0;
    struct list_head *entry, *tmp;

    iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
                            pdev->devfn);
    if (!iommu)
        return;

    spin_lock_irqsave(&device_domain_lock, flags);
    list_for_each_safe(entry, tmp, &domain->devices) {
        info = list_entry(entry, struct device_domain_info, link);
        if (info->segment == pci_domain_nr(pdev->bus) &&
            info->bus == pdev->bus->number &&
            info->devfn == pdev->devfn) {
            list_del(&info->link);
            list_del(&info->global);
            if (info->dev)
                info->dev->dev.archdata.iommu = NULL;
            spin_unlock_irqrestore(&device_domain_lock, flags);

            iommu_disable_dev_iotlb(info);
            iommu_detach_dev(iommu, info->bus, info->devfn);
            iommu_detach_dependent_devices(iommu, pdev);
            free_devinfo_mem(info);

            spin_lock_irqsave(&device_domain_lock, flags);

            if (found)
                break;
            else
                continue;
        }

        /* if there is no other devices under the same iommu
         * owned by this domain, clear this iommu in iommu_bmp
         * update iommu count and coherency
         */
        if (iommu == device_to_iommu(info->segment, info->bus,
                                     info->devfn))
            found = 1;
    }

    spin_unlock_irqrestore(&device_domain_lock, flags);

    if (found == 0) {
        unsigned long tmp_flags;
        spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
        clear_bit(iommu->seq_id, &domain->iommu_bmp);
        domain->iommu_count--;
        domain_update_iommu_cap(domain);
        spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);

        if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
            !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
            spin_lock_irqsave(&iommu->lock, tmp_flags);
            clear_bit(domain->id, iommu->domain_ids);
            iommu->domains[domain->id] = NULL;
            spin_unlock_irqrestore(&iommu->lock, tmp_flags);
        }
    }
}

static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
{
    struct device_domain_info *info;
    struct intel_iommu *iommu;
    unsigned long flags1, flags2;

    spin_lock_irqsave(&device_domain_lock, flags1);
    while (!list_empty(&domain->devices)) {
        info = list_entry(domain->devices.next,
                          struct device_domain_info, link);
        list_del(&info->link);
        list_del(&info->global);
        if (info->dev)
            info->dev->dev.archdata.iommu = NULL;

        spin_unlock_irqrestore(&device_domain_lock, flags1);

        iommu_disable_dev_iotlb(info);
        iommu = device_to_iommu(info->segment, info->bus, info->devfn);
        iommu_detach_dev(iommu, info->bus, info->devfn);
        iommu_detach_dependent_devices(iommu, info->dev);

        /* clear this iommu in iommu_bmp, update iommu count
         * and capabilities
         */
        spin_lock_irqsave(&domain->iommu_lock, flags2);
        if (test_and_clear_bit(iommu->seq_id,
                               &domain->iommu_bmp)) {
            domain->iommu_count--;
            domain_update_iommu_cap(domain);
        }
        spin_unlock_irqrestore(&domain->iommu_lock, flags2);

        free_devinfo_mem(info);
        spin_lock_irqsave(&device_domain_lock, flags1);
    }
    spin_unlock_irqrestore(&device_domain_lock, flags1);
}

/* domain id for virtual machine, it won't be set in context */
static unsigned long vm_domid;

static struct dmar_domain *iommu_alloc_vm_domain(void)
{
    struct dmar_domain *domain;

    domain = alloc_domain_mem();
    if (!domain)
        return NULL;

    domain->id = vm_domid++;
    domain->nid = -1;
    memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
    domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;

    return domain;
}

static int md_domain_init(struct dmar_domain *domain, int guest_width)
{
    int adjust_width;

    init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
    spin_lock_init(&domain->iommu_lock);

    domain_reserve_special_ranges(domain);

    /* calculate AGAW */
    domain->gaw = guest_width;
    adjust_width = guestwidth_to_adjustwidth(guest_width);
    domain->agaw = width_to_agaw(adjust_width);

    INIT_LIST_HEAD(&domain->devices);

    domain->iommu_count = 0;
    domain->iommu_coherency = 0;
    domain->iommu_snooping = 0;
    domain->iommu_superpage = 0;
    domain->max_addr = 0;
    domain->nid = -1;

    /* always allocate the top pgd */
    domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
    if (!domain->pgd)
        return -ENOMEM;
    domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
    return 0;
}

static void iommu_free_vm_domain(struct dmar_domain *domain)
{
    unsigned long flags;
    struct dmar_drhd_unit *drhd;
    struct intel_iommu *iommu;
    unsigned long i;
    unsigned long ndomains;

    for_each_drhd_unit(drhd) {
        if (drhd->ignored)
            continue;
        iommu = drhd->iommu;

        ndomains = cap_ndoms(iommu->cap);
        for_each_set_bit(i, iommu->domain_ids, ndomains) {
            if (iommu->domains[i] == domain) {
                spin_lock_irqsave(&iommu->lock, flags);
                clear_bit(i, iommu->domain_ids);
                iommu->domains[i] = NULL;
                spin_unlock_irqrestore(&iommu->lock, flags);
                break;
            }
        }
    }
}

static void vm_domain_exit(struct dmar_domain *domain)
{
    /* Domain 0 is reserved, so don't process it */
    if (!domain)
        return;

    vm_domain_remove_all_dev_info(domain);
    /* destroy iovas */
    put_iova_domain(&domain->iovad);

    /* clear ptes */
    dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

    /* free page tables */
    dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

    iommu_free_vm_domain(domain);
    free_domain_mem(domain);
}

static int intel_iommu_domain_init(struct iommu_domain *domain)
{
    struct dmar_domain *dmar_domain;

    dmar_domain = iommu_alloc_vm_domain();
    if (!dmar_domain) {
        printk(KERN_ERR
               "intel_iommu_domain_init: dmar_domain == NULL\n");
        return -ENOMEM;
    }
    if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
        printk(KERN_ERR
               "intel_iommu_domain_init() failed\n");
        vm_domain_exit(dmar_domain);
        return -ENOMEM;
    }
    domain->priv = dmar_domain;

    return 0;
}

static void intel_iommu_domain_destroy(struct iommu_domain *domain)
{
    struct dmar_domain *dmar_domain = domain->priv;

    domain->priv = NULL;
    vm_domain_exit(dmar_domain);
}
static int intel_iommu_attach_device(struct iommu_domain *domain,
                                     struct device *dev)
{
    struct dmar_domain *dmar_domain = domain->priv;
    struct pci_dev *pdev = to_pci_dev(dev);
    struct intel_iommu *iommu;
    int addr_width;

    /* normally pdev is not mapped */
    if (unlikely(domain_context_mapped(pdev))) {
        struct dmar_domain *old_domain;

        old_domain = find_domain(pdev);
        if (old_domain) {
            if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
                dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
                domain_remove_one_dev_info(old_domain, pdev);
            else
                domain_remove_dev_info(old_domain);
        }
    }

    iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
                            pdev->devfn);
    if (!iommu)
        return -ENODEV;

    /* check if this iommu agaw is sufficient for max mapped address */
    addr_width = agaw_to_width(iommu->agaw);
    if (addr_width > cap_mgaw(iommu->cap))
        addr_width = cap_mgaw(iommu->cap);

    if (dmar_domain->max_addr > (1LL << addr_width)) {
        printk(KERN_ERR "%s: iommu width (%d) is not "
               "sufficient for the mapped address (%llx)\n",
               __func__, addr_width, dmar_domain->max_addr);
        return -EFAULT;
    }
    dmar_domain->gaw = addr_width;

    /*
     * Knock out extra levels of page tables if necessary
     */
    while (iommu->agaw < dmar_domain->agaw) {
        struct dma_pte *pte;

        pte = dmar_domain->pgd;
        if (dma_pte_present(pte)) {
            dmar_domain->pgd = (struct dma_pte *)
                phys_to_virt(dma_pte_addr(pte));
            free_pgtable_page(pte);
        }
        dmar_domain->agaw--;
    }

    return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
}

static void intel_iommu_detach_device(struct iommu_domain *domain,
                                      struct device *dev)
{
    struct dmar_domain *dmar_domain = domain->priv;
    struct pci_dev *pdev = to_pci_dev(dev);

    domain_remove_one_dev_info(dmar_domain, pdev);
}

static int intel_iommu_map(struct iommu_domain *domain,
                           unsigned long iova, phys_addr_t hpa,
                           int gfp_order, int iommu_prot)
{
    struct dmar_domain *dmar_domain = domain->priv;
    u64 max_addr;
    int prot = 0;
    size_t size;
    int ret;

    if (iommu_prot & IOMMU_READ)
        prot |= DMA_PTE_READ;
    if (iommu_prot & IOMMU_WRITE)
        prot |= DMA_PTE_WRITE;
    if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
        prot |= DMA_PTE_SNP;

    size = PAGE_SIZE << gfp_order;
    max_addr = iova + size;
    if (dmar_domain->max_addr < max_addr) {
        u64 end;

        /* check if minimum agaw is sufficient for mapped address */
        end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
        if (end < max_addr) {
            printk(KERN_ERR "%s: iommu width (%d) is not "
                   "sufficient for the mapped address (%llx)\n",
                   __func__, dmar_domain->gaw, max_addr);
            return -EFAULT;
        }
        dmar_domain->max_addr = max_addr;
    }
    /* Round up size to next multiple of PAGE_SIZE, if it and
       the low bits of hpa would take us onto the next page */
    size = aligned_nrpages(hpa, size);
    ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
                             hpa >> VTD_PAGE_SHIFT, size, prot);
    return ret;
}

static int intel_iommu_unmap(struct iommu_domain *domain,
                             unsigned long iova, int gfp_order)
{
    struct dmar_domain *dmar_domain = domain->priv;
    size_t size = PAGE_SIZE << gfp_order;

    dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
                        (iova + size - 1) >> VTD_PAGE_SHIFT);

    if (dmar_domain->max_addr == iova + size)
        dmar_domain->max_addr = iova;

    return gfp_order;
}

static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
                                            unsigned long iova)
{
    struct dmar_domain *dmar_domain = domain->priv;
    struct dma_pte *pte;
    u64 phys = 0;

    pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
    if (pte)
        phys = dma_pte_addr(pte);

    return phys;
}

static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
                                      unsigned long cap)
{
    struct dmar_domain *dmar_domain = domain->priv;

    if (cap == IOMMU_CAP_CACHE_COHERENCY)
        return dmar_domain->iommu_snooping;
    if (cap == IOMMU_CAP_INTR_REMAP)
        return intr_remapping_enabled;

    return 0;
}
static struct iommu_ops intel_iommu_ops = {
    .domain_init = intel_iommu_domain_init,
    .domain_destroy = intel_iommu_domain_destroy,
    .attach_dev = intel_iommu_attach_device,
    .detach_dev = intel_iommu_detach_device,
    .map = intel_iommu_map,
    .unmap = intel_iommu_unmap,
    .iova_to_phys = intel_iommu_iova_to_phys,
    .domain_has_cap = intel_iommu_domain_has_cap,
};
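/*
 * Editor's illustrative sketch (not part of the driver): with intel_iommu_ops
 * registered via register_iommu() in intel_iommu_init(), a KVM-style user of
 * the generic IOMMU API of this kernel generation drives the callbacks above
 * roughly like this (error handling omitted, values made up):
 *
 *    struct iommu_domain *dom = iommu_domain_alloc();        // -> .domain_init
 *    iommu_attach_device(dom, &pdev->dev);                   // -> .attach_dev
 *    iommu_map(dom, iova, hpa, 0, IOMMU_READ | IOMMU_WRITE); // order-0 page
 *    phys_addr_t p = iommu_iova_to_phys(dom, iova);          // -> .iova_to_phys
 *    iommu_unmap(dom, iova, 0);                              // -> .unmap
 *    iommu_detach_device(dom, &pdev->dev);                   // -> .detach_dev
 *    iommu_domain_free(dom);                                 // -> .domain_destroy
 *
 * The gfp_order argument is log2 of the number of pages, matching the
 * "PAGE_SIZE << gfp_order" computation in intel_iommu_map()/unmap() above.
 */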
static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
{
    /*
     * Mobile 4 Series Chipset neglects to set RWBF capability,
     * but needs it:
     */
    printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
    rwbf_quirk = 1;

    /* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */
    if (dev->revision == 0x07) {
        printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
        dmar_map_gfx = 0;
    }
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);

#define GGC 0x52
#define GGC_MEMORY_SIZE_MASK	(0xf << 8)
#define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
#define GGC_MEMORY_SIZE_1M	(0x1 << 8)
#define GGC_MEMORY_SIZE_2M	(0x3 << 8)
#define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)

static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
{
    unsigned short ggc;

    if (pci_read_config_word(dev, GGC, &ggc))
        return;

    if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
        printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
        dmar_map_gfx = 0;
    }
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);

/* On Tylersburg chipsets, some BIOSes have been known to enable the
   ISOCH DMAR unit for the Azalia sound device, but not give it any
   TLB entries, which causes it to deadlock. Check for that.  We do
   this in a function called from init_dmars(), instead of in a PCI
   quirk, because we don't want to print the obnoxious "BIOS broken"
   message if VT-d is actually disabled.
*/
static void __init check_tylersburg_isoch(void)
{
    struct pci_dev *pdev;
    uint32_t vtisochctrl;

    /* If there's no Azalia in the system anyway, forget it. */
    pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
    if (!pdev)
        return;
    pci_dev_put(pdev);

    /* System Management Registers. Might be hidden, in which case
       we can't do the sanity check. But that's OK, because the
       known-broken BIOSes _don't_ actually hide it, so far. */
    pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
    if (!pdev)
        return;

    if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
        pci_dev_put(pdev);
        return;
    }

    pci_dev_put(pdev);

    /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
    if (vtisochctrl & 1)
        return;

    /* Drop all bits other than the number of TLB entries */
    vtisochctrl &= 0x1c;

    /* If we have the recommended number of TLB entries (16), fine. */
    if (vtisochctrl == 0x10)
        return;

    /* Zero TLB entries? You get to ride the short bus to school. */
    if (!vtisochctrl) {
        WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
             "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
             dmi_get_system_info(DMI_BIOS_VENDOR),
             dmi_get_system_info(DMI_BIOS_VERSION),
             dmi_get_system_info(DMI_PRODUCT_VERSION));
        iommu_identity_mapping |= IDENTMAP_AZALIA;
        return;
    }

    printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",