/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 */
#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_BRIDGE_HOST_DEVICE(pdev) \
			((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define MAX_AGAW_WIDTH 64

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
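/*
 * Each page-table level resolves LEVEL_STRIDE (9) bits of the DMA PFN,
 * i.e. 512 entries per 4KiB table page, mirroring the x86-64 layout.
 * The agaw/width helpers below convert between the adjusted guest
 * address width programmed into hardware and these 9-bit levels.
 */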
static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;
}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << ((lvl - 1) * LEVEL_STRIDE);
}

/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}

static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}
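/*
 * Note: the pfn conversion helpers above rely on PAGE_SHIFT >=
 * VTD_PAGE_SHIFT, i.e. a CPU page always covers one or more whole
 * 4KiB VT-d pages, so the shifts never lose bits.
 */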
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * set to 1 to panic kernel if can't successfully enable VT-d
 * (used when kernel is launched w/ TXT)
 */
static int force_on = 0;

/*
 * 0: Present
 * 1-11: Reserved
 * 12-63: Context Ptr (12 - (haw-1))
 * 64-127: Reserved
 */
struct root_entry {
	u64	val;
	u64	rsvd1;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
static inline bool root_present(struct root_entry *root)
{
	return (root->val & 1);
}
static inline void set_root_present(struct root_entry *root)
{
	root->val |= 1;
}
static inline void set_root_value(struct root_entry *root, unsigned long value)
{
	root->val |= value & VTD_PAGE_MASK;
}

static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
	return (struct context_entry *)
		(root_present(root)?phys_to_virt(
		root->val & VTD_PAGE_MASK) :
		NULL);
}
/*
 * low 64 bits:
 * 0: present
 * 1: fault processing disable
 * 2-3: translation type
 * 12-63: address space root
 * high 64 bits:
 * 0-2: address width
 * 3-6: aval
 * 8-23: domain id
 */
struct context_entry {
	u64 lo;
	u64 hi;
};

static inline bool context_present(struct context_entry *context)
{
	return (context->lo & 1);
}
static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}
/*
 * 0: readable
 * 1: writable
 * 2-6: reserved
 * 7: super page
 * 8-10: available
 * 11: snoop behavior
 * 12-63: Host physical address
 */
struct dma_pte {
	u64 val;
};

static inline void dma_clear_pte(struct dma_pte *pte)
{
	pte->val = 0;
}

static inline void dma_set_pte_readable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_READ;
}

static inline void dma_set_pte_writable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_WRITE;
}

static inline void dma_set_pte_snp(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_SNP;
}

static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
{
	pte->val = (pte->val & ~3) | (prot & 3);
}

static inline u64 dma_pte_addr(struct dma_pte *pte)
{
#ifdef CONFIG_64BIT
	return pte->val & VTD_PAGE_MASK;
#else
	/* Must have a full atomic 64-bit read */
	return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
#endif
}

static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
{
	pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
}

static inline bool dma_pte_present(struct dma_pte *pte)
{
	return (pte->val & 3) != 0;
}

static inline int first_pte_in_page(struct dma_pte *pte)
{
	return !((unsigned long)pte & ~VTD_PAGE_MASK);
}
/*
 * This domain is a statically identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/* devices under the same p2p bridge are owned in one domain */
#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)

/* domain represents a virtual machine, more than one device
 * across iommus may be owned in one domain, e.g. kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 1)

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 2)
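/*
 * A dmar_domain (below) is the unit of address-space isolation: it owns
 * an IOVA allocator and a page-table hierarchy, and may be shared by
 * several devices (p2p bridge siblings, a VM, or the static identity
 * domain, as indicated by the flags above).
 */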
struct dmar_domain {
	int	id;			/* domain id */
	int	nid;			/* node id */
	unsigned long iommu_bmp;	/* bitmap of iommus this domain uses*/

	struct list_head devices; 	/* all devices' list */
	struct iova_domain iovad;	/* iova's that belong to this domain */

	struct dma_pte	*pgd;		/* virtual address */
	int		gaw;		/* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int		agaw;

	int		flags;		/* flags to find out type of domain */

	int		iommu_coherency;/* indicate coherency of iommu access */
	int		iommu_snooping; /* indicate snooping control feature*/
	int		iommu_count;	/* reference count of iommu */
	int		iommu_superpage;/* Level of superpages supported:
					   0 == 4KiB (no superpages), 1 == 2MiB,
					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
	spinlock_t	iommu_lock;	/* protect iommu set in domain */
	u64		max_addr;	/* maximum mapped address */
};
/* PCI domain-device relationship */
struct device_domain_info {
	struct list_head link;	/* link to domain siblings */
	struct list_head global; /* link to global list */
	int segment;		/* PCI domain */
	u8 bus;			/* PCI bus number */
	u8 devfn;		/* PCI devfn number */
	struct pci_dev *dev;	/* it's NULL for PCIe-to-PCI bridge */
	struct intel_iommu *iommu; /* IOMMU used by this device */
	struct dmar_domain *domain; /* pointer to domain */
};
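/*
 * Each device_domain_info is linked twice: on its owning domain's
 * 'devices' list via 'link', and on the global device_domain_list via
 * 'global', both protected by device_domain_lock.
 */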
static void flush_unmaps_timeout(unsigned long data);

DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);

#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
	int next;
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

static int timer_on;
static long list_size;
static void domain_remove_dev_info(struct dmar_domain *domain);

#ifdef CONFIG_DMAR_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif /*CONFIG_DMAR_DEFAULT_ON*/

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

static struct iommu_ops intel_iommu_ops;
static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			printk(KERN_INFO "Intel-IOMMU: enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			printk(KERN_INFO "Intel-IOMMU: disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			printk(KERN_INFO
			       "Intel-IOMMU: disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			printk(KERN_INFO
			       "Intel-IOMMU: Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			printk(KERN_INFO
			       "Intel-IOMMU: disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		} else if (!strncmp(str, "sp_off", 6)) {
			printk(KERN_INFO
			       "Intel-IOMMU: disable supported super page\n");
			intel_iommu_superpage = 0;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);
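/*
 * Example (illustrative): booting with "intel_iommu=on,strict" enables
 * translation and disables the batched IOTLB flush, while
 * "intel_iommu=igfx_off" keeps the integrated graphics device out of the
 * IOMMU. Options are comma separated, as parsed above.
 */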
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
static struct kmem_cache *iommu_iova_cache;

static inline void *alloc_pgtable_page(int node)
{
	struct page *page;
	void *vaddr = NULL;

	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (page)
		vaddr = page_address(page);
	return vaddr;
}

static inline void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void * alloc_devinfo_mem(void)
{
	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

struct iova *alloc_iova_mem(void)
{
	return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
}

void free_iova_mem(struct iova *iova)
{
	kmem_cache_free(iommu_iova_cache, iova);
}
static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw = -1;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(max_gaw);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}

/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * calculate agaw for each iommu.
 * "SAGAW" may be different across iommus, use a default agaw, and
 * get a supported less agaw for iommus that don't support the default agaw.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}
/* This function only returns single iommu in a domain */
static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	/* si_domain and vm domain should not get here. */
	BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
	BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);

	iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}
static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	int i;

	domain->iommu_coherency = 1;

	for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
}

static void domain_update_iommu_snooping(struct dmar_domain *domain)
{
	int i;

	domain->iommu_snooping = 1;

	for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
		if (!ecap_sc_support(g_iommus[i]->ecap)) {
			domain->iommu_snooping = 0;
			break;
		}
	}
}

static void domain_update_iommu_superpage(struct dmar_domain *domain)
{
	int i, mask = 0xf;

	if (!intel_iommu_superpage) {
		domain->iommu_superpage = 0;
		return;
	}

	domain->iommu_superpage = 4; /* 1TiB */

	for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
		mask |= cap_super_page_val(g_iommus[i]->cap);
		if (!mask)
			break;
	}
	domain->iommu_superpage = fls(mask);
}

/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);
	domain_update_iommu_snooping(domain);
	domain_update_iommu_superpage(domain);
}
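/*
 * The per-domain capability bits above are conservative: coherency and
 * snoop control are only reported if every IOMMU the domain spans
 * supports them, and the usable superpage level is derived from the
 * per-IOMMU superpage capability bits.
 */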
static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	int i;

	for_each_drhd_unit(drhd) {
		if (segment != drhd->segment)
			continue;

		for (i = 0; i < drhd->devices_cnt; i++) {
			if (drhd->devices[i] &&
			    drhd->devices[i]->bus->number == bus &&
			    drhd->devices[i]->devfn == devfn)
				return drhd->iommu;
			if (drhd->devices[i] &&
			    drhd->devices[i]->subordinate &&
			    drhd->devices[i]->subordinate->number <= bus &&
			    drhd->devices[i]->subordinate->subordinate >= bus)
				return drhd->iommu;
		}

		if (drhd->include_all)
			return drhd->iommu;
	}

	return NULL;
}
#ifdef CONFIG_HOTPLUG
	struct list_head list;

save_dev_dmaru(int segment, unsigned char bus, unsigned int devfn,
	       void *dmaru, int index, struct list_head *lh)
	m = kzalloc(sizeof(*m), GFP_KERNEL);
	m->segment = segment;
	list_add(&m->list, lh);

*get_dev_dmaru(int segment, unsigned char bus, unsigned int devfn,
	       int *index, struct list_head *lh)
	list_for_each_entry(m, lh, list) {
		if (m->segment == segment &&
		    m->bus == bus && m->devfn == devfn) {

static LIST_HEAD(saved_dev_drhd_list);
static void remove_dev_from_drhd(struct pci_dev *dev)
	struct dmar_drhd_unit *drhd = NULL;
	int segment = pci_domain_nr(dev->bus);

	for_each_drhd_unit(drhd) {
		if (segment != drhd->segment)

		for (i = 0; i < drhd->devices_cnt; i++) {
			if (drhd->devices[i] == dev) {
				/* save it at first if it is in drhd */
				save_dev_dmaru(segment, dev->bus->number,
					       &saved_dev_drhd_list);
				/* always remove it */
				drhd->devices[i] = NULL;

static void restore_dev_to_drhd(struct pci_dev *dev)
	struct dmar_drhd_unit *drhd = NULL;

	/* find the stored drhd */
	drhd = get_dev_dmaru(pci_domain_nr(dev->bus), dev->bus->number,
			     dev->devfn, &i, &saved_dev_drhd_list);
	/* restore that into drhd */
		drhd->devices[i] = dev;

static void remove_dev_from_drhd(struct pci_dev *dev)

static void restore_dev_to_drhd(struct pci_dev *dev)
#if defined(CONFIG_DMAR) && defined(CONFIG_HOTPLUG)
static LIST_HEAD(saved_dev_atsr_list);

static void remove_dev_from_atsr(struct pci_dev *dev)
	struct dmar_atsr_unit *atsr = NULL;
	int segment = pci_domain_nr(dev->bus);

	for_each_atsr_unit(atsr) {
		if (segment != atsr->segment)

		for (i = 0; i < atsr->devices_cnt; i++) {
			if (atsr->devices[i] == dev) {
				/* save it at first if it is in drhd */
				save_dev_dmaru(segment, dev->bus->number,
					       &saved_dev_atsr_list);
				/* always remove it */
				atsr->devices[i] = NULL;

static void restore_dev_to_atsr(struct pci_dev *dev)
	struct dmar_atsr_unit *atsr = NULL;

	/* find the stored atsr */
	atsr = get_dev_dmaru(pci_domain_nr(dev->bus), dev->bus->number,
			     dev->devfn, &i, &saved_dev_atsr_list);
	/* restore that into atsr */
		atsr->devices[i] = dev;

static void remove_dev_from_atsr(struct pci_dev *dev)

static void restore_dev_to_atsr(struct pci_dev *dev)
static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}
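/*
 * When an IOMMU is not cache coherent (ecap coherency clear), page-table
 * and context-table updates must be written back with clflush before the
 * hardware can walk them; domain_flush_cache() is the common helper.
 */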
/* Gets context entry for a given bus and devfn */
static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
		u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long phy_addr;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		context = (struct context_entry *)
				alloc_pgtable_page(iommu->node);
		if (!context) {
			spin_unlock_irqrestore(&iommu->lock, flags);
			return NULL;
		}
		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		set_root_value(root, phy_addr);
		set_root_present(root);
		__iommu_flush_cache(iommu, root, sizeof(*root));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
	return &context[devfn];
}
static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		ret = 0;
		goto out;
	}
	ret = context_present(&context[devfn]);
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}
static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (context) {
		context_clear_entry(&context[devfn]);
		__iommu_flush_cache(iommu, &context[devfn], \
			sizeof(*context));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
}
static void free_context_table(struct intel_iommu *iommu)
{
	struct root_entry *root;
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry) {
		goto out;
	}
	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		root = &iommu->root_entry[i];
		context = get_context_addr_from_root(root);
		if (context)
			free_pgtable_page(context);
	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int large_level)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	struct dma_pte *parent, *pte = NULL;
	int level = agaw_to_level(domain->agaw);
	int offset, target_level;

	BUG_ON(!domain->pgd);
	BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
	parent = domain->pgd;

	/* Search pte */
	if (!large_level)
		target_level = 1;
	else
		target_level = large_level;

	while (level > 0) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
			break;
		if (level == target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);
			if (!tmp_page)
				return NULL;

			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			if (cmpxchg64(&pte->val, 0ULL, pteval)) {
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			} else {
				domain_flush_cache(domain, pte, sizeof(*pte));
			}
		}
		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	return pte;
}
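/*
 * pfn_to_dma_pte() walks from the top level down to the requested level,
 * allocating intermediate table pages on demand. The cmpxchg64 lets two
 * CPUs race to install the same intermediate level without a lock; the
 * loser simply frees its freshly allocated page.
 */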
/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
					 unsigned long pfn,
					 int level, int *large_page)
{
	struct dma_pte *parent, *pte = NULL;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = pfn_level_offset(pfn, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte)) {
			*large_page = total;
			break;
		}

		if (pte->val & DMA_PTE_LARGE_PAGE) {
			*large_page = total;
			return pte;
		}

		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}
/* clear last level pte, a tlb flush should be followed */
static void dma_pte_clear_range(struct dmar_domain *domain,
				unsigned long start_pfn,
				unsigned long last_pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	unsigned int large_page = 1;
	struct dma_pte *first_pte, *pte;

	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	do {
		large_page = 1;
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
		if (!pte) {
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
			continue;
		}
		do {
			dma_clear_pte(pte);
			start_pfn += lvl_to_nr_pages(large_page);
			pte++;
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	} while (start_pfn && start_pfn <= last_pfn);
}
/* free page table pages. last level pte should already be cleared */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
				   unsigned long start_pfn,
				   unsigned long last_pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	struct dma_pte *first_pte, *pte;
	int total = agaw_to_level(domain->agaw);
	int level;
	unsigned long tmp;
	int large_page = 2;

	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
	BUG_ON(start_pfn > last_pfn);

	/* We don't need lock here; nobody else touches the iova range */
	level = 2;
	while (level <= total) {
		tmp = align_to_level(start_pfn, level);

		/* If we can't even clear one PTE at this level, we're done */
		if (tmp + level_size(level) - 1 > last_pfn)
			return;

		do {
			large_page = level;
			first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
			if (large_page > level)
				level = large_page + 1;
			if (!pte) {
				tmp = align_to_level(tmp + 1, level + 1);
				continue;
			}
			do {
				if (dma_pte_present(pte)) {
					free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
					dma_clear_pte(pte);
				}
				pte++;
				tmp += level_size(level);
			} while (!first_pte_in_page(pte) &&
				 tmp + level_size(level) - 1 <= last_pfn);

			domain_flush_cache(domain, first_pte,
					   (void *)pte - (void *)first_pte);

		} while (tmp && tmp + level_size(level) - 1 <= last_pfn);
		level++;
	}
	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}
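/*
 * dma_pte_clear_range() only zeroes leaf PTEs (the caller is expected to
 * flush the IOTLB afterwards); dma_pte_free_pagetable() then walks the
 * higher levels and releases page-table pages whose whole range was
 * unmapped, freeing the top-level pgd only when the entire domain
 * address space is covered.
 */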
/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page(iommu->node);

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);
static void iommu_set_root_entry(struct intel_iommu *iommu)
	addr = iommu->root_entry;

	spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
static void iommu_flush_write_buffer(struct intel_iommu *iommu)
	if (!rwbf_quirk && !cap_rwbf(iommu->cap))

	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
/* return value determine if we need a write buffer flush */
static void __iommu_flush_context(struct intel_iommu *iommu,
				  u16 did, u16 source_id, u8 function_mask,
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
	val |= DMA_CCMD_ICC;

	spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		      dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
/* return value determine if we need a write buffer flush */
static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
				u64 addr, unsigned int size_order, u64 type)
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;

	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* Note: always flush non-leaf currently */
		val_iva = size_order | addr;

	/* Note: set drain read/write */
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		      dmar_readq, (!(val & DMA_TLB_IVT)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		printk(KERN_ERR "IOMMU: flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
			 (unsigned long long)DMA_TLB_IIRG(type),
			 (unsigned long long)DMA_TLB_IAIG(val));
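/*
 * The IOTLB can be invalidated at three granularities: global, domain
 * selective (DSI) or page selective (PSI). After the command completes,
 * the actual granularity (IAIG) is compared against the request (IIRG)
 * above, since hardware may legitimately perform a coarser flush.
 */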
static struct device_domain_info *iommu_support_dev_iotlb(
	struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
	unsigned long flags;
	struct device_domain_info *info;
	struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);

	if (!ecap_dev_iotlb_support(iommu->ecap))

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link)
		if (info->bus == bus && info->devfn == devfn) {
	spin_unlock_irqrestore(&device_domain_lock, flags);

	if (!found || !info->dev)

	if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))

	if (!dmar_find_matched_atsr_unit(info->dev))

	info->iommu = iommu;
static void iommu_enable_dev_iotlb(struct device_domain_info *info)
	pci_enable_ats(info->dev, VTD_PAGE_SHIFT);

static void iommu_disable_dev_iotlb(struct device_domain_info *info)
	if (!info->dev || !pci_ats_enabled(info->dev))

	pci_disable_ats(info->dev);

static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
				  u64 addr, unsigned mask)
	unsigned long flags;
	struct device_domain_info *info;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link) {
		if (!info->dev || !pci_ats_enabled(info->dev))

		sid = info->bus << 8 | info->devfn;
		qdep = pci_ats_queue_depth(info->dev);
		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
	spin_unlock_irqrestore(&device_domain_lock, flags);
static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
				  unsigned long pfn, unsigned int pages, int map)
	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;

	/*
	 * Fallback to domain selective flush if no PSI support or the size is
	 * too big.
	 * PSI requires page size to be 2 ^ x, and the base address is naturally
	 * aligned to the size
	 */
	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
		iommu->flush.flush_iotlb(iommu, did, 0, 0,
		iommu->flush.flush_iotlb(iommu, did, addr, mask,

	/*
	 * In caching mode, changes of pages from non-present to present require
	 * flush. However, device IOTLB doesn't need to be flushed in this case.
	 */
	if (!cap_caching_mode(iommu->cap) || !map)
		iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
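/*
 * PSI can only flush a power-of-two number of pages naturally aligned to
 * its size, so oversized or unsupported requests above fall back to a
 * domain selective flush. In caching mode even non-present to present
 * updates must be flushed, but the device IOTLB need not be.
 */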
static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
	unsigned long flags;

	spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		      readl, !(pmen & DMA_PMEN_PRS), pmen);

	spin_unlock_irqrestore(&iommu->register_lock, flags);

static int iommu_enable_translation(struct intel_iommu *iommu)
	unsigned long flags;

	spin_lock_irqsave(&iommu->register_lock, flags);
	iommu->gcmd |= DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_TES), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flags);
static int iommu_disable_translation(struct intel_iommu *iommu)
	spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(sts & DMA_GSTS_TES)), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
static int iommu_init_domains(struct intel_iommu *iommu)
	unsigned long ndomains;
	unsigned long nlongs;

	ndomains = cap_ndoms(iommu->cap);
	pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
	nlongs = BITS_TO_LONGS(ndomains);

	spin_lock_init(&iommu->lock);

	/* TBD: there might be 64K domains,
	 * consider other allocation for future chip
	 */
	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
	if (!iommu->domain_ids) {
		printk(KERN_ERR "Allocating domain id array failed\n");
	iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
	if (!iommu->domains) {
		printk(KERN_ERR "Allocating domain array failed\n");

	/*
	 * if Caching mode is set, then invalid translations are tagged
	 * with domainid 0. Hence we need to pre-allocate it.
	 */
	if (cap_caching_mode(iommu->cap))
		set_bit(0, iommu->domain_ids);
static void domain_exit(struct dmar_domain *domain);
static void vm_domain_exit(struct dmar_domain *domain);

void free_dmar_iommu(struct intel_iommu *iommu)
	struct dmar_domain *domain;
	unsigned long flags;

	if ((iommu->domains) && (iommu->domain_ids)) {
		for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
			domain = iommu->domains[i];
			clear_bit(i, iommu->domain_ids);

			spin_lock_irqsave(&domain->iommu_lock, flags);
			if (--domain->iommu_count == 0) {
				if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
					vm_domain_exit(domain);
					domain_exit(domain);
			spin_unlock_irqrestore(&domain->iommu_lock, flags);

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);

		irq_set_handler_data(iommu->irq, NULL);
		/* This will mask the irq */
		free_irq(iommu->irq, iommu);
		destroy_irq(iommu->irq);

	kfree(iommu->domains);
	kfree(iommu->domain_ids);

	g_iommus[iommu->seq_id] = NULL;

	/* if all iommus are freed, free g_iommus */
	for (i = 0; i < g_num_of_iommus; i++) {
	if (i == g_num_of_iommus)

	/* free context mapping */
	free_context_table(iommu);
static struct dmar_domain *alloc_domain(void)
	struct dmar_domain *domain;

	domain = alloc_domain_mem();

	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));

static int iommu_attach_domain(struct dmar_domain *domain,
			       struct intel_iommu *iommu)
	unsigned long ndomains;
	unsigned long flags;

	ndomains = cap_ndoms(iommu->cap);

	spin_lock_irqsave(&iommu->lock, flags);

	num = find_first_zero_bit(iommu->domain_ids, ndomains);
	if (num >= ndomains) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		printk(KERN_ERR "IOMMU: no free domain ids\n");

	set_bit(num, iommu->domain_ids);
	set_bit(iommu->seq_id, &domain->iommu_bmp);
	iommu->domains[num] = domain;
	spin_unlock_irqrestore(&iommu->lock, flags);
static void iommu_detach_domain(struct dmar_domain *domain,
				struct intel_iommu *iommu)
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	ndomains = cap_ndoms(iommu->cap);
	for_each_set_bit(num, iommu->domain_ids, ndomains) {
		if (iommu->domains[num] == domain) {

	clear_bit(num, iommu->domain_ids);
	clear_bit(iommu->seq_id, &domain->iommu_bmp);
	iommu->domains[num] = NULL;
	spin_unlock_irqrestore(&iommu->lock, flags);
static struct iova_domain reserved_iova_list;
static struct lock_class_key reserved_rbtree_key;

static int dmar_init_reserved_ranges(void)
	struct pci_dev *pdev = NULL;

	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);

	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
			  &reserved_rbtree_key);

	/* IOAPIC ranges shouldn't be accessed by DMA */
	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
			    IOVA_PFN(IOAPIC_RANGE_END));
		printk(KERN_ERR "Reserve IOAPIC range failed\n");

	/* Reserve all PCI MMIO to avoid peer-to-peer access */
	for_each_pci_dev(pdev) {
		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
			r = &pdev->resource[i];
			if (!r->flags || !(r->flags & IORESOURCE_MEM))
			iova = reserve_iova(&reserved_iova_list,
				printk(KERN_ERR "Reserve iova failed\n");
static void domain_reserve_special_ranges(struct dmar_domain *domain)
	copy_reserved_iova(&reserved_iova_list, &domain->iovad);

static inline int guestwidth_to_adjustwidth(int gaw)
	int r = (gaw - 12) % 9;
static int domain_init(struct dmar_domain *domain, int guest_width)
	struct intel_iommu *iommu;
	int adjust_width, agaw;
	unsigned long sagaw;

	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
	spin_lock_init(&domain->iommu_lock);

	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	iommu = domain_get_iommu(domain);
	if (guest_width > cap_mgaw(iommu->cap))
		guest_width = cap_mgaw(iommu->cap);
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	agaw = width_to_agaw(adjust_width);
	sagaw = cap_sagaw(iommu->cap);
	if (!test_bit(agaw, &sagaw)) {
		/* hardware doesn't support it, choose a bigger one */
		pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
		agaw = find_next_bit(&sagaw, 5, agaw);
	domain->agaw = agaw;
	INIT_LIST_HEAD(&domain->devices);

	if (ecap_coherent(iommu->ecap))
		domain->iommu_coherency = 1;
		domain->iommu_coherency = 0;

	if (ecap_sc_support(iommu->ecap))
		domain->iommu_snooping = 1;
		domain->iommu_snooping = 0;

	domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
	domain->iommu_count = 1;
	domain->nid = iommu->node;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
static void domain_exit(struct dmar_domain *domain)
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	/* Domain 0 is reserved, so dont process it */

	/* Flush any lazy unmaps that may reference this domain */
	if (!intel_iommu_strict)
		flush_unmaps_timeout(0);

	domain_remove_dev_info(domain);

	put_iova_domain(&domain->iovad);

	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

	/* free page tables */
	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

	for_each_active_iommu(iommu, drhd)
		if (test_bit(iommu->seq_id, &domain->iommu_bmp))
			iommu_detach_domain(domain, iommu);

	free_domain_mem(domain);
static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
				      u8 bus, u8 devfn, int translation)
	struct context_entry *context;
	unsigned long flags;
	struct intel_iommu *iommu;
	struct dma_pte *pgd;
	unsigned long ndomains;
	struct device_domain_info *info = NULL;

	pr_debug("Set context mapping for %02x:%02x.%d\n",
		 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));

	BUG_ON(!domain->pgd);
	BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
	       translation != CONTEXT_TT_MULTI_LEVEL);

	iommu = device_to_iommu(segment, bus, devfn);

	context = device_to_context_entry(iommu, bus, devfn);

	spin_lock_irqsave(&iommu->lock, flags);
	if (context_present(context)) {
		spin_unlock_irqrestore(&iommu->lock, flags);

	if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
	    domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {

		/* find an available domain id for this device in iommu */
		ndomains = cap_ndoms(iommu->cap);
		for_each_set_bit(num, iommu->domain_ids, ndomains) {
			if (iommu->domains[num] == domain) {

			num = find_first_zero_bit(iommu->domain_ids, ndomains);
			if (num >= ndomains) {
				spin_unlock_irqrestore(&iommu->lock, flags);
				printk(KERN_ERR "IOMMU: no free domain ids\n");

			set_bit(num, iommu->domain_ids);
			iommu->domains[num] = domain;

		/* Skip top levels of page tables for
		 * iommu which has less agaw than default.
		 * Unnecessary for PT mode.
		 */
		if (translation != CONTEXT_TT_PASS_THROUGH) {
			for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
				pgd = phys_to_virt(dma_pte_addr(pgd));
				if (!dma_pte_present(pgd)) {
					spin_unlock_irqrestore(&iommu->lock, flags);

	context_set_domain_id(context, id);

	if (translation != CONTEXT_TT_PASS_THROUGH) {
		info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
		translation = info ? CONTEXT_TT_DEV_IOTLB :
				     CONTEXT_TT_MULTI_LEVEL;
	/*
	 * In pass through mode, AW must be programmed to indicate the largest
	 * AGAW value supported by hardware. And ASR is ignored by hardware.
	 */
	if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
		context_set_address_width(context, iommu->msagaw);
		context_set_address_root(context, virt_to_phys(pgd));
		context_set_address_width(context, iommu->agaw);

	context_set_translation_type(context, translation);
	context_set_fault_enable(context);
	context_set_present(context);
	domain_flush_cache(domain, context, sizeof(*context));

	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entry we only need to flush the write-buffer. If the
	 * _does_ cache non-present entries, then it does so in the special
	 * domain #0, which we have to flush:
	 */
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   (((u16)bus) << 8) | devfn,
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
		iommu_flush_write_buffer(iommu);
	iommu_enable_dev_iotlb(info);
	spin_unlock_irqrestore(&iommu->lock, flags);

	spin_lock_irqsave(&domain->iommu_lock, flags);
	if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
		domain->iommu_count++;
		if (domain->iommu_count == 1)
			domain->nid = iommu->node;
		domain_update_iommu_cap(domain);
	spin_unlock_irqrestore(&domain->iommu_lock, flags);
domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
	struct pci_dev *tmp, *parent;

	ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
					 pdev->bus->number, pdev->devfn,

	/* dependent device mapping */
	tmp = pci_find_upstream_pcie_bridge(pdev);

	/* Secondary interface's bus number and devfn 0 */
	parent = pdev->bus->self;
	while (parent != tmp) {
		ret = domain_context_mapping_one(domain,
						 pci_domain_nr(parent->bus),
						 parent->bus->number,
						 parent->devfn, translation);
		parent = parent->bus->self;
	if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
		return domain_context_mapping_one(domain,
					pci_domain_nr(tmp->subordinate),
					tmp->subordinate->number, 0,
	else /* this is a legacy PCI bridge */
		return domain_context_mapping_one(domain,
					pci_domain_nr(tmp->bus),
static int domain_context_mapped(struct pci_dev *pdev)
	struct pci_dev *tmp, *parent;
	struct intel_iommu *iommu;

	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,

	ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);

	/* dependent device mapping */
	tmp = pci_find_upstream_pcie_bridge(pdev);

	/* Secondary interface's bus number and devfn 0 */
	parent = pdev->bus->self;
	while (parent != tmp) {
		ret = device_context_mapped(iommu, parent->bus->number,
		parent = parent->bus->self;
	if (pci_is_pcie(tmp))
		return device_context_mapped(iommu, tmp->subordinate->number,
		return device_context_mapped(iommu, tmp->bus->number,
/* Returns a number of VTD pages, but aligned to MM page size */
static inline unsigned long aligned_nrpages(unsigned long host_addr,
	host_addr &= ~PAGE_MASK;
	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;

/* Return largest possible superpage level for a given mapping */
static inline int hardware_largepage_caps(struct dmar_domain *domain,
					  unsigned long iov_pfn,
					  unsigned long phy_pfn,
					  unsigned long pages)
	int support, level = 1;
	unsigned long pfnmerge;

	support = domain->iommu_superpage;

	/* To use a large page, the virtual *and* physical addresses
	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
	   of them will mean we have to use smaller pages. So just
	   merge them and check both at once. */
	pfnmerge = iov_pfn | phy_pfn;

	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
		pages >>= VTD_STRIDE_SHIFT;
		pfnmerge >>= VTD_STRIDE_SHIFT;
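/*
 * Illustrative example (assuming VTD_STRIDE_SHIFT is the 9-bit level
 * stride): if both the IOVA pfn and the physical pfn have their low
 * 9 bits clear and at least 512 pages remain, level 2 (a 2MiB superpage)
 * can be used; another aligned 9 bits would allow a 1GiB page.
 */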
static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
			    struct scatterlist *sg, unsigned long phys_pfn,
			    unsigned long nr_pages, int prot)
	struct dma_pte *first_pte = NULL, *pte = NULL;
	phys_addr_t uninitialized_var(pteval);
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	unsigned long sg_res;
	unsigned int largepage_lvl = 0;
	unsigned long lvl_pages = 0;

	BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);

	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)

	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;

		sg_res = nr_pages + 1;
		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;

	while (nr_pages > 0) {
			sg_res = aligned_nrpages(sg->offset, sg->length);
			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
			sg->dma_length = sg->length;
			pteval = page_to_phys(sg_page(sg)) | prot;
			phys_pfn = pteval >> VTD_PAGE_SHIFT;

			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);

			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);

			/* It is large page*/
			if (largepage_lvl > 1)
				pteval |= DMA_PTE_LARGE_PAGE;
				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;

		/* We don't need lock here, nobody else
		 * touches the iova range
		 */
		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
			static int dumps = 5;
			printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
			       iov_pfn, tmp, (unsigned long long)pteval);
				debug_dma_dump_mappings(NULL);

		lvl_pages = lvl_to_nr_pages(largepage_lvl);

		BUG_ON(nr_pages < lvl_pages);
		BUG_ON(sg_res < lvl_pages);

		nr_pages -= lvl_pages;
		iov_pfn += lvl_pages;
		phys_pfn += lvl_pages;
		pteval += lvl_pages * VTD_PAGE_SIZE;
		sg_res -= lvl_pages;

		/* If the next PTE would be the first in a new page, then we
		   need to flush the cache on the entries we've just written.
		   And then we'll need to recalculate 'pte', so clear it and
		   let it get set again in the if (!pte) block above.

		   If we're done (!nr_pages) we need to flush the cache too.

		   Also if we've been setting superpages, we may need to
		   recalculate 'pte' and switch back to smaller pages for the
		   end of the mapping, if the trailing size is not enough to
		   use another superpage (i.e. sg_res < lvl_pages). */
		if (!nr_pages || first_pte_in_page(pte) ||
		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
			domain_flush_cache(domain, first_pte,
					   (void *)pte - (void *)first_pte);

		if (!sg_res && nr_pages)
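/*
 * __domain_mapping() walks the IOVA range once, choosing the largest
 * superpage level the alignment allows for each chunk, installing PTEs
 * with cmpxchg64_local() (an already-set PTE is a bug), and flushing the
 * CPU cache whenever a page-table page boundary is crossed or the
 * mapping ends.
 */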
static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				    struct scatterlist *sg, unsigned long nr_pages,
	return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);

static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				     unsigned long phys_pfn, unsigned long nr_pages,
	return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);

static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
	clear_context_table(iommu, bus, devfn);
	iommu->flush.flush_context(iommu, 0, 0, 0,
				   DMA_CCMD_GLOBAL_INVL);
	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
static void domain_remove_dev_info(struct dmar_domain *domain)
	struct device_domain_info *info;
	unsigned long flags;
	struct intel_iommu *iommu;

	spin_lock_irqsave(&device_domain_lock, flags);
	while (!list_empty(&domain->devices)) {
		info = list_entry(domain->devices.next,
				  struct device_domain_info, link);
		list_del(&info->link);
		list_del(&info->global);
			info->dev->dev.archdata.iommu = NULL;
		spin_unlock_irqrestore(&device_domain_lock, flags);

		iommu_disable_dev_iotlb(info);
		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
		iommu_detach_dev(iommu, info->bus, info->devfn);
		free_devinfo_mem(info);

		spin_lock_irqsave(&device_domain_lock, flags);
	spin_unlock_irqrestore(&device_domain_lock, flags);
/*
 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
 */
static struct dmar_domain *
find_domain(struct pci_dev *pdev)
	struct device_domain_info *info;

	/* No lock here, assumes no domain exit in normal case */
	info = pdev->dev.archdata.iommu;
		return info->domain;
/* domain is initialized */
static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
	struct dmar_domain *domain, *found = NULL;
	struct intel_iommu *iommu;
	struct dmar_drhd_unit *drhd;
	struct device_domain_info *info, *tmp;
	struct pci_dev *dev_tmp;
	unsigned long flags;
	int bus = 0, devfn = 0;

	domain = find_domain(pdev);

	segment = pci_domain_nr(pdev->bus);

	dev_tmp = pci_find_upstream_pcie_bridge(pdev);
		if (pci_is_pcie(dev_tmp)) {
			bus = dev_tmp->subordinate->number;
			bus = dev_tmp->bus->number;
			devfn = dev_tmp->devfn;
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(info, &device_domain_list, global) {
			if (info->segment == segment &&
			    info->bus == bus && info->devfn == devfn) {
				found = info->domain;
		spin_unlock_irqrestore(&device_domain_lock, flags);
		/* pcie-pci bridge already has a domain, uses it */

	domain = alloc_domain();

	/* Allocate new domain for the device */
	drhd = dmar_find_matched_drhd_unit(pdev);
		printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
	iommu = drhd->iommu;

	ret = iommu_attach_domain(domain, iommu);
		free_domain_mem(domain);

	if (domain_init(domain, gaw)) {
		domain_exit(domain);

	/* register pcie-to-pci device */
		info = alloc_devinfo_mem();
			domain_exit(domain);
		info->segment = segment;
		info->devfn = devfn;
		info->domain = domain;
		/* This domain is shared by devices under p2p bridge */
		domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;

		/* pcie-to-pci bridge already has a domain, uses it */
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(tmp, &device_domain_list, global) {
			if (tmp->segment == segment &&
			    tmp->bus == bus && tmp->devfn == devfn) {
				found = tmp->domain;
			spin_unlock_irqrestore(&device_domain_lock, flags);
			free_devinfo_mem(info);
			domain_exit(domain);
			list_add(&info->link, &domain->devices);
			list_add(&info->global, &device_domain_list);
			spin_unlock_irqrestore(&device_domain_lock, flags);

	info = alloc_devinfo_mem();
	info->segment = segment;
	info->bus = pdev->bus->number;
	info->devfn = pdev->devfn;
	info->domain = domain;
	spin_lock_irqsave(&device_domain_lock, flags);
	/* somebody is fast */
	found = find_domain(pdev);
	if (found != NULL) {
		spin_unlock_irqrestore(&device_domain_lock, flags);
		if (found != domain) {
			domain_exit(domain);
		free_devinfo_mem(info);
	list_add(&info->link, &domain->devices);
	list_add(&info->global, &device_domain_list);
	pdev->dev.archdata.iommu = info;
	spin_unlock_irqrestore(&device_domain_lock, flags);

	/* recheck it here, maybe others set it */
	return find_domain(pdev);
static int iommu_identity_mapping;
#define IDENTMAP_ALL		1
#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

static int iommu_domain_identity_map(struct dmar_domain *domain,
				     unsigned long long start,
				     unsigned long long end)
	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;

	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
			  dma_to_mm_pfn(last_vpfn))) {
		printk(KERN_ERR "IOMMU: reserve iova failed\n");

	pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
		 start, end, domain->id);
	/*
	 * RMRR range might have overlap with physical memory range,
	 */
	dma_pte_clear_range(domain, first_vpfn, last_vpfn);

	return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
				  last_vpfn - first_vpfn + 1,
				  DMA_PTE_READ|DMA_PTE_WRITE);
static int iommu_prepare_identity_map(struct pci_dev *pdev,
				      unsigned long long start,
				      unsigned long long end)
	struct dmar_domain *domain;

	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);

	/* For _hardware_ passthrough, don't bother. But for software
	   passthrough, we do it anyway -- it may indicate a memory
	   range which is reserved in E820, so which didn't get set
	   up to start with in si_domain */
	if (domain == si_domain && hw_pass_through) {
		printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
		       pci_name(pdev), start, end);

	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
	       pci_name(pdev), start, end);

		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));

	if (end >> agaw_to_width(domain->agaw)) {
		WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     agaw_to_width(domain->agaw),
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));

	ret = iommu_domain_identity_map(domain, start, end);

	/* context entry init */
	ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);

	domain_exit(domain);
static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
					 struct pci_dev *pdev)
	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
	return iommu_prepare_identity_map(pdev, rmrr->base_address,

#ifdef CONFIG_DMAR_FLOPPY_WA
static inline void iommu_prepare_isa(void)
	struct pci_dev *pdev;

	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);

	printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);

		printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
		       "floppy might not work\n");

static inline void iommu_prepare_isa(void)
#endif /* !CONFIG_DMAR_FLPY_WA */
static int md_domain_init(struct dmar_domain *domain, int guest_width);

static int __init si_domain_work_fn(unsigned long start_pfn,
				    unsigned long end_pfn, void *datax)
	*ret = iommu_domain_identity_map(si_domain,
					 (uint64_t)start_pfn << PAGE_SHIFT,
					 (uint64_t)end_pfn << PAGE_SHIFT);

static int __init si_domain_init(int hw)
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	si_domain = alloc_domain();

	pr_debug("Identity mapping domain is domain %d\n", si_domain->id);

	for_each_active_iommu(iommu, drhd) {
		ret = iommu_attach_domain(si_domain, iommu);
			domain_exit(si_domain);

	if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
		domain_exit(si_domain);

	si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;

		for_each_online_node(nid) {
			work_with_active_regions(nid, si_domain_work_fn, &ret);
static void domain_remove_one_dev_info(struct dmar_domain *domain,
				       struct pci_dev *pdev);
static int identity_mapping(struct pci_dev *pdev)
	struct device_domain_info *info;

	if (likely(!iommu_identity_mapping))

	info = pdev->dev.archdata.iommu;
	if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
		return (info->domain == si_domain);
static int domain_add_dev_info(struct dmar_domain *domain,
			       struct pci_dev *pdev,
	struct device_domain_info *info;
	unsigned long flags;

	info = alloc_devinfo_mem();

	ret = domain_context_mapping(domain, pdev, translation);
		free_devinfo_mem(info);

	info->segment = pci_domain_nr(pdev->bus);
	info->bus = pdev->bus->number;
	info->devfn = pdev->devfn;
	info->domain = domain;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_add(&info->link, &domain->devices);
	list_add(&info->global, &device_domain_list);
	pdev->dev.archdata.iommu = info;
	spin_unlock_irqrestore(&device_domain_lock, flags);
static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
{
	if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
		return 1;

	if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
		return 1;

	if (!(iommu_identity_mapping & IDENTMAP_ALL))
		return 0;

	/*
	 * We want to start off with all devices in the 1:1 domain, and
	 * take them out later if we find they can't access all of memory.
	 *
	 * However, we can't do this for PCI devices behind bridges,
	 * because all PCI devices behind the same bridge will end up
	 * with the same source-id on their transactions.
	 *
	 * Practically speaking, we can't change things around for these
	 * devices at run-time, because we can't be sure there'll be no
	 * DMA transactions in flight for any of their siblings.
	 *
	 * So PCI devices (unless they're on the root bus) as well as
	 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
	 * the 1:1 domain, just in _case_ one of their siblings turns out
	 * not to be able to map all of memory.
	 */
	if (!pci_is_pcie(pdev)) {
		if (!pci_is_root_bus(pdev->bus))
			return 0;
		if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
			return 0;
	} else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
		return 0;

	/*
	 * At boot time, we don't yet know if devices will be 64-bit capable.
	 * Assume that they will -- if they turn out not to be, then we can
	 * take them out of the 1:1 domain later.
	 */
	if (!startup) {
		/*
		 * If the device's dma_mask is less than the system's memory
		 * size then this is not a candidate for identity mapping.
		 */
		u64 dma_mask = pdev->dma_mask;

		if (pdev->dev.coherent_dma_mask &&
		    pdev->dev.coherent_dma_mask < dma_mask)
			dma_mask = pdev->dev.coherent_dma_mask;

		return dma_mask >= dma_get_required_mask(&pdev->dev);
	}

	return 1;
}
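
/*
 * The 'startup' argument above distinguishes the boot-time sweep (where a
 * device is optimistically assumed to be 64-bit capable) from later run-time
 * checks, which compare the device's DMA mask against
 * dma_get_required_mask() before keeping it in the identity domain.
 */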
static int __init iommu_prepare_static_identity_mapping(int hw)
{
	struct pci_dev *pdev = NULL;
	int ret;

	ret = si_domain_init(hw);
	if (ret)
		return -EFAULT;

	for_each_pci_dev(pdev) {
		/* Skip Host/PCI Bridge devices */
		if (IS_BRIDGE_HOST_DEVICE(pdev))
			continue;
		if (iommu_should_identity_map(pdev, 1)) {
			printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
			       hw ? "hardware" : "software", pci_name(pdev));

			ret = domain_add_dev_info(si_domain, pdev,
						  hw ? CONTEXT_TT_PASS_THROUGH :
						       CONTEXT_TT_MULTI_LEVEL);
			if (ret)
				return ret;
		}
	}

	return 0;
}
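
/*
 * init_dmars() below drives the remaining bring-up: it allocates the global
 * per-IOMMU arrays, sets up root entries and invalidation (queued or
 * register-based), programs RMRR and ISA identity maps, and finally enables
 * translation on each unit.
 */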
static int __init init_dmars(void)
{
	struct dmar_drhd_unit *drhd;
	struct dmar_rmrr_unit *rmrr;
	struct pci_dev *pdev;
	struct intel_iommu *iommu;
	int i, ret;

	/*
	 * for each drhd
	 *    allocate root
	 *    initialize and program root entry to not present
	 * endfor
	 */
	for_each_drhd_unit(drhd) {
		g_num_of_iommus++;
		/*
		 * lock not needed as this is only incremented in the single
		 * threaded kernel __init code path all other access are read
		 * only
		 */
	}

	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
			   GFP_KERNEL);
	if (!g_iommus) {
		printk(KERN_ERR "Allocating global iommu array failed\n");
		ret = -ENOMEM;
		goto error;
	}

	deferred_flush = kzalloc(g_num_of_iommus *
		sizeof(struct deferred_flush_tables), GFP_KERNEL);
	if (!deferred_flush) {
		ret = -ENOMEM;
		goto error;
	}

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;

		iommu = drhd->iommu;
		g_iommus[iommu->seq_id] = iommu;

		ret = iommu_init_domains(iommu);
		if (ret)
			goto error;

		/*
		 * TBD:
		 * we could share the same root & context tables
		 * among all IOMMU's. Need to Split it later.
		 */
		ret = iommu_alloc_root_entry(iommu);
		if (ret) {
			printk(KERN_ERR "IOMMU: allocate root entry failed\n");
			goto error;
		}
		if (!ecap_pass_through(iommu->ecap))
			hw_pass_through = 0;
	}

	/*
	 * Start from the sane iommu hardware state.
	 */
	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;

		iommu = drhd->iommu;

		/*
		 * If the queued invalidation is already initialized by us
		 * (for example, while enabling interrupt-remapping) then
		 * we got the things already rolling from a sane state.
		 */
		if (iommu->qi)
			continue;

		/*
		 * Clear any previous faults.
		 */
		dmar_fault(-1, iommu);
		/*
		 * Disable queued invalidation if supported and already enabled
		 * before OS handover.
		 */
		dmar_disable_qi(iommu);
	}

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;

		iommu = drhd->iommu;

		if (dmar_enable_qi(iommu)) {
			/*
			 * Queued Invalidate not enabled, use Register Based
			 * Invalidate
			 */
			iommu->flush.flush_context = __iommu_flush_context;
			iommu->flush.flush_iotlb = __iommu_flush_iotlb;
			printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
			       "invalidation\n",
			       iommu->seq_id,
			       (unsigned long long)drhd->reg_base_addr);
		} else {
			iommu->flush.flush_context = qi_flush_context;
			iommu->flush.flush_iotlb = qi_flush_iotlb;
			printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
			       "invalidation\n",
			       iommu->seq_id,
			       (unsigned long long)drhd->reg_base_addr);
		}
	}

	if (iommu_pass_through)
		iommu_identity_mapping |= IDENTMAP_ALL;

#ifdef CONFIG_DMAR_BROKEN_GFX_WA
	iommu_identity_mapping |= IDENTMAP_GFX;
#endif

	check_tylersburg_isoch();

	/*
	 * If pass through is not set or not enabled, setup context entries for
	 * identity mappings for rmrr, gfx, and isa and may fall back to static
	 * identity mapping if iommu_identity_mapping is set.
	 */
	if (iommu_identity_mapping) {
		ret = iommu_prepare_static_identity_mapping(hw_pass_through);
		if (ret) {
			printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
			goto error;
		}
	}
	/*
	 * For each rmrr
	 *   for each dev attached to rmrr
	 *   do
	 *     locate drhd for dev, alloc domain for dev
	 *     allocate free domain
	 *     allocate page table entries for rmrr
	 *     if context not allocated for bus
	 *           allocate and init context
	 *           set present in root table for this bus
	 *     init context with domain, translation etc
	 *    endfor
	 * endfor
	 */
	printk(KERN_INFO "IOMMU: Setting RMRR:\n");
	for_each_rmrr_units(rmrr) {
		for (i = 0; i < rmrr->devices_cnt; i++) {
			pdev = rmrr->devices[i];
			/*
			 * some BIOS lists non-exist devices in DMAR
			 * table.
			 */
			if (!pdev)
				continue;
			ret = iommu_prepare_rmrr_dev(rmrr, pdev);
			if (ret)
				printk(KERN_ERR
				       "IOMMU: mapping reserved region failed\n");
		}
	}

	iommu_prepare_isa();

	/*
	 * for each drhd
	 *   enable fault log
	 *   global invalidate context cache
	 *   global invalidate iotlb
	 *   enable translation
	 */
	for_each_drhd_unit(drhd) {
		if (drhd->ignored) {
			/*
			 * we always have to disable PMRs or DMA may fail on
			 * this device
			 */
			if (force_on)
				iommu_disable_protect_mem_regions(drhd->iommu);
			continue;
		}
		iommu = drhd->iommu;

		iommu_flush_write_buffer(iommu);

		ret = dmar_set_interrupt(iommu);
		if (ret)
			goto error;

		iommu_set_root_entry(iommu);

		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);

		ret = iommu_enable_translation(iommu);
		if (ret)
			goto error;

		iommu_disable_protect_mem_regions(iommu);
	}

	return 0;
error:
	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;
		iommu = drhd->iommu;
		free_iommu(iommu);
	}
	kfree(g_iommus);
	return ret;
}
/* This takes a number of _MM_ pages, not VTD pages */
static struct iova *intel_alloc_iova(struct device *dev,
				     struct dmar_domain *domain,
				     unsigned long nrpages, uint64_t dma_mask)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct iova *iova = NULL;

	/* Restrict dma_mask to the width that the iommu can handle */
	dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);

	if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
		/*
		 * First try to allocate an io virtual address in
		 * DMA_BIT_MASK(32) and if that fails then try allocating
		 * from higher range
		 */
		iova = alloc_iova(&domain->iovad, nrpages,
				  IOVA_PFN(DMA_BIT_MASK(32)), 1);
		if (iova)
			return iova;
	}
	iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
	if (unlikely(!iova)) {
		printk(KERN_ERR "Allocating %ld-page iova for %s failed",
		       nrpages, pci_name(pdev));
		return NULL;
	}

	return iova;
}
static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
{
	struct dmar_domain *domain;
	int ret;

	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
	if (!domain) {
		printk(KERN_ERR "Allocating domain for %s failed",
		       pci_name(pdev));
		return NULL;
	}

	/* make sure context mapping is ok */
	if (unlikely(!domain_context_mapped(pdev))) {
		ret = domain_context_mapping(domain, pdev,
					     CONTEXT_TT_MULTI_LEVEL);
		if (ret) {
			printk(KERN_ERR "Domain context map for %s failed",
			       pci_name(pdev));
			return NULL;
		}
	}

	return domain;
}

static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
{
	struct device_domain_info *info;

	/* No lock here, assumes no domain exit in normal case */
	info = dev->dev.archdata.iommu;
	if (likely(info))
		return info->domain;

	return __get_valid_domain_for_dev(dev);
}
static int iommu_dummy(struct pci_dev *pdev)
{
	return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
}

/* Check if the pdev needs to go through non-identity map and unmap process.*/
static int iommu_no_mapping(struct device *dev)
{
	struct pci_dev *pdev;
	int found;

	if (unlikely(dev->bus != &pci_bus_type))
		return 1;

	pdev = to_pci_dev(dev);
	if (iommu_dummy(pdev))
		return 1;

	if (!iommu_identity_mapping)
		return 0;

	found = identity_mapping(pdev);
	if (found) {
		if (iommu_should_identity_map(pdev, 0))
			return 1;
		/*
		 * 32 bit DMA is removed from si_domain and fall back
		 * to non-identity mapping.
		 */
		domain_remove_one_dev_info(si_domain, pdev);
		printk(KERN_INFO "32bit %s uses non-identity mapping\n",
		       pci_name(pdev));
		return 0;
	}

	/*
	 * In case of a detached 64 bit DMA device from vm, the device
	 * is put into si_domain for identity mapping.
	 */
	if (iommu_should_identity_map(pdev, 0)) {
		int ret;
		ret = domain_add_dev_info(si_domain, pdev,
					  hw_pass_through ?
					  CONTEXT_TT_PASS_THROUGH :
					  CONTEXT_TT_MULTI_LEVEL);
		if (!ret) {
			printk(KERN_INFO "64bit %s uses identity mapping\n",
			       pci_name(pdev));
			return 1;
		}
	}

	return 0;
}
static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
				     size_t size, int dir, u64 dma_mask)
{
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	phys_addr_t start_paddr;
	struct iova *iova;
	int prot = 0;
	int ret;
	struct intel_iommu *iommu;
	unsigned long paddr_pfn = paddr >> PAGE_SHIFT;

	BUG_ON(dir == DMA_NONE);

	if (iommu_no_mapping(hwdev))
		return paddr;

	domain = get_valid_domain_for_dev(pdev);
	if (!domain)
		return 0;

	iommu = domain_get_iommu(domain);
	size = aligned_nrpages(paddr, size);

	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
	if (!iova)
		goto error;

	/*
	 * Check if DMAR supports zero-length reads on write only
	 * mappings..
	 */
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
			!cap_zlr(iommu->cap))
		prot |= DMA_PTE_READ;
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		prot |= DMA_PTE_WRITE;
	/*
	 * paddr - (paddr + size) might be partial page, we should map the whole
	 * page.  Note: if two part of one page are separately mapped, we
	 * might have two guest_addr mapping to the same host paddr, but this
	 * is not a big problem
	 */
	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
				 mm_to_dma_pfn(paddr_pfn), size, prot);
	if (ret)
		goto error;

	/* it's a non-present to present mapping. Only flush if caching mode */
	if (cap_caching_mode(iommu->cap))
		iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
	else
		iommu_flush_write_buffer(iommu);

	start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
	start_paddr += paddr & ~PAGE_MASK;
	return start_paddr;

error:
	if (iova)
		__free_iova(&domain->iovad, iova);
	printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
	       pci_name(pdev), size, (unsigned long long)paddr, dir);
	return 0;
}
static dma_addr_t intel_map_page(struct device *dev, struct page *page,
				 unsigned long offset, size_t size,
				 enum dma_data_direction dir,
				 struct dma_attrs *attrs)
{
	return __intel_map_single(dev, page_to_phys(page) + offset, size,
				  dir, to_pci_dev(dev)->dma_mask);
}
static void flush_unmaps(void)
{
	int i, j;

	timer_on = 0;

	/* just flush them all */
	for (i = 0; i < g_num_of_iommus; i++) {
		struct intel_iommu *iommu = g_iommus[i];
		if (!iommu)
			continue;

		if (!deferred_flush[i].next)
			continue;

		/* In caching mode, global flushes turn emulation expensive */
		if (!cap_caching_mode(iommu->cap))
			iommu->flush.flush_iotlb(iommu, 0, 0, 0,
						 DMA_TLB_GLOBAL_FLUSH);
		for (j = 0; j < deferred_flush[i].next; j++) {
			unsigned long mask;
			struct iova *iova = deferred_flush[i].iova[j];
			struct dmar_domain *domain = deferred_flush[i].domain[j];

			/* On real hardware multiple invalidations are expensive */
			if (cap_caching_mode(iommu->cap))
				iommu_flush_iotlb_psi(iommu, domain->id,
					iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
			else {
				mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
				iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
					(uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
			}
			__free_iova(&deferred_flush[i].domain[j]->iovad, iova);
		}
		deferred_flush[i].next = 0;
	}

	list_size = 0;
}
static void flush_unmaps_timeout(unsigned long data)
{
	unsigned long flags;

	spin_lock_irqsave(&async_umap_flush_lock, flags);
	flush_unmaps();
	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
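
/*
 * add_unmap() below batches freed IOVAs per IOMMU in deferred_flush[] and
 * arms a 10ms timer; flush_unmaps() then performs one (global or per-domain)
 * IOTLB invalidation for the whole batch instead of one per unmap. The
 * batching is bypassed entirely when intel_iommu_strict is set.
 */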
static void add_unmap(struct dmar_domain *dom, struct iova *iova)
{
	unsigned long flags;
	int next, iommu_id;
	struct intel_iommu *iommu;

	spin_lock_irqsave(&async_umap_flush_lock, flags);
	if (list_size == HIGH_WATER_MARK)
		flush_unmaps();

	iommu = domain_get_iommu(dom);
	iommu_id = iommu->seq_id;

	next = deferred_flush[iommu_id].next;
	deferred_flush[iommu_id].domain[next] = dom;
	deferred_flush[iommu_id].iova[next] = iova;
	deferred_flush[iommu_id].next++;

	if (!timer_on) {
		mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
		timer_on = 1;
	}
	list_size++;
	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
			     size_t size, enum dma_data_direction dir,
			     struct dma_attrs *attrs)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct dmar_domain *domain;
	unsigned long start_pfn, last_pfn;
	struct iova *iova;
	struct intel_iommu *iommu;

	if (iommu_no_mapping(dev))
		return;

	domain = find_domain(pdev);
	BUG_ON(!domain);

	iommu = domain_get_iommu(domain);

	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
	if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
		      (unsigned long long)dev_addr))
		return;

	start_pfn = mm_to_dma_pfn(iova->pfn_lo);
	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;

	pr_debug("Device %s unmapping: pfn %lx-%lx\n",
		 pci_name(pdev), start_pfn, last_pfn);

	/* clear the whole page */
	dma_pte_clear_range(domain, start_pfn, last_pfn);

	/* free page tables */
	dma_pte_free_pagetable(domain, start_pfn, last_pfn);

	if (intel_iommu_strict) {
		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
				      last_pfn - start_pfn + 1, 0);
		/* free iova */
		__free_iova(&domain->iovad, iova);
	} else {
		add_unmap(domain, iova);
		/*
		 * queue up the release of the unmap to save the 1/6th of the
		 * cpu used up by the iotlb flush operation...
		 */
	}
}
static void *intel_alloc_coherent(struct device *hwdev, size_t size,
				  dma_addr_t *dma_handle, gfp_t flags)
{
	void *vaddr;
	int order;

	size = PAGE_ALIGN(size);
	order = get_order(size);

	if (!iommu_no_mapping(hwdev))
		flags &= ~(GFP_DMA | GFP_DMA32);
	else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
		if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
			flags |= GFP_DMA;
		else
			flags |= GFP_DMA32;
	}

	vaddr = (void *)__get_free_pages(flags, order);
	if (!vaddr)
		return NULL;
	memset(vaddr, 0, size);

	*dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
					 DMA_BIDIRECTIONAL,
					 hwdev->coherent_dma_mask);
	if (*dma_handle)
		return vaddr;
	free_pages((unsigned long)vaddr, order);
	return NULL;
}
static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
				dma_addr_t dma_handle)
{
	int order;

	size = PAGE_ALIGN(size);
	order = get_order(size);

	intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
	free_pages((unsigned long)vaddr, order);
}
static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
			   int nelems, enum dma_data_direction dir,
			   struct dma_attrs *attrs)
{
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	unsigned long start_pfn, last_pfn;
	struct iova *iova;
	struct intel_iommu *iommu;

	if (iommu_no_mapping(hwdev))
		return;

	domain = find_domain(pdev);
	BUG_ON(!domain);

	iommu = domain_get_iommu(domain);

	iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
	if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
		      (unsigned long long)sglist[0].dma_address))
		return;

	start_pfn = mm_to_dma_pfn(iova->pfn_lo);
	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;

	/* clear the whole page */
	dma_pte_clear_range(domain, start_pfn, last_pfn);

	/* free page tables */
	dma_pte_free_pagetable(domain, start_pfn, last_pfn);

	if (intel_iommu_strict) {
		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
				      last_pfn - start_pfn + 1, 0);
		/* free iova */
		__free_iova(&domain->iovad, iova);
	} else {
		add_unmap(domain, iova);
		/*
		 * queue up the release of the unmap to save the 1/6th of the
		 * cpu used up by the iotlb flush operation...
		 */
	}
}
static int intel_nontranslate_map_sg(struct device *hddev,
	struct scatterlist *sglist, int nelems, int dir)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sglist, sg, nelems, i) {
		BUG_ON(!sg_page(sg));
		sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
		sg->dma_length = sg->length;
	}
	return nelems;
}
static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
			enum dma_data_direction dir, struct dma_attrs *attrs)
{
	int i;
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	size_t size = 0;
	int prot = 0;
	struct iova *iova = NULL;
	int ret;
	struct scatterlist *sg;
	unsigned long start_vpfn;
	struct intel_iommu *iommu;

	BUG_ON(dir == DMA_NONE);
	if (iommu_no_mapping(hwdev))
		return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);

	domain = get_valid_domain_for_dev(pdev);
	if (!domain)
		return 0;

	iommu = domain_get_iommu(domain);

	for_each_sg(sglist, sg, nelems, i)
		size += aligned_nrpages(sg->offset, sg->length);

	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
				pdev->dma_mask);
	if (!iova) {
		sglist->dma_length = 0;
		return 0;
	}

	/*
	 * Check if DMAR supports zero-length reads on write only
	 * mappings..
	 */
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
			!cap_zlr(iommu->cap))
		prot |= DMA_PTE_READ;
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		prot |= DMA_PTE_WRITE;

	start_vpfn = mm_to_dma_pfn(iova->pfn_lo);

	ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
	if (unlikely(ret)) {
		/* clear the page */
		dma_pte_clear_range(domain, start_vpfn,
				    start_vpfn + size - 1);
		/* free page tables */
		dma_pte_free_pagetable(domain, start_vpfn,
				       start_vpfn + size - 1);
		/* free iova */
		__free_iova(&domain->iovad, iova);
		return 0;
	}

	/* it's a non-present to present mapping. Only flush if caching mode */
	if (cap_caching_mode(iommu->cap))
		iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
	else
		iommu_flush_write_buffer(iommu);

	return nelems;
}
static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
	return !dma_addr;
}

struct dma_map_ops intel_dma_ops = {
	.alloc_coherent = intel_alloc_coherent,
	.free_coherent = intel_free_coherent,
	.map_sg = intel_map_sg,
	.unmap_sg = intel_unmap_sg,
	.map_page = intel_map_page,
	.unmap_page = intel_unmap_page,
	.mapping_error = intel_mapping_error,
};
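
/*
 * intel_dma_ops is installed as the global dma_ops from intel_iommu_init()
 * once init_dmars() succeeds, so all PCI DMA API calls are routed through
 * the mapping functions above.
 */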
static inline int iommu_domain_cache_init(void)
{
	int ret = 0;

	iommu_domain_cache = kmem_cache_create("iommu_domain",
					       sizeof(struct dmar_domain),
					       0,
					       SLAB_HWCACHE_ALIGN,
					       NULL);
	if (!iommu_domain_cache) {
		printk(KERN_ERR "Couldn't create iommu_domain cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static inline int iommu_devinfo_cache_init(void)
{
	int ret = 0;

	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
						sizeof(struct device_domain_info),
						0,
						SLAB_HWCACHE_ALIGN,
						NULL);
	if (!iommu_devinfo_cache) {
		printk(KERN_ERR "Couldn't create devinfo cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static inline int iommu_iova_cache_init(void)
{
	int ret = 0;

	iommu_iova_cache = kmem_cache_create("iommu_iova",
					     sizeof(struct iova),
					     0,
					     SLAB_HWCACHE_ALIGN,
					     NULL);
	if (!iommu_iova_cache) {
		printk(KERN_ERR "Couldn't create iova cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static int __init iommu_init_mempool(void)
{
	int ret;

	ret = iommu_iova_cache_init();
	if (ret)
		return ret;

	ret = iommu_domain_cache_init();
	if (ret)
		goto domain_error;

	ret = iommu_devinfo_cache_init();
	if (!ret)
		return ret;

	kmem_cache_destroy(iommu_domain_cache);
domain_error:
	kmem_cache_destroy(iommu_iova_cache);

	return -ENOMEM;
}

static void __init iommu_exit_mempool(void)
{
	kmem_cache_destroy(iommu_devinfo_cache);
	kmem_cache_destroy(iommu_domain_cache);
	kmem_cache_destroy(iommu_iova_cache);
}
static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
{
	struct dmar_drhd_unit *drhd;
	u32 vtbar;
	int rc;

	/* We know that this device on this chipset has its own IOMMU.
	 * If we find it under a different IOMMU, then the BIOS is lying
	 * to us. Hope that the IOMMU for this device is actually
	 * disabled, and it needs no translation...
	 */
	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
	if (rc) {
		/* "can't" happen */
		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
		return;
	}
	vtbar &= 0xffff0000;

	/* we know that this iommu should be at offset 0xa000 from vtbar */
	drhd = dmar_find_matched_drhd_unit(pdev);
	if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
			    TAINT_FIRMWARE_WORKAROUND,
			    "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
		pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
}
DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
static void __init init_no_remapping_devices(void)
{
	struct dmar_drhd_unit *drhd;

	for_each_drhd_unit(drhd) {
		if (!drhd->include_all) {
			int i;
			for (i = 0; i < drhd->devices_cnt; i++)
				if (drhd->devices[i] != NULL)
					break;
			/* ignore DMAR unit if no pci devices exist */
			if (i == drhd->devices_cnt)
				drhd->ignored = 1;
		}
	}

	for_each_drhd_unit(drhd) {
		int i;
		if (drhd->ignored || drhd->include_all)
			continue;

		for (i = 0; i < drhd->devices_cnt; i++)
			if (drhd->devices[i] &&
			    !IS_GFX_DEVICE(drhd->devices[i]))
				break;

		if (i < drhd->devices_cnt)
			continue;

		/* bypass IOMMU if it is just for gfx devices */
		drhd->ignored = 1;
		for (i = 0; i < drhd->devices_cnt; i++) {
			if (!drhd->devices[i])
				continue;
			drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
		}
	}
}
#ifdef CONFIG_SUSPEND
static int init_iommu_hw(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;

	for_each_active_iommu(iommu, drhd)
		if (iommu->qi)
			dmar_reenable_qi(iommu);

	for_each_iommu(iommu, drhd) {
		if (drhd->ignored) {
			/*
			 * we always have to disable PMRs or DMA may fail on
			 * this device
			 */
			if (force_on)
				iommu_disable_protect_mem_regions(iommu);
			continue;
		}

		iommu_flush_write_buffer(iommu);

		iommu_set_root_entry(iommu);

		iommu->flush.flush_context(iommu, 0, 0, 0,
					   DMA_CCMD_GLOBAL_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
					 DMA_TLB_GLOBAL_FLUSH);
		if (iommu_enable_translation(iommu))
			return 1;
		iommu_disable_protect_mem_regions(iommu);
	}

	return 0;
}

static void iommu_flush_all(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	for_each_active_iommu(iommu, drhd) {
		iommu->flush.flush_context(iommu, 0, 0, 0,
					   DMA_CCMD_GLOBAL_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
					 DMA_TLB_GLOBAL_FLUSH);
	}
}
static int iommu_suspend(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	unsigned long flag;

	for_each_active_iommu(iommu, drhd) {
		iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
					     GFP_ATOMIC);
		if (!iommu->iommu_state)
			goto nomem;
	}

	iommu_flush_all();

	for_each_active_iommu(iommu, drhd) {
		iommu_disable_translation(iommu);

		spin_lock_irqsave(&iommu->register_lock, flag);

		iommu->iommu_state[SR_DMAR_FECTL_REG] =
			readl(iommu->reg + DMAR_FECTL_REG);
		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
			readl(iommu->reg + DMAR_FEDATA_REG);
		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
			readl(iommu->reg + DMAR_FEADDR_REG);
		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
			readl(iommu->reg + DMAR_FEUADDR_REG);

		spin_unlock_irqrestore(&iommu->register_lock, flag);
	}
	return 0;

nomem:
	for_each_active_iommu(iommu, drhd)
		kfree(iommu->iommu_state);

	return -ENOMEM;
}
static void iommu_resume(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	unsigned long flag;

	if (init_iommu_hw()) {
		if (force_on)
			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
		else
			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
		return;
	}

	for_each_active_iommu(iommu, drhd) {
		spin_lock_irqsave(&iommu->register_lock, flag);

		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
		       iommu->reg + DMAR_FECTL_REG);
		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
		       iommu->reg + DMAR_FEDATA_REG);
		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
		       iommu->reg + DMAR_FEADDR_REG);
		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
		       iommu->reg + DMAR_FEUADDR_REG);

		spin_unlock_irqrestore(&iommu->register_lock, flag);
	}

	for_each_active_iommu(iommu, drhd)
		kfree(iommu->iommu_state);
}
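
/*
 * The suspend/resume pair above saves and restores only the fault-event
 * control/data/address registers; translation itself is re-enabled from
 * init_iommu_hw(), and the saved register block is freed again on resume.
 */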
static struct syscore_ops iommu_syscore_ops = {
	.resume		= iommu_resume,
	.suspend	= iommu_suspend,
};

static void __init init_iommu_pm_ops(void)
{
	register_syscore_ops(&iommu_syscore_ops);
}

#else
static inline void init_iommu_pm_ops(void) {}
#endif	/* CONFIG_SUSPEND */
/*
 * Here we only respond to action of unbound device from driver.
 *
 * Added device is not attached to its DMAR domain here yet. That will happen
 * when mapping the device to iova.
 */
static int device_notifier(struct notifier_block *nb,
			   unsigned long action, void *data)
{
	struct device *dev = data;
	struct pci_dev *pdev = to_pci_dev(dev);
	struct dmar_domain *domain;

	if (unlikely(dev->bus != &pci_bus_type))
		return 0;

	switch (action) {
	case BUS_NOTIFY_UNBOUND_DRIVER:
		if (iommu_no_mapping(dev))
			break;

		if (iommu_pass_through)
			break;

		domain = find_domain(pdev);
		if (!domain)
			break;

		domain_remove_one_dev_info(domain, pdev);

		if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
		    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
		    list_empty(&domain->devices))
			domain_exit(domain);

		remove_dev_from_drhd(pdev);
		remove_dev_from_atsr(pdev);
		break;
	case BUS_NOTIFY_ADD_DEVICE:
		restore_dev_to_drhd(pdev);
		restore_dev_to_atsr(pdev);
		break;
	}

	return 0;
}

static struct notifier_block device_nb = {
	.notifier_call = device_notifier,
};
int __init intel_iommu_init(void)
{
	int ret = 0;

	/* VT-d is required for a TXT/tboot launch, so enforce that */
	force_on = tboot_force_iommu();

	if (dmar_table_init()) {
		if (force_on)
			panic("tboot: Failed to initialize DMAR table\n");
		return -ENODEV;
	}

	if (dmar_dev_scope_init()) {
		if (force_on)
			panic("tboot: Failed to initialize DMAR device scope\n");
		return -ENODEV;
	}

	/*
	 * Check the need for DMA-remapping initialization now.
	 * Above initialization will also be used by Interrupt-remapping.
	 */
	if (no_iommu || dmar_disabled)
		return -ENODEV;

	if (iommu_init_mempool()) {
		if (force_on)
			panic("tboot: Failed to initialize iommu memory\n");
		return -ENODEV;
	}

	if (dmar_init_reserved_ranges()) {
		if (force_on)
			panic("tboot: Failed to reserve iommu ranges\n");
		return -ENODEV;
	}

	init_no_remapping_devices();

	ret = init_dmars();
	if (ret) {
		if (force_on)
			panic("tboot: Failed to initialize DMARs\n");
		printk(KERN_ERR "IOMMU: dmar init failed\n");
		put_iova_domain(&reserved_iova_list);
		iommu_exit_mempool();
		return ret;
	}
	printk(KERN_INFO
	       "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");

	init_timer(&unmap_timer);
#ifdef CONFIG_SWIOTLB
	swiotlb = 0;
#endif
	dma_ops = &intel_dma_ops;

	init_iommu_pm_ops();

	register_iommu(&intel_iommu_ops);

	bus_register_notifier(&pci_bus_type, &device_nb);

	return 0;
}
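
/*
 * register_iommu(&intel_iommu_ops) exposes VT-d domains through the generic
 * IOMMU API (struct iommu_ops below), which is how guest device assignment
 * attaches devices to virtual-machine domains, and the bus notifier keeps
 * per-device domain info in sync as drivers bind and unbind.
 */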
static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
					   struct pci_dev *pdev)
{
	struct pci_dev *tmp, *parent;

	if (!iommu || !pdev)
		return;

	/* dependent device detach */
	tmp = pci_find_upstream_pcie_bridge(pdev);
	/* Secondary interface's bus number and devfn 0 */
	if (tmp) {
		parent = pdev->bus->self;
		while (parent != tmp) {
			iommu_detach_dev(iommu, parent->bus->number,
					 parent->devfn);
			parent = parent->bus->self;
		}
		if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
			iommu_detach_dev(iommu,
				tmp->subordinate->number, 0);
		else /* this is a legacy PCI bridge */
			iommu_detach_dev(iommu, tmp->bus->number,
					 tmp->devfn);
	}
}
static void domain_remove_one_dev_info(struct dmar_domain *domain,
				       struct pci_dev *pdev)
{
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags;
	int found = 0;
	struct list_head *entry, *tmp;

	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
				pdev->devfn);
	if (!iommu)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_safe(entry, tmp, &domain->devices) {
		info = list_entry(entry, struct device_domain_info, link);
		if (info->segment == pci_domain_nr(pdev->bus) &&
		    info->bus == pdev->bus->number &&
		    info->devfn == pdev->devfn) {
			list_del(&info->link);
			list_del(&info->global);
			if (info->dev)
				info->dev->dev.archdata.iommu = NULL;
			spin_unlock_irqrestore(&device_domain_lock, flags);

			iommu_disable_dev_iotlb(info);
			iommu_detach_dev(iommu, info->bus, info->devfn);
			iommu_detach_dependent_devices(iommu, pdev);
			free_devinfo_mem(info);

			spin_lock_irqsave(&device_domain_lock, flags);

			if (found)
				break;
			else
				continue;
		}

		/* if there is no other devices under the same iommu
		 * owned by this domain, clear this iommu in iommu_bmp
		 * update iommu count and coherency
		 */
		if (iommu == device_to_iommu(info->segment, info->bus,
					     info->devfn))
			found = 1;
	}

	if (found == 0) {
		unsigned long tmp_flags;
		spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
		clear_bit(iommu->seq_id, &domain->iommu_bmp);
		domain->iommu_count--;
		domain_update_iommu_cap(domain);
		spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);

		if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
		    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
			spin_lock_irqsave(&iommu->lock, tmp_flags);
			clear_bit(domain->id, iommu->domain_ids);
			iommu->domains[domain->id] = NULL;
			spin_unlock_irqrestore(&iommu->lock, tmp_flags);
		}
	}

	spin_unlock_irqrestore(&device_domain_lock, flags);
}
static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags1, flags2;

	spin_lock_irqsave(&device_domain_lock, flags1);
	while (!list_empty(&domain->devices)) {
		info = list_entry(domain->devices.next,
				  struct device_domain_info, link);
		list_del(&info->link);
		list_del(&info->global);
		if (info->dev)
			info->dev->dev.archdata.iommu = NULL;

		spin_unlock_irqrestore(&device_domain_lock, flags1);

		iommu_disable_dev_iotlb(info);
		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
		iommu_detach_dev(iommu, info->bus, info->devfn);
		iommu_detach_dependent_devices(iommu, info->dev);

		/* clear this iommu in iommu_bmp, update iommu count
		 * and capabilities
		 */
		spin_lock_irqsave(&domain->iommu_lock, flags2);
		if (test_and_clear_bit(iommu->seq_id,
				       &domain->iommu_bmp)) {
			domain->iommu_count--;
			domain_update_iommu_cap(domain);
		}
		spin_unlock_irqrestore(&domain->iommu_lock, flags2);

		free_devinfo_mem(info);
		spin_lock_irqsave(&device_domain_lock, flags1);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags1);
}
/* domain id for virtual machine, it won't be set in context */
static unsigned long vm_domid;

static struct dmar_domain *iommu_alloc_vm_domain(void)
{
	struct dmar_domain *domain;

	domain = alloc_domain_mem();
	if (!domain)
		return NULL;

	domain->id = vm_domid++;
	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
	domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;

	return domain;
}
static int md_domain_init(struct dmar_domain *domain, int guest_width)
{
	int adjust_width;

	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
	spin_lock_init(&domain->iommu_lock);

	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	domain->agaw = width_to_agaw(adjust_width);

	INIT_LIST_HEAD(&domain->devices);

	domain->iommu_count = 0;
	domain->iommu_coherency = 0;
	domain->iommu_snooping = 0;
	domain->iommu_superpage = 0;
	domain->max_addr = 0;
	domain->nid = -1;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
	if (!domain->pgd)
		return -ENOMEM;
	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
	return 0;
}
static void iommu_free_vm_domain(struct dmar_domain *domain)
{
	unsigned long flags;
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	unsigned long i;
	unsigned long ndomains;

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;
		iommu = drhd->iommu;

		ndomains = cap_ndoms(iommu->cap);
		for_each_set_bit(i, iommu->domain_ids, ndomains) {
			if (iommu->domains[i] == domain) {
				spin_lock_irqsave(&iommu->lock, flags);
				clear_bit(i, iommu->domain_ids);
				iommu->domains[i] = NULL;
				spin_unlock_irqrestore(&iommu->lock, flags);
				break;
			}
		}
	}
}
static void vm_domain_exit(struct dmar_domain *domain)
{
	/* Domain 0 is reserved, so dont process it */
	if (!domain)
		return;

	vm_domain_remove_all_dev_info(domain);
	/* destroy iovas */
	put_iova_domain(&domain->iovad);

	/* clear ptes */
	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

	/* free page tables */
	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

	iommu_free_vm_domain(domain);
	free_domain_mem(domain);
}
static int intel_iommu_domain_init(struct iommu_domain *domain)
{
	struct dmar_domain *dmar_domain;

	dmar_domain = iommu_alloc_vm_domain();
	if (!dmar_domain) {
		printk(KERN_ERR
		       "intel_iommu_domain_init: dmar_domain == NULL\n");
		return -ENOMEM;
	}
	if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
		printk(KERN_ERR
		       "intel_iommu_domain_init() failed\n");
		vm_domain_exit(dmar_domain);
		return -ENOMEM;
	}
	domain->priv = dmar_domain;

	return 0;
}

static void intel_iommu_domain_destroy(struct iommu_domain *domain)
{
	struct dmar_domain *dmar_domain = domain->priv;

	domain->priv = NULL;
	vm_domain_exit(dmar_domain);
}
static int intel_iommu_attach_device(struct iommu_domain *domain,
				     struct device *dev)
{
	struct dmar_domain *dmar_domain = domain->priv;
	struct pci_dev *pdev = to_pci_dev(dev);
	struct intel_iommu *iommu;
	int addr_width;

	/* normally pdev is not mapped */
	if (unlikely(domain_context_mapped(pdev))) {
		struct dmar_domain *old_domain;

		old_domain = find_domain(pdev);
		if (old_domain) {
			if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
			    dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
				domain_remove_one_dev_info(old_domain, pdev);
			else
				domain_remove_dev_info(old_domain);
		}
	}

	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
				pdev->devfn);
	if (!iommu)
		return -ENODEV;

	/* check if this iommu agaw is sufficient for max mapped address */
	addr_width = agaw_to_width(iommu->agaw);
	if (addr_width > cap_mgaw(iommu->cap))
		addr_width = cap_mgaw(iommu->cap);

	if (dmar_domain->max_addr > (1LL << addr_width)) {
		printk(KERN_ERR "%s: iommu width (%d) is not "
		       "sufficient for the mapped address (%llx)\n",
		       __func__, addr_width, dmar_domain->max_addr);
		return -EFAULT;
	}
	dmar_domain->gaw = addr_width;

	/*
	 * Knock out extra levels of page tables if necessary
	 */
	while (iommu->agaw < dmar_domain->agaw) {
		struct dma_pte *pte;

		pte = dmar_domain->pgd;
		if (dma_pte_present(pte)) {
			dmar_domain->pgd = (struct dma_pte *)
				phys_to_virt(dma_pte_addr(pte));
			free_pgtable_page(pte);
		}
		dmar_domain->agaw--;
	}

	return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
}
static void intel_iommu_detach_device(struct iommu_domain *domain,
				      struct device *dev)
{
	struct dmar_domain *dmar_domain = domain->priv;
	struct pci_dev *pdev = to_pci_dev(dev);

	domain_remove_one_dev_info(dmar_domain, pdev);
}
static int intel_iommu_map(struct iommu_domain *domain,
			   unsigned long iova, phys_addr_t hpa,
			   int gfp_order, int iommu_prot)
{
	struct dmar_domain *dmar_domain = domain->priv;
	u64 max_addr;
	int prot = 0;
	size_t size;
	int ret;

	if (iommu_prot & IOMMU_READ)
		prot |= DMA_PTE_READ;
	if (iommu_prot & IOMMU_WRITE)
		prot |= DMA_PTE_WRITE;
	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
		prot |= DMA_PTE_SNP;

	size     = PAGE_SIZE << gfp_order;
	max_addr = iova + size;
	if (dmar_domain->max_addr < max_addr) {
		u64 end;

		/* check if minimum agaw is sufficient for mapped address */
		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
		if (end < max_addr) {
			printk(KERN_ERR "%s: iommu width (%d) is not "
			       "sufficient for the mapped address (%llx)\n",
			       __func__, dmar_domain->gaw, max_addr);
			return -EFAULT;
		}
		dmar_domain->max_addr = max_addr;
	}
	/* Round up size to next multiple of PAGE_SIZE, if it and
	   the low bits of hpa would take us onto the next page */
	size = aligned_nrpages(hpa, size);
	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
				 hpa >> VTD_PAGE_SHIFT, size, prot);
	return ret;
}
static int intel_iommu_unmap(struct iommu_domain *domain,
			     unsigned long iova, int gfp_order)
{
	struct dmar_domain *dmar_domain = domain->priv;
	size_t size = PAGE_SIZE << gfp_order;

	dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
			    (iova + size - 1) >> VTD_PAGE_SHIFT);

	if (dmar_domain->max_addr == iova + size)
		dmar_domain->max_addr = iova;

	return gfp_order;
}
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
					    unsigned long iova)
{
	struct dmar_domain *dmar_domain = domain->priv;
	struct dma_pte *pte;
	u64 phys = 0;

	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
	if (pte)
		phys = dma_pte_addr(pte);

	return phys;
}

static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
				      unsigned long cap)
{
	struct dmar_domain *dmar_domain = domain->priv;

	if (cap == IOMMU_CAP_CACHE_COHERENCY)
		return dmar_domain->iommu_snooping;
	if (cap == IOMMU_CAP_INTR_REMAP)
		return intr_remapping_enabled;

	return 0;
}
static struct iommu_ops intel_iommu_ops = {
	.domain_init	= intel_iommu_domain_init,
	.domain_destroy = intel_iommu_domain_destroy,
	.attach_dev	= intel_iommu_attach_device,
	.detach_dev	= intel_iommu_detach_device,
	.map		= intel_iommu_map,
	.unmap		= intel_iommu_unmap,
	.iova_to_phys	= intel_iommu_iova_to_phys,
	.domain_has_cap = intel_iommu_domain_has_cap,
};
static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
{
	/*
	 * Mobile 4 Series Chipset neglects to set RWBF capability,
	 * but needs it:
	 */
	printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
	rwbf_quirk = 1;

	/* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */
	if (dev->revision == 0x07) {
		printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
		dmar_map_gfx = 0;
	}
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
#define GGC 0x52
#define GGC_MEMORY_SIZE_MASK	(0xf << 8)
#define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
#define GGC_MEMORY_SIZE_1M	(0x1 << 8)
#define GGC_MEMORY_SIZE_2M	(0x3 << 8)
#define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)

static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
{
	unsigned short ggc;

	if (pci_read_config_word(dev, GGC, &ggc))
		return;

	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
		printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
		dmar_map_gfx = 0;
	}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
/* On Tylersburg chipsets, some BIOSes have been known to enable the
   ISOCH DMAR unit for the Azalia sound device, but not give it any
   TLB entries, which causes it to deadlock. Check for that.  We do
   this in a function called from init_dmars(), instead of in a PCI
   quirk, because we don't want to print the obnoxious "BIOS broken"
   message if VT-d is actually disabled.
*/
static void __init check_tylersburg_isoch(void)
{
	struct pci_dev *pdev;
	uint32_t vtisochctrl;

	/* If there's no Azalia in the system anyway, forget it. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
	if (!pdev)
		return;
	pci_dev_put(pdev);

	/* System Management Registers. Might be hidden, in which case
	   we can't do the sanity check. But that's OK, because the
	   known-broken BIOSes _don't_ actually hide it, so far. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
	if (!pdev)
		return;

	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
		pci_dev_put(pdev);
		return;
	}

	pci_dev_put(pdev);

	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
	if (vtisochctrl & 1)
		return;

	/* Drop all bits other than the number of TLB entries */
	vtisochctrl &= 0x1c;

	/* If we have the recommended number of TLB entries (16), fine. */
	if (vtisochctrl == 0x10)
		return;

	/* Zero TLB entries? You get to ride the short bus to school. */
	if (!vtisochctrl) {
		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		iommu_identity_mapping |= IDENTMAP_AZALIA;
		return;
	}

	printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",