/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 */
#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/sysdev.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include "intel-iommu.h"
#include <asm/proto.h> /* force_iommu in this header in x86-64 */
#include <asm/cacheflush.h>
#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)

#define IOAPIC_RANGE_START (0xfee00000)
#define IOAPIC_RANGE_END (0xfeefffff)
#define IOVA_START_ADDR (0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1 minute */

#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
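/*
 * For the default 48-bit width above, DOMAIN_MAX_ADDR(48) is
 * (1ULL << 48) - 1, i.e. a 256TB per-domain address space (a 39-bit
 * domain would top out at 512GB).  domain_init() below narrows the
 * width to whatever the hardware actually reports via cap_mgaw() and
 * cap_sagaw().
 */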
static void domain_remove_dev_info(struct dmar_domain *domain);

static int dmar_disabled;
static int __initdata dmar_map_gfx = 1;
static int dmar_forcedac;

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			printk(KERN_INFO "Intel-IOMMU: disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			printk(KERN_INFO
				"Intel-IOMMU: disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			printk(KERN_INFO
				"Intel-IOMMU: Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);
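/*
 * The parser above walks a comma separated option string, so booting
 * with, for example,
 *
 *	intel_iommu=igfx_off,forcedac
 *
 * keeps translation enabled but skips graphics devices and forces DAC
 * addressing, while "intel_iommu=off" disables the driver entirely.
 */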
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
static struct kmem_cache *iommu_iova_cache;
static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
{
	unsigned int flags;
	void *vaddr;

	/* trying to avoid low memory issues */
	flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
	current->flags &= (~PF_MEMALLOC | flags);
	return vaddr;
}
static inline void *alloc_pgtable_page(void)
{
	unsigned int flags;
	void *vaddr;

	/* trying to avoid low memory issues */
	flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
	current->flags &= (~PF_MEMALLOC | flags);
	return vaddr;
}
static inline void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_domain_cache);
}

static inline void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void *alloc_devinfo_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_devinfo_cache);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

struct iova *alloc_iova_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_iova_cache);
}

void free_iova_mem(struct iova *iova)
{
	kmem_cache_free(iommu_iova_cache, iova);
}
static inline void __iommu_flush_cache(
	struct intel_iommu *iommu, void *addr, int size)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(addr, size);
}
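/*
 * Every structure the IOMMU walks in memory (root entries, context
 * entries, page-table entries) is pushed out of the CPU caches through
 * the helper above; when the hardware reports coherent table walks
 * (ecap_coherent) the clflush is skipped entirely.  The typical caller
 * pattern, as used throughout this file, is:
 *
 *	set_root_value(root, phy_addr);
 *	set_root_present(root);
 *	__iommu_flush_cache(iommu, root, sizeof(*root));
 */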
/* Gets context entry for a given bus and devfn */
static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
		u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long phy_addr;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		context = (struct context_entry *)alloc_pgtable_page();
		if (!context) {
			spin_unlock_irqrestore(&iommu->lock, flags);
			return NULL;
		}
		__iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
		phy_addr = virt_to_phys((void *)context);
		set_root_value(root, phy_addr);
		set_root_present(root);
		__iommu_flush_cache(iommu, root, sizeof(*root));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
	return &context[devfn];
}
197 static int device_context_mapped(struct intel_iommu
*iommu
, u8 bus
, u8 devfn
)
199 struct root_entry
*root
;
200 struct context_entry
*context
;
204 spin_lock_irqsave(&iommu
->lock
, flags
);
205 root
= &iommu
->root_entry
[bus
];
206 context
= get_context_addr_from_root(root
);
211 ret
= context_present(context
[devfn
]);
213 spin_unlock_irqrestore(&iommu
->lock
, flags
);
217 static void clear_context_table(struct intel_iommu
*iommu
, u8 bus
, u8 devfn
)
219 struct root_entry
*root
;
220 struct context_entry
*context
;
223 spin_lock_irqsave(&iommu
->lock
, flags
);
224 root
= &iommu
->root_entry
[bus
];
225 context
= get_context_addr_from_root(root
);
227 context_clear_entry(context
[devfn
]);
228 __iommu_flush_cache(iommu
, &context
[devfn
], \
231 spin_unlock_irqrestore(&iommu
->lock
, flags
);
234 static void free_context_table(struct intel_iommu
*iommu
)
236 struct root_entry
*root
;
239 struct context_entry
*context
;
241 spin_lock_irqsave(&iommu
->lock
, flags
);
242 if (!iommu
->root_entry
) {
245 for (i
= 0; i
< ROOT_ENTRY_NR
; i
++) {
246 root
= &iommu
->root_entry
[i
];
247 context
= get_context_addr_from_root(root
);
249 free_pgtable_page(context
);
251 free_pgtable_page(iommu
->root_entry
);
252 iommu
->root_entry
= NULL
;
254 spin_unlock_irqrestore(&iommu
->lock
, flags
);
/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;
}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (12 + (level - 1) * LEVEL_STRIDE);
}

static inline int address_level_offset(u64 addr, int level)
{
	return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
}

static inline u64 level_mask(int level)
{
	return ((u64)-1 << level_to_offset_bits(level));
}

static inline u64 level_size(int level)
{
	return ((u64)1 << level_to_offset_bits(level));
}

static inline u64 align_to_level(u64 addr, int level)
{
	return ((addr + level_size(level) - 1) & level_mask(level));
}
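/*
 * A worked example for the helpers above, using the default 48-bit
 * width: width_to_agaw(48) = (48 - 30) / 9 = 2, and a 48-bit address
 * needs four 9-bit levels above the 12-bit page offset (12 + 4 * 9 = 48).
 * level_to_offset_bits() gives 12/21/30/39 for levels 1-4, so a level-1
 * entry maps a 4KB page while a level-4 entry spans 512GB.
 */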
302 static struct dma_pte
* addr_to_dma_pte(struct dmar_domain
*domain
, u64 addr
)
304 int addr_width
= agaw_to_width(domain
->agaw
);
305 struct dma_pte
*parent
, *pte
= NULL
;
306 int level
= agaw_to_level(domain
->agaw
);
310 BUG_ON(!domain
->pgd
);
312 addr
&= (((u64
)1) << addr_width
) - 1;
313 parent
= domain
->pgd
;
315 spin_lock_irqsave(&domain
->mapping_lock
, flags
);
319 offset
= address_level_offset(addr
, level
);
320 pte
= &parent
[offset
];
324 if (!dma_pte_present(*pte
)) {
325 tmp_page
= alloc_pgtable_page();
328 spin_unlock_irqrestore(&domain
->mapping_lock
,
332 __iommu_flush_cache(domain
->iommu
, tmp_page
,
334 dma_set_pte_addr(*pte
, virt_to_phys(tmp_page
));
336 * high level table always sets r/w, last level page
337 * table control read/write
339 dma_set_pte_readable(*pte
);
340 dma_set_pte_writable(*pte
);
341 __iommu_flush_cache(domain
->iommu
, pte
, sizeof(*pte
));
343 parent
= phys_to_virt(dma_pte_addr(*pte
));
347 spin_unlock_irqrestore(&domain
->mapping_lock
, flags
);
351 /* return address's pte at specific level */
352 static struct dma_pte
*dma_addr_level_pte(struct dmar_domain
*domain
, u64 addr
,
355 struct dma_pte
*parent
, *pte
= NULL
;
356 int total
= agaw_to_level(domain
->agaw
);
359 parent
= domain
->pgd
;
360 while (level
<= total
) {
361 offset
= address_level_offset(addr
, total
);
362 pte
= &parent
[offset
];
366 if (!dma_pte_present(*pte
))
368 parent
= phys_to_virt(dma_pte_addr(*pte
));
374 /* clear one page's page table */
375 static void dma_pte_clear_one(struct dmar_domain
*domain
, u64 addr
)
377 struct dma_pte
*pte
= NULL
;
379 /* get last level pte */
380 pte
= dma_addr_level_pte(domain
, addr
, 1);
384 __iommu_flush_cache(domain
->iommu
, pte
, sizeof(*pte
));
388 /* clear last level pte, a tlb flush should be followed */
389 static void dma_pte_clear_range(struct dmar_domain
*domain
, u64 start
, u64 end
)
391 int addr_width
= agaw_to_width(domain
->agaw
);
393 start
&= (((u64
)1) << addr_width
) - 1;
394 end
&= (((u64
)1) << addr_width
) - 1;
395 /* in case it's partial page */
396 start
= PAGE_ALIGN_4K(start
);
399 /* we don't need lock here, nobody else touches the iova range */
400 while (start
< end
) {
401 dma_pte_clear_one(domain
, start
);
402 start
+= PAGE_SIZE_4K
;
406 /* free page table pages. last level pte should already be cleared */
407 static void dma_pte_free_pagetable(struct dmar_domain
*domain
,
410 int addr_width
= agaw_to_width(domain
->agaw
);
412 int total
= agaw_to_level(domain
->agaw
);
416 start
&= (((u64
)1) << addr_width
) - 1;
417 end
&= (((u64
)1) << addr_width
) - 1;
419 /* we don't need lock here, nobody else touches the iova range */
421 while (level
<= total
) {
422 tmp
= align_to_level(start
, level
);
423 if (tmp
>= end
|| (tmp
+ level_size(level
) > end
))
427 pte
= dma_addr_level_pte(domain
, tmp
, level
);
430 phys_to_virt(dma_pte_addr(*pte
)));
432 __iommu_flush_cache(domain
->iommu
,
435 tmp
+= level_size(level
);
440 if (start
== 0 && end
>= ((((u64
)1) << addr_width
) - 1)) {
441 free_pgtable_page(domain
->pgd
);
447 static int iommu_alloc_root_entry(struct intel_iommu
*iommu
)
449 struct root_entry
*root
;
452 root
= (struct root_entry
*)alloc_pgtable_page();
456 __iommu_flush_cache(iommu
, root
, PAGE_SIZE_4K
);
458 spin_lock_irqsave(&iommu
->lock
, flags
);
459 iommu
->root_entry
= root
;
460 spin_unlock_irqrestore(&iommu
->lock
, flags
);
#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
{\
	unsigned long start_time = jiffies;\
	while (1) {\
		sts = op(iommu->reg + offset);\
		if (cond)\
			break;\
		if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
			panic("DMAR hardware is malfunctioning\n");\
		cpu_relax();\
	}\
}
478 static void iommu_set_root_entry(struct intel_iommu
*iommu
)
484 addr
= iommu
->root_entry
;
486 spin_lock_irqsave(&iommu
->register_lock
, flag
);
487 dmar_writeq(iommu
->reg
+ DMAR_RTADDR_REG
, virt_to_phys(addr
));
489 cmd
= iommu
->gcmd
| DMA_GCMD_SRTP
;
490 writel(cmd
, iommu
->reg
+ DMAR_GCMD_REG
);
492 /* Make sure hardware complete it */
493 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
494 readl
, (sts
& DMA_GSTS_RTPS
), sts
);
496 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
499 static void iommu_flush_write_buffer(struct intel_iommu
*iommu
)
504 if (!cap_rwbf(iommu
->cap
))
506 val
= iommu
->gcmd
| DMA_GCMD_WBF
;
508 spin_lock_irqsave(&iommu
->register_lock
, flag
);
509 writel(val
, iommu
->reg
+ DMAR_GCMD_REG
);
511 /* Make sure hardware complete it */
512 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
513 readl
, (!(val
& DMA_GSTS_WBFS
)), val
);
515 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
518 /* return value determine if we need a write buffer flush */
519 static int __iommu_flush_context(struct intel_iommu
*iommu
,
520 u16 did
, u16 source_id
, u8 function_mask
, u64 type
,
521 int non_present_entry_flush
)
527 * In the non-present entry flush case, if hardware doesn't cache
528 * non-present entry we do nothing and if hardware cache non-present
529 * entry, we flush entries of domain 0 (the domain id is used to cache
530 * any non-present entries)
532 if (non_present_entry_flush
) {
533 if (!cap_caching_mode(iommu
->cap
))
540 case DMA_CCMD_GLOBAL_INVL
:
541 val
= DMA_CCMD_GLOBAL_INVL
;
543 case DMA_CCMD_DOMAIN_INVL
:
544 val
= DMA_CCMD_DOMAIN_INVL
|DMA_CCMD_DID(did
);
546 case DMA_CCMD_DEVICE_INVL
:
547 val
= DMA_CCMD_DEVICE_INVL
|DMA_CCMD_DID(did
)
548 | DMA_CCMD_SID(source_id
) | DMA_CCMD_FM(function_mask
);
555 spin_lock_irqsave(&iommu
->register_lock
, flag
);
556 dmar_writeq(iommu
->reg
+ DMAR_CCMD_REG
, val
);
558 /* Make sure hardware complete it */
559 IOMMU_WAIT_OP(iommu
, DMAR_CCMD_REG
,
560 dmar_readq
, (!(val
& DMA_CCMD_ICC
)), val
);
562 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
	/* flush context entry will implicitly flush write buffer */
568 static int inline iommu_flush_context_global(struct intel_iommu
*iommu
,
569 int non_present_entry_flush
)
571 return __iommu_flush_context(iommu
, 0, 0, 0, DMA_CCMD_GLOBAL_INVL
,
572 non_present_entry_flush
);
575 static int inline iommu_flush_context_domain(struct intel_iommu
*iommu
, u16 did
,
576 int non_present_entry_flush
)
578 return __iommu_flush_context(iommu
, did
, 0, 0, DMA_CCMD_DOMAIN_INVL
,
579 non_present_entry_flush
);
582 static int inline iommu_flush_context_device(struct intel_iommu
*iommu
,
583 u16 did
, u16 source_id
, u8 function_mask
, int non_present_entry_flush
)
585 return __iommu_flush_context(iommu
, did
, source_id
, function_mask
,
586 DMA_CCMD_DEVICE_INVL
, non_present_entry_flush
);
/* return value determines if we need a write buffer flush */
590 static int __iommu_flush_iotlb(struct intel_iommu
*iommu
, u16 did
,
591 u64 addr
, unsigned int size_order
, u64 type
,
592 int non_present_entry_flush
)
594 int tlb_offset
= ecap_iotlb_offset(iommu
->ecap
);
595 u64 val
= 0, val_iva
= 0;
599 * In the non-present entry flush case, if hardware doesn't cache
600 * non-present entry we do nothing and if hardware cache non-present
601 * entry, we flush entries of domain 0 (the domain id is used to cache
602 * any non-present entries)
604 if (non_present_entry_flush
) {
605 if (!cap_caching_mode(iommu
->cap
))
612 case DMA_TLB_GLOBAL_FLUSH
:
613 /* global flush doesn't need set IVA_REG */
614 val
= DMA_TLB_GLOBAL_FLUSH
|DMA_TLB_IVT
;
616 case DMA_TLB_DSI_FLUSH
:
617 val
= DMA_TLB_DSI_FLUSH
|DMA_TLB_IVT
|DMA_TLB_DID(did
);
619 case DMA_TLB_PSI_FLUSH
:
620 val
= DMA_TLB_PSI_FLUSH
|DMA_TLB_IVT
|DMA_TLB_DID(did
);
621 /* Note: always flush non-leaf currently */
622 val_iva
= size_order
| addr
;
627 /* Note: set drain read/write */
630 * This is probably to be super secure.. Looks like we can
631 * ignore it without any impact.
633 if (cap_read_drain(iommu
->cap
))
634 val
|= DMA_TLB_READ_DRAIN
;
636 if (cap_write_drain(iommu
->cap
))
637 val
|= DMA_TLB_WRITE_DRAIN
;
639 spin_lock_irqsave(&iommu
->register_lock
, flag
);
640 /* Note: Only uses first TLB reg currently */
642 dmar_writeq(iommu
->reg
+ tlb_offset
, val_iva
);
643 dmar_writeq(iommu
->reg
+ tlb_offset
+ 8, val
);
645 /* Make sure hardware complete it */
646 IOMMU_WAIT_OP(iommu
, tlb_offset
+ 8,
647 dmar_readq
, (!(val
& DMA_TLB_IVT
)), val
);
649 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
651 /* check IOTLB invalidation granularity */
652 if (DMA_TLB_IAIG(val
) == 0)
653 printk(KERN_ERR
"IOMMU: flush IOTLB failed\n");
654 if (DMA_TLB_IAIG(val
) != DMA_TLB_IIRG(type
))
655 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
656 DMA_TLB_IIRG(type
), DMA_TLB_IAIG(val
));
	/* flush context entry will implicitly flush write buffer */
661 static int inline iommu_flush_iotlb_global(struct intel_iommu
*iommu
,
662 int non_present_entry_flush
)
664 return __iommu_flush_iotlb(iommu
, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH
,
665 non_present_entry_flush
);
668 static int inline iommu_flush_iotlb_dsi(struct intel_iommu
*iommu
, u16 did
,
669 int non_present_entry_flush
)
671 return __iommu_flush_iotlb(iommu
, did
, 0, 0, DMA_TLB_DSI_FLUSH
,
672 non_present_entry_flush
);
675 static int iommu_flush_iotlb_psi(struct intel_iommu
*iommu
, u16 did
,
676 u64 addr
, unsigned int pages
, int non_present_entry_flush
)
680 BUG_ON(addr
& (~PAGE_MASK_4K
));
683 /* Fallback to domain selective flush if no PSI support */
684 if (!cap_pgsel_inv(iommu
->cap
))
685 return iommu_flush_iotlb_dsi(iommu
, did
,
686 non_present_entry_flush
);
689 * PSI requires page size to be 2 ^ x, and the base address is naturally
690 * aligned to the size
692 mask
= ilog2(__roundup_pow_of_two(pages
));
693 /* Fallback to domain selective flush if size is too big */
694 if (mask
> cap_max_amask_val(iommu
->cap
))
695 return iommu_flush_iotlb_dsi(iommu
, did
,
696 non_present_entry_flush
);
698 return __iommu_flush_iotlb(iommu
, did
, addr
, mask
,
699 DMA_TLB_PSI_FLUSH
, non_present_entry_flush
);
702 static void iommu_disable_protect_mem_regions(struct intel_iommu
*iommu
)
707 spin_lock_irqsave(&iommu
->register_lock
, flags
);
708 pmen
= readl(iommu
->reg
+ DMAR_PMEN_REG
);
709 pmen
&= ~DMA_PMEN_EPM
;
710 writel(pmen
, iommu
->reg
+ DMAR_PMEN_REG
);
712 /* wait for the protected region status bit to clear */
713 IOMMU_WAIT_OP(iommu
, DMAR_PMEN_REG
,
714 readl
, !(pmen
& DMA_PMEN_PRS
), pmen
);
716 spin_unlock_irqrestore(&iommu
->register_lock
, flags
);
719 static int iommu_enable_translation(struct intel_iommu
*iommu
)
724 spin_lock_irqsave(&iommu
->register_lock
, flags
);
725 writel(iommu
->gcmd
|DMA_GCMD_TE
, iommu
->reg
+ DMAR_GCMD_REG
);
727 /* Make sure hardware complete it */
728 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
729 readl
, (sts
& DMA_GSTS_TES
), sts
);
731 iommu
->gcmd
|= DMA_GCMD_TE
;
732 spin_unlock_irqrestore(&iommu
->register_lock
, flags
);
736 static int iommu_disable_translation(struct intel_iommu
*iommu
)
741 spin_lock_irqsave(&iommu
->register_lock
, flag
);
742 iommu
->gcmd
&= ~DMA_GCMD_TE
;
743 writel(iommu
->gcmd
, iommu
->reg
+ DMAR_GCMD_REG
);
745 /* Make sure hardware complete it */
746 IOMMU_WAIT_OP(iommu
, DMAR_GSTS_REG
,
747 readl
, (!(sts
& DMA_GSTS_TES
)), sts
);
749 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
/* iommu interrupt handling. Most of it is MSI-like. */
755 static const char *fault_reason_strings
[] =
758 "Present bit in root entry is clear",
759 "Present bit in context entry is clear",
760 "Invalid context entry",
761 "Access beyond MGAW",
762 "PTE Write access is not set",
763 "PTE Read access is not set",
764 "Next page table ptr is invalid",
765 "Root table address invalid",
766 "Context table ptr is invalid",
767 "non-zero reserved fields in RTP",
768 "non-zero reserved fields in CTP",
769 "non-zero reserved fields in PTE",
771 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
773 const char *dmar_get_fault_reason(u8 fault_reason
)
775 if (fault_reason
> MAX_FAULT_REASON_IDX
)
778 return fault_reason_strings
[fault_reason
];
781 void dmar_msi_unmask(unsigned int irq
)
783 struct intel_iommu
*iommu
= get_irq_data(irq
);
787 spin_lock_irqsave(&iommu
->register_lock
, flag
);
788 writel(0, iommu
->reg
+ DMAR_FECTL_REG
);
789 /* Read a reg to force flush the post write */
790 readl(iommu
->reg
+ DMAR_FECTL_REG
);
791 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
794 void dmar_msi_mask(unsigned int irq
)
797 struct intel_iommu
*iommu
= get_irq_data(irq
);
800 spin_lock_irqsave(&iommu
->register_lock
, flag
);
801 writel(DMA_FECTL_IM
, iommu
->reg
+ DMAR_FECTL_REG
);
802 /* Read a reg to force flush the post write */
803 readl(iommu
->reg
+ DMAR_FECTL_REG
);
804 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
807 void dmar_msi_write(int irq
, struct msi_msg
*msg
)
809 struct intel_iommu
*iommu
= get_irq_data(irq
);
812 spin_lock_irqsave(&iommu
->register_lock
, flag
);
813 writel(msg
->data
, iommu
->reg
+ DMAR_FEDATA_REG
);
814 writel(msg
->address_lo
, iommu
->reg
+ DMAR_FEADDR_REG
);
815 writel(msg
->address_hi
, iommu
->reg
+ DMAR_FEUADDR_REG
);
816 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
819 void dmar_msi_read(int irq
, struct msi_msg
*msg
)
821 struct intel_iommu
*iommu
= get_irq_data(irq
);
824 spin_lock_irqsave(&iommu
->register_lock
, flag
);
825 msg
->data
= readl(iommu
->reg
+ DMAR_FEDATA_REG
);
826 msg
->address_lo
= readl(iommu
->reg
+ DMAR_FEADDR_REG
);
827 msg
->address_hi
= readl(iommu
->reg
+ DMAR_FEUADDR_REG
);
828 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
831 static int iommu_page_fault_do_one(struct intel_iommu
*iommu
, int type
,
832 u8 fault_reason
, u16 source_id
, u64 addr
)
836 reason
= dmar_get_fault_reason(fault_reason
);
839 "DMAR:[%s] Request device [%02x:%02x.%d] "
841 "DMAR:[fault reason %02d] %s\n",
842 (type
? "DMA Read" : "DMA Write"),
843 (source_id
>> 8), PCI_SLOT(source_id
& 0xFF),
844 PCI_FUNC(source_id
& 0xFF), addr
, fault_reason
, reason
);
#define PRIMARY_FAULT_REG_LEN (16)
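/*
 * Each primary fault record is 16 bytes wide.  The handler below relies
 * on the following layout: bytes 0-7 hold the faulting page address,
 * bytes 8-11 the source id, and bytes 12-15 the fault reason, the
 * request type and the F bit that is written back to clear the record.
 */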
849 static irqreturn_t
iommu_page_fault(int irq
, void *dev_id
)
851 struct intel_iommu
*iommu
= dev_id
;
852 int reg
, fault_index
;
856 spin_lock_irqsave(&iommu
->register_lock
, flag
);
857 fault_status
= readl(iommu
->reg
+ DMAR_FSTS_REG
);
859 /* TBD: ignore advanced fault log currently */
860 if (!(fault_status
& DMA_FSTS_PPF
))
863 fault_index
= dma_fsts_fault_record_index(fault_status
);
864 reg
= cap_fault_reg_offset(iommu
->cap
);
872 /* highest 32 bits */
873 data
= readl(iommu
->reg
+ reg
+
874 fault_index
* PRIMARY_FAULT_REG_LEN
+ 12);
875 if (!(data
& DMA_FRCD_F
))
878 fault_reason
= dma_frcd_fault_reason(data
);
879 type
= dma_frcd_type(data
);
881 data
= readl(iommu
->reg
+ reg
+
882 fault_index
* PRIMARY_FAULT_REG_LEN
+ 8);
883 source_id
= dma_frcd_source_id(data
);
885 guest_addr
= dmar_readq(iommu
->reg
+ reg
+
886 fault_index
* PRIMARY_FAULT_REG_LEN
);
887 guest_addr
= dma_frcd_page_addr(guest_addr
);
888 /* clear the fault */
889 writel(DMA_FRCD_F
, iommu
->reg
+ reg
+
890 fault_index
* PRIMARY_FAULT_REG_LEN
+ 12);
892 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
894 iommu_page_fault_do_one(iommu
, type
, fault_reason
,
895 source_id
, guest_addr
);
898 if (fault_index
> cap_num_fault_regs(iommu
->cap
))
900 spin_lock_irqsave(&iommu
->register_lock
, flag
);
903 /* clear primary fault overflow */
904 fault_status
= readl(iommu
->reg
+ DMAR_FSTS_REG
);
905 if (fault_status
& DMA_FSTS_PFO
)
906 writel(DMA_FSTS_PFO
, iommu
->reg
+ DMAR_FSTS_REG
);
908 spin_unlock_irqrestore(&iommu
->register_lock
, flag
);
912 int dmar_set_interrupt(struct intel_iommu
*iommu
)
918 printk(KERN_ERR
"IOMMU: no free vectors\n");
922 set_irq_data(irq
, iommu
);
925 ret
= arch_setup_dmar_msi(irq
);
927 set_irq_data(irq
, NULL
);
933 /* Force fault register is cleared */
934 iommu_page_fault(irq
, iommu
);
936 ret
= request_irq(irq
, iommu_page_fault
, 0, iommu
->name
, iommu
);
938 printk(KERN_ERR
"IOMMU: can't request irq\n");
942 static int iommu_init_domains(struct intel_iommu
*iommu
)
944 unsigned long ndomains
;
945 unsigned long nlongs
;
947 ndomains
= cap_ndoms(iommu
->cap
);
	pr_debug("Number of Domains supported <%ld>\n", ndomains);
949 nlongs
= BITS_TO_LONGS(ndomains
);
951 /* TBD: there might be 64K domains,
952 * consider other allocation for future chip
954 iommu
->domain_ids
= kcalloc(nlongs
, sizeof(unsigned long), GFP_KERNEL
);
955 if (!iommu
->domain_ids
) {
956 printk(KERN_ERR
"Allocating domain id array failed\n");
959 iommu
->domains
= kcalloc(ndomains
, sizeof(struct dmar_domain
*),
961 if (!iommu
->domains
) {
962 printk(KERN_ERR
"Allocating domain array failed\n");
963 kfree(iommu
->domain_ids
);
968 * if Caching mode is set, then invalid translations are tagged
969 * with domainid 0. Hence we need to pre-allocate it.
971 if (cap_caching_mode(iommu
->cap
))
972 set_bit(0, iommu
->domain_ids
);
976 static struct intel_iommu
*alloc_iommu(struct dmar_drhd_unit
*drhd
)
978 struct intel_iommu
*iommu
;
983 iommu
= kzalloc(sizeof(*iommu
), GFP_KERNEL
);
986 iommu
->reg
= ioremap(drhd
->reg_base_addr
, PAGE_SIZE_4K
);
988 printk(KERN_ERR
"IOMMU: can't map the region\n");
991 iommu
->cap
= dmar_readq(iommu
->reg
+ DMAR_CAP_REG
);
992 iommu
->ecap
= dmar_readq(iommu
->reg
+ DMAR_ECAP_REG
);
994 /* the registers might be more than one page */
995 map_size
= max_t(int, ecap_max_iotlb_offset(iommu
->ecap
),
996 cap_max_fault_reg_offset(iommu
->cap
));
997 map_size
= PAGE_ALIGN_4K(map_size
);
998 if (map_size
> PAGE_SIZE_4K
) {
1000 iommu
->reg
= ioremap(drhd
->reg_base_addr
, map_size
);
1002 printk(KERN_ERR
"IOMMU: can't map the region\n");
1007 ver
= readl(iommu
->reg
+ DMAR_VER_REG
);
1008 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
1009 drhd
->reg_base_addr
, DMAR_VER_MAJOR(ver
), DMAR_VER_MINOR(ver
),
1010 iommu
->cap
, iommu
->ecap
);
1011 ret
= iommu_init_domains(iommu
);
1014 spin_lock_init(&iommu
->lock
);
1015 spin_lock_init(&iommu
->register_lock
);
1017 drhd
->iommu
= iommu
;
1020 iounmap(iommu
->reg
);
1026 static void domain_exit(struct dmar_domain
*domain
);
1027 static void free_iommu(struct intel_iommu
*iommu
)
1029 struct dmar_domain
*domain
;
1035 i
= find_first_bit(iommu
->domain_ids
, cap_ndoms(iommu
->cap
));
1036 for (; i
< cap_ndoms(iommu
->cap
); ) {
1037 domain
= iommu
->domains
[i
];
1038 clear_bit(i
, iommu
->domain_ids
);
1039 domain_exit(domain
);
1040 i
= find_next_bit(iommu
->domain_ids
,
1041 cap_ndoms(iommu
->cap
), i
+1);
1044 if (iommu
->gcmd
& DMA_GCMD_TE
)
1045 iommu_disable_translation(iommu
);
1048 set_irq_data(iommu
->irq
, NULL
);
1049 /* This will mask the irq */
1050 free_irq(iommu
->irq
, iommu
);
1051 destroy_irq(iommu
->irq
);
1054 kfree(iommu
->domains
);
1055 kfree(iommu
->domain_ids
);
1057 /* free context mapping */
1058 free_context_table(iommu
);
1061 iounmap(iommu
->reg
);
1065 static struct dmar_domain
* iommu_alloc_domain(struct intel_iommu
*iommu
)
1068 unsigned long ndomains
;
1069 struct dmar_domain
*domain
;
1070 unsigned long flags
;
1072 domain
= alloc_domain_mem();
1076 ndomains
= cap_ndoms(iommu
->cap
);
1078 spin_lock_irqsave(&iommu
->lock
, flags
);
1079 num
= find_first_zero_bit(iommu
->domain_ids
, ndomains
);
1080 if (num
>= ndomains
) {
1081 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1082 free_domain_mem(domain
);
1083 printk(KERN_ERR
"IOMMU: no free domain ids\n");
1087 set_bit(num
, iommu
->domain_ids
);
1089 domain
->iommu
= iommu
;
1090 iommu
->domains
[num
] = domain
;
1091 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1096 static void iommu_free_domain(struct dmar_domain
*domain
)
1098 unsigned long flags
;
1100 spin_lock_irqsave(&domain
->iommu
->lock
, flags
);
1101 clear_bit(domain
->id
, domain
->iommu
->domain_ids
);
1102 spin_unlock_irqrestore(&domain
->iommu
->lock
, flags
);
1105 static struct iova_domain reserved_iova_list
;
1107 static void dmar_init_reserved_ranges(void)
1109 struct pci_dev
*pdev
= NULL
;
1114 init_iova_domain(&reserved_iova_list
, DMA_32BIT_PFN
);
1116 /* IOAPIC ranges shouldn't be accessed by DMA */
1117 iova
= reserve_iova(&reserved_iova_list
, IOVA_PFN(IOAPIC_RANGE_START
),
1118 IOVA_PFN(IOAPIC_RANGE_END
));
1120 printk(KERN_ERR
"Reserve IOAPIC range failed\n");
1122 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1123 for_each_pci_dev(pdev
) {
1126 for (i
= 0; i
< PCI_NUM_RESOURCES
; i
++) {
1127 r
= &pdev
->resource
[i
];
1128 if (!r
->flags
|| !(r
->flags
& IORESOURCE_MEM
))
1131 addr
&= PAGE_MASK_4K
;
1132 size
= r
->end
- addr
;
1133 size
= PAGE_ALIGN_4K(size
);
1134 iova
= reserve_iova(&reserved_iova_list
, IOVA_PFN(addr
),
1135 IOVA_PFN(size
+ addr
) - 1);
1137 printk(KERN_ERR
"Reserve iova failed\n");
1143 static void domain_reserve_special_ranges(struct dmar_domain
*domain
)
1145 copy_reserved_iova(&reserved_iova_list
, &domain
->iovad
);
1148 static inline int guestwidth_to_adjustwidth(int gaw
)
1151 int r
= (gaw
- 12) % 9;
1162 static int domain_init(struct dmar_domain
*domain
, int guest_width
)
1164 struct intel_iommu
*iommu
;
1165 int adjust_width
, agaw
;
1166 unsigned long sagaw
;
1168 init_iova_domain(&domain
->iovad
, DMA_32BIT_PFN
);
1169 spin_lock_init(&domain
->mapping_lock
);
1171 domain_reserve_special_ranges(domain
);
1173 /* calculate AGAW */
1174 iommu
= domain
->iommu
;
1175 if (guest_width
> cap_mgaw(iommu
->cap
))
1176 guest_width
= cap_mgaw(iommu
->cap
);
1177 domain
->gaw
= guest_width
;
1178 adjust_width
= guestwidth_to_adjustwidth(guest_width
);
1179 agaw
= width_to_agaw(adjust_width
);
1180 sagaw
= cap_sagaw(iommu
->cap
);
1181 if (!test_bit(agaw
, &sagaw
)) {
1182 /* hardware doesn't support it, choose a bigger one */
1183 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw
);
1184 agaw
= find_next_bit(&sagaw
, 5, agaw
);
1188 domain
->agaw
= agaw
;
1189 INIT_LIST_HEAD(&domain
->devices
);
1191 /* always allocate the top pgd */
1192 domain
->pgd
= (struct dma_pte
*)alloc_pgtable_page();
1195 __iommu_flush_cache(iommu
, domain
->pgd
, PAGE_SIZE_4K
);
1199 static void domain_exit(struct dmar_domain
*domain
)
	/* Domain 0 is reserved, so don't process it */
1207 domain_remove_dev_info(domain
);
1209 put_iova_domain(&domain
->iovad
);
1210 end
= DOMAIN_MAX_ADDR(domain
->gaw
);
1211 end
= end
& (~PAGE_MASK_4K
);
1214 dma_pte_clear_range(domain
, 0, end
);
1216 /* free page tables */
1217 dma_pte_free_pagetable(domain
, 0, end
);
1219 iommu_free_domain(domain
);
1220 free_domain_mem(domain
);
1223 static int domain_context_mapping_one(struct dmar_domain
*domain
,
1226 struct context_entry
*context
;
1227 struct intel_iommu
*iommu
= domain
->iommu
;
1228 unsigned long flags
;
1230 pr_debug("Set context mapping for %02x:%02x.%d\n",
1231 bus
, PCI_SLOT(devfn
), PCI_FUNC(devfn
));
1232 BUG_ON(!domain
->pgd
);
1233 context
= device_to_context_entry(iommu
, bus
, devfn
);
1236 spin_lock_irqsave(&iommu
->lock
, flags
);
1237 if (context_present(*context
)) {
1238 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1242 context_set_domain_id(*context
, domain
->id
);
1243 context_set_address_width(*context
, domain
->agaw
);
1244 context_set_address_root(*context
, virt_to_phys(domain
->pgd
));
1245 context_set_translation_type(*context
, CONTEXT_TT_MULTI_LEVEL
);
1246 context_set_fault_enable(*context
);
1247 context_set_present(*context
);
1248 __iommu_flush_cache(iommu
, context
, sizeof(*context
));
1250 /* it's a non-present to present mapping */
1251 if (iommu_flush_context_device(iommu
, domain
->id
,
1252 (((u16
)bus
) << 8) | devfn
, DMA_CCMD_MASK_NOBIT
, 1))
1253 iommu_flush_write_buffer(iommu
);
1255 iommu_flush_iotlb_dsi(iommu
, 0, 0);
1256 spin_unlock_irqrestore(&iommu
->lock
, flags
);
1261 domain_context_mapping(struct dmar_domain
*domain
, struct pci_dev
*pdev
)
1264 struct pci_dev
*tmp
, *parent
;
1266 ret
= domain_context_mapping_one(domain
, pdev
->bus
->number
,
1271 /* dependent device mapping */
1272 tmp
= pci_find_upstream_pcie_bridge(pdev
);
1275 /* Secondary interface's bus number and devfn 0 */
1276 parent
= pdev
->bus
->self
;
1277 while (parent
!= tmp
) {
1278 ret
= domain_context_mapping_one(domain
, parent
->bus
->number
,
1282 parent
= parent
->bus
->self
;
1284 if (tmp
->is_pcie
) /* this is a PCIE-to-PCI bridge */
1285 return domain_context_mapping_one(domain
,
1286 tmp
->subordinate
->number
, 0);
1287 else /* this is a legacy PCI bridge */
1288 return domain_context_mapping_one(domain
,
1289 tmp
->bus
->number
, tmp
->devfn
);
1292 static int domain_context_mapped(struct dmar_domain
*domain
,
1293 struct pci_dev
*pdev
)
1296 struct pci_dev
*tmp
, *parent
;
1298 ret
= device_context_mapped(domain
->iommu
,
1299 pdev
->bus
->number
, pdev
->devfn
);
1302 /* dependent device mapping */
1303 tmp
= pci_find_upstream_pcie_bridge(pdev
);
1306 /* Secondary interface's bus number and devfn 0 */
1307 parent
= pdev
->bus
->self
;
1308 while (parent
!= tmp
) {
1309 ret
= device_context_mapped(domain
->iommu
, parent
->bus
->number
,
1313 parent
= parent
->bus
->self
;
1316 return device_context_mapped(domain
->iommu
,
1317 tmp
->subordinate
->number
, 0);
1319 return device_context_mapped(domain
->iommu
,
1320 tmp
->bus
->number
, tmp
->devfn
);
1324 domain_page_mapping(struct dmar_domain
*domain
, dma_addr_t iova
,
1325 u64 hpa
, size_t size
, int prot
)
1327 u64 start_pfn
, end_pfn
;
1328 struct dma_pte
*pte
;
1331 if ((prot
& (DMA_PTE_READ
|DMA_PTE_WRITE
)) == 0)
1333 iova
&= PAGE_MASK_4K
;
1334 start_pfn
= ((u64
)hpa
) >> PAGE_SHIFT_4K
;
1335 end_pfn
= (PAGE_ALIGN_4K(((u64
)hpa
) + size
)) >> PAGE_SHIFT_4K
;
1337 while (start_pfn
< end_pfn
) {
1338 pte
= addr_to_dma_pte(domain
, iova
+ PAGE_SIZE_4K
* index
);
1341 /* We don't need lock here, nobody else
1342 * touches the iova range
1344 BUG_ON(dma_pte_addr(*pte
));
1345 dma_set_pte_addr(*pte
, start_pfn
<< PAGE_SHIFT_4K
);
1346 dma_set_pte_prot(*pte
, prot
);
1347 __iommu_flush_cache(domain
->iommu
, pte
, sizeof(*pte
));
1354 static void detach_domain_for_dev(struct dmar_domain
*domain
, u8 bus
, u8 devfn
)
1356 clear_context_table(domain
->iommu
, bus
, devfn
);
1357 iommu_flush_context_global(domain
->iommu
, 0);
1358 iommu_flush_iotlb_global(domain
->iommu
, 0);
1361 static void domain_remove_dev_info(struct dmar_domain
*domain
)
1363 struct device_domain_info
*info
;
1364 unsigned long flags
;
1366 spin_lock_irqsave(&device_domain_lock
, flags
);
1367 while (!list_empty(&domain
->devices
)) {
1368 info
= list_entry(domain
->devices
.next
,
1369 struct device_domain_info
, link
);
1370 list_del(&info
->link
);
1371 list_del(&info
->global
);
1373 info
->dev
->dev
.archdata
.iommu
= NULL
;
1374 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1376 detach_domain_for_dev(info
->domain
, info
->bus
, info
->devfn
);
1377 free_devinfo_mem(info
);
1379 spin_lock_irqsave(&device_domain_lock
, flags
);
1381 spin_unlock_irqrestore(&device_domain_lock
, flags
);
 * Note: we use struct pci_dev->dev.archdata.iommu to store the domain info
1388 struct dmar_domain
*
1389 find_domain(struct pci_dev
*pdev
)
1391 struct device_domain_info
*info
;
1393 /* No lock here, assumes no domain exit in normal case */
1394 info
= pdev
->dev
.archdata
.iommu
;
1396 return info
->domain
;
1400 static int dmar_pci_device_match(struct pci_dev
*devices
[], int cnt
,
1401 struct pci_dev
*dev
)
1406 for (index
= 0; index
< cnt
; index
++)
1407 if (dev
== devices
[index
])
1410 /* Check our parent */
1411 dev
= dev
->bus
->self
;
1417 static struct dmar_drhd_unit
*
1418 dmar_find_matched_drhd_unit(struct pci_dev
*dev
)
1420 struct dmar_drhd_unit
*drhd
= NULL
;
1422 list_for_each_entry(drhd
, &dmar_drhd_units
, list
) {
1423 if (drhd
->include_all
|| dmar_pci_device_match(drhd
->devices
,
1424 drhd
->devices_cnt
, dev
))
1431 /* domain is initialized */
1432 static struct dmar_domain
*get_domain_for_dev(struct pci_dev
*pdev
, int gaw
)
1434 struct dmar_domain
*domain
, *found
= NULL
;
1435 struct intel_iommu
*iommu
;
1436 struct dmar_drhd_unit
*drhd
;
1437 struct device_domain_info
*info
, *tmp
;
1438 struct pci_dev
*dev_tmp
;
1439 unsigned long flags
;
1440 int bus
= 0, devfn
= 0;
1442 domain
= find_domain(pdev
);
1446 dev_tmp
= pci_find_upstream_pcie_bridge(pdev
);
1448 if (dev_tmp
->is_pcie
) {
1449 bus
= dev_tmp
->subordinate
->number
;
1452 bus
= dev_tmp
->bus
->number
;
1453 devfn
= dev_tmp
->devfn
;
1455 spin_lock_irqsave(&device_domain_lock
, flags
);
1456 list_for_each_entry(info
, &device_domain_list
, global
) {
1457 if (info
->bus
== bus
&& info
->devfn
== devfn
) {
1458 found
= info
->domain
;
1462 spin_unlock_irqrestore(&device_domain_lock
, flags
);
	/* pcie-pci bridge already has a domain, use it */
1470 /* Allocate new domain for the device */
1471 drhd
= dmar_find_matched_drhd_unit(pdev
);
1473 printk(KERN_ERR
"IOMMU: can't find DMAR for device %s\n",
1477 iommu
= drhd
->iommu
;
1479 domain
= iommu_alloc_domain(iommu
);
1483 if (domain_init(domain
, gaw
)) {
1484 domain_exit(domain
);
1488 /* register pcie-to-pci device */
1490 info
= alloc_devinfo_mem();
1492 domain_exit(domain
);
1496 info
->devfn
= devfn
;
1498 info
->domain
= domain
;
1499 /* This domain is shared by devices under p2p bridge */
1500 domain
->flags
|= DOMAIN_FLAG_MULTIPLE_DEVICES
;
	/* pcie-to-pci bridge already has a domain, use it */
1504 spin_lock_irqsave(&device_domain_lock
, flags
);
1505 list_for_each_entry(tmp
, &device_domain_list
, global
) {
1506 if (tmp
->bus
== bus
&& tmp
->devfn
== devfn
) {
1507 found
= tmp
->domain
;
1512 free_devinfo_mem(info
);
1513 domain_exit(domain
);
1516 list_add(&info
->link
, &domain
->devices
);
1517 list_add(&info
->global
, &device_domain_list
);
1519 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1523 info
= alloc_devinfo_mem();
1526 info
->bus
= pdev
->bus
->number
;
1527 info
->devfn
= pdev
->devfn
;
1529 info
->domain
= domain
;
1530 spin_lock_irqsave(&device_domain_lock
, flags
);
1531 /* somebody is fast */
1532 found
= find_domain(pdev
);
1533 if (found
!= NULL
) {
1534 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1535 if (found
!= domain
) {
1536 domain_exit(domain
);
1539 free_devinfo_mem(info
);
1542 list_add(&info
->link
, &domain
->devices
);
1543 list_add(&info
->global
, &device_domain_list
);
1544 pdev
->dev
.archdata
.iommu
= info
;
1545 spin_unlock_irqrestore(&device_domain_lock
, flags
);
1548 /* recheck it here, maybe others set it */
1549 return find_domain(pdev
);
1552 static int iommu_prepare_identity_map(struct pci_dev
*pdev
, u64 start
, u64 end
)
1554 struct dmar_domain
*domain
;
1560 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1561 pci_name(pdev
), start
, end
);
1562 /* page table init */
1563 domain
= get_domain_for_dev(pdev
, DEFAULT_DOMAIN_ADDRESS_WIDTH
);
1567 /* The address might not be aligned */
1568 base
= start
& PAGE_MASK_4K
;
1570 size
= PAGE_ALIGN_4K(size
);
1571 if (!reserve_iova(&domain
->iovad
, IOVA_PFN(base
),
1572 IOVA_PFN(base
+ size
) - 1)) {
1573 printk(KERN_ERR
"IOMMU: reserve iova failed\n");
1578 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1579 size
, base
, pci_name(pdev
));
1581 * RMRR range might have overlap with physical memory range,
1584 dma_pte_clear_range(domain
, base
, base
+ size
);
1586 ret
= domain_page_mapping(domain
, base
, base
, size
,
1587 DMA_PTE_READ
|DMA_PTE_WRITE
);
1591 /* context entry init */
1592 ret
= domain_context_mapping(domain
, pdev
);
1596 domain_exit(domain
);
1601 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit
*rmrr
,
1602 struct pci_dev
*pdev
)
1604 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
1606 return iommu_prepare_identity_map(pdev
, rmrr
->base_address
,
1607 rmrr
->end_address
+ 1);
1610 #ifdef CONFIG_DMAR_GFX_WA
1611 extern int arch_get_ram_range(int slot
, u64
*addr
, u64
*size
);
1612 static void __init
iommu_prepare_gfx_mapping(void)
1614 struct pci_dev
*pdev
= NULL
;
1619 for_each_pci_dev(pdev
) {
1620 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
||
1621 !IS_GFX_DEVICE(pdev
))
1623 printk(KERN_INFO
"IOMMU: gfx device %s 1-1 mapping\n",
1625 slot
= arch_get_ram_range(0, &base
, &size
);
1627 ret
= iommu_prepare_identity_map(pdev
,
1631 slot
= arch_get_ram_range(slot
, &base
, &size
);
1635 printk(KERN_ERR
"IOMMU: mapping reserved region failed\n");
1640 #ifdef CONFIG_DMAR_FLOPPY_WA
1641 static inline void iommu_prepare_isa(void)
1643 struct pci_dev
*pdev
;
1646 pdev
= pci_get_class(PCI_CLASS_BRIDGE_ISA
<< 8, NULL
);
1650 printk(KERN_INFO
"IOMMU: Prepare 0-16M unity mapping for LPC\n");
1651 ret
= iommu_prepare_identity_map(pdev
, 0, 16*1024*1024);
		printk("IOMMU: Failed to create 0-16M identity map, "
			"floppy might not work\n");
1659 static inline void iommu_prepare_isa(void)
#endif /* !CONFIG_DMAR_FLOPPY_WA */
1665 int __init
init_dmars(void)
1667 struct dmar_drhd_unit
*drhd
;
1668 struct dmar_rmrr_unit
*rmrr
;
1669 struct pci_dev
*pdev
;
1670 struct intel_iommu
*iommu
;
1676 * initialize and program root entry to not present
1679 for_each_drhd_unit(drhd
) {
1682 iommu
= alloc_iommu(drhd
);
		 * we could share the same root & context tables
		 * among all IOMMUs. Need to split it later.
1693 ret
= iommu_alloc_root_entry(iommu
);
1695 printk(KERN_ERR
"IOMMU: allocate root entry failed\n");
1702 * for each dev attached to rmrr
1704 * locate drhd for dev, alloc domain for dev
1705 * allocate free domain
1706 * allocate page table entries for rmrr
1707 * if context not allocated for bus
1708 * allocate and init context
1709 * set present in root table for this bus
1710 * init context with domain, translation etc
1714 for_each_rmrr_units(rmrr
) {
1716 for (i
= 0; i
< rmrr
->devices_cnt
; i
++) {
1717 pdev
= rmrr
->devices
[i
];
			/* some BIOSes list non-existent devices in the DMAR table */
1721 ret
= iommu_prepare_rmrr_dev(rmrr
, pdev
);
1724 "IOMMU: mapping reserved region failed\n");
1728 iommu_prepare_gfx_mapping();
1730 iommu_prepare_isa();
1735 * global invalidate context cache
1736 * global invalidate iotlb
1737 * enable translation
1739 for_each_drhd_unit(drhd
) {
1742 iommu
= drhd
->iommu
;
1743 sprintf (iommu
->name
, "dmar%d", unit
++);
1745 iommu_flush_write_buffer(iommu
);
1747 ret
= dmar_set_interrupt(iommu
);
1751 iommu_set_root_entry(iommu
);
1753 iommu_flush_context_global(iommu
, 0);
1754 iommu_flush_iotlb_global(iommu
, 0);
1756 iommu_disable_protect_mem_regions(iommu
);
1758 ret
= iommu_enable_translation(iommu
);
1765 for_each_drhd_unit(drhd
) {
1768 iommu
= drhd
->iommu
;
1774 static inline u64
aligned_size(u64 host_addr
, size_t size
)
1777 addr
= (host_addr
& (~PAGE_MASK_4K
)) + size
;
1778 return PAGE_ALIGN_4K(addr
);
1782 iommu_alloc_iova(struct dmar_domain
*domain
, size_t size
, u64 end
)
1786 /* Make sure it's in range */
1787 end
= min_t(u64
, DOMAIN_MAX_ADDR(domain
->gaw
), end
);
1788 if (!size
|| (IOVA_START_ADDR
+ size
> end
))
1791 piova
= alloc_iova(&domain
->iovad
,
1792 size
>> PAGE_SHIFT_4K
, IOVA_PFN(end
), 1);
1796 static struct iova
*
1797 __intel_alloc_iova(struct device
*dev
, struct dmar_domain
*domain
,
1800 struct pci_dev
*pdev
= to_pci_dev(dev
);
1801 struct iova
*iova
= NULL
;
1803 if ((pdev
->dma_mask
<= DMA_32BIT_MASK
) || (dmar_forcedac
)) {
1804 iova
= iommu_alloc_iova(domain
, size
, pdev
->dma_mask
);
1807 * First try to allocate an io virtual address in
1808 * DMA_32BIT_MASK and if that fails then try allocating
1811 iova
= iommu_alloc_iova(domain
, size
, DMA_32BIT_MASK
);
1813 iova
= iommu_alloc_iova(domain
, size
, pdev
->dma_mask
);
1817 printk(KERN_ERR
"Allocating iova for %s failed", pci_name(pdev
));
1824 static struct dmar_domain
*
1825 get_valid_domain_for_dev(struct pci_dev
*pdev
)
1827 struct dmar_domain
*domain
;
1830 domain
= get_domain_for_dev(pdev
,
1831 DEFAULT_DOMAIN_ADDRESS_WIDTH
);
1834 "Allocating domain for %s failed", pci_name(pdev
));
1838 /* make sure context mapping is ok */
1839 if (unlikely(!domain_context_mapped(domain
, pdev
))) {
1840 ret
= domain_context_mapping(domain
, pdev
);
1843 "Domain context map for %s failed",
1852 static dma_addr_t
intel_map_single(struct device
*hwdev
, void *addr
,
1853 size_t size
, int dir
)
1855 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
1857 struct dmar_domain
*domain
;
1858 unsigned long start_addr
;
1862 BUG_ON(dir
== DMA_NONE
);
1863 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
1864 return virt_to_bus(addr
);
1866 domain
= get_valid_domain_for_dev(pdev
);
1870 addr
= (void *)virt_to_phys(addr
);
1871 size
= aligned_size((u64
)addr
, size
);
1873 iova
= __intel_alloc_iova(hwdev
, domain
, size
);
1877 start_addr
= iova
->pfn_lo
<< PAGE_SHIFT_4K
;
1880 * Check if DMAR supports zero-length reads on write only
1883 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
1884 !cap_zlr(domain
->iommu
->cap
))
1885 prot
|= DMA_PTE_READ
;
1886 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
1887 prot
|= DMA_PTE_WRITE
;
1889 * addr - (addr + size) might be partial page, we should map the whole
1890 * page. Note: if two part of one page are separately mapped, we
1891 * might have two guest_addr mapping to the same host addr, but this
1892 * is not a big problem
1894 ret
= domain_page_mapping(domain
, start_addr
,
1895 ((u64
)addr
) & PAGE_MASK_4K
, size
, prot
);
1899 pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
1900 pci_name(pdev
), size
, (u64
)addr
,
1901 size
, (u64
)start_addr
, dir
);
1903 /* it's a non-present to present mapping */
1904 ret
= iommu_flush_iotlb_psi(domain
->iommu
, domain
->id
,
1905 start_addr
, size
>> PAGE_SHIFT_4K
, 1);
1907 iommu_flush_write_buffer(domain
->iommu
);
1909 return (start_addr
+ ((u64
)addr
& (~PAGE_MASK_4K
)));
1913 __free_iova(&domain
->iovad
, iova
);
1914 printk(KERN_ERR
"Device %s request: %lx@%llx dir %d --- failed\n",
1915 pci_name(pdev
), size
, (u64
)addr
, dir
);
1919 static void intel_unmap_single(struct device
*dev
, dma_addr_t dev_addr
,
1920 size_t size
, int dir
)
1922 struct pci_dev
*pdev
= to_pci_dev(dev
);
1923 struct dmar_domain
*domain
;
1924 unsigned long start_addr
;
1927 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
1929 domain
= find_domain(pdev
);
1932 iova
= find_iova(&domain
->iovad
, IOVA_PFN(dev_addr
));
1936 start_addr
= iova
->pfn_lo
<< PAGE_SHIFT_4K
;
1937 size
= aligned_size((u64
)dev_addr
, size
);
1939 pr_debug("Device %s unmapping: %lx@%llx\n",
1940 pci_name(pdev
), size
, (u64
)start_addr
);
1942 /* clear the whole page */
1943 dma_pte_clear_range(domain
, start_addr
, start_addr
+ size
);
1944 /* free page tables */
1945 dma_pte_free_pagetable(domain
, start_addr
, start_addr
+ size
);
1947 if (iommu_flush_iotlb_psi(domain
->iommu
, domain
->id
, start_addr
,
1948 size
>> PAGE_SHIFT_4K
, 0))
1949 iommu_flush_write_buffer(domain
->iommu
);
1952 __free_iova(&domain
->iovad
, iova
);
static void *intel_alloc_coherent(struct device *hwdev, size_t size,
				  dma_addr_t *dma_handle, gfp_t flags)
{
	void *vaddr;
	int order;

	size = PAGE_ALIGN_4K(size);
	order = get_order(size);
	flags &= ~(GFP_DMA | GFP_DMA32);

	vaddr = (void *)__get_free_pages(flags, order);
	if (!vaddr)
		return NULL;
	memset(vaddr, 0, size);

	*dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL);
	if (*dma_handle)
		return vaddr;
	free_pages((unsigned long)vaddr, order);
	return NULL;
}

static void intel_free_coherent(struct device *hwdev, size_t size,
				void *vaddr, dma_addr_t dma_handle)
{
	int order;

	size = PAGE_ALIGN_4K(size);
	order = get_order(size);

	intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
	free_pages((unsigned long)vaddr, order);
}
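/*
 * Note that the coherent allocator above is simply __get_free_pages()
 * followed by a DMA_BIDIRECTIONAL intel_map_single(); there is no
 * separate uncached pool.  intel_free_coherent() therefore has to undo
 * both steps, unmapping through the IOMMU before the pages go back to
 * the page allocator.
 */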
1989 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
1990 static void intel_unmap_sg(struct device
*hwdev
, struct scatterlist
*sglist
,
1991 int nelems
, int dir
)
1994 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
1995 struct dmar_domain
*domain
;
1996 unsigned long start_addr
;
2000 struct scatterlist
*sg
;
2002 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
2005 domain
= find_domain(pdev
);
2007 iova
= find_iova(&domain
->iovad
, IOVA_PFN(sglist
[0].dma_address
));
2010 for_each_sg(sglist
, sg
, nelems
, i
) {
2011 addr
= SG_ENT_VIRT_ADDRESS(sg
);
2012 size
+= aligned_size((u64
)addr
, sg
->length
);
2015 start_addr
= iova
->pfn_lo
<< PAGE_SHIFT_4K
;
2017 /* clear the whole page */
2018 dma_pte_clear_range(domain
, start_addr
, start_addr
+ size
);
2019 /* free page tables */
2020 dma_pte_free_pagetable(domain
, start_addr
, start_addr
+ size
);
2022 if (iommu_flush_iotlb_psi(domain
->iommu
, domain
->id
, start_addr
,
2023 size
>> PAGE_SHIFT_4K
, 0))
2024 iommu_flush_write_buffer(domain
->iommu
);
2027 __free_iova(&domain
->iovad
, iova
);
2030 static int intel_nontranslate_map_sg(struct device
*hddev
,
2031 struct scatterlist
*sglist
, int nelems
, int dir
)
2034 struct scatterlist
*sg
;
2036 for_each_sg(sglist
, sg
, nelems
, i
) {
2037 BUG_ON(!sg_page(sg
));
2038 sg
->dma_address
= virt_to_bus(SG_ENT_VIRT_ADDRESS(sg
));
2039 sg
->dma_length
= sg
->length
;
2044 static int intel_map_sg(struct device
*hwdev
, struct scatterlist
*sglist
,
2045 int nelems
, int dir
)
2049 struct pci_dev
*pdev
= to_pci_dev(hwdev
);
2050 struct dmar_domain
*domain
;
2054 struct iova
*iova
= NULL
;
2056 struct scatterlist
*sg
;
2057 unsigned long start_addr
;
2059 BUG_ON(dir
== DMA_NONE
);
2060 if (pdev
->dev
.archdata
.iommu
== DUMMY_DEVICE_DOMAIN_INFO
)
2061 return intel_nontranslate_map_sg(hwdev
, sglist
, nelems
, dir
);
2063 domain
= get_valid_domain_for_dev(pdev
);
2067 for_each_sg(sglist
, sg
, nelems
, i
) {
2068 addr
= SG_ENT_VIRT_ADDRESS(sg
);
2069 addr
= (void *)virt_to_phys(addr
);
2070 size
+= aligned_size((u64
)addr
, sg
->length
);
2073 iova
= __intel_alloc_iova(hwdev
, domain
, size
);
2075 sglist
->dma_length
= 0;
2080 * Check if DMAR supports zero-length reads on write only
2083 if (dir
== DMA_TO_DEVICE
|| dir
== DMA_BIDIRECTIONAL
|| \
2084 !cap_zlr(domain
->iommu
->cap
))
2085 prot
|= DMA_PTE_READ
;
2086 if (dir
== DMA_FROM_DEVICE
|| dir
== DMA_BIDIRECTIONAL
)
2087 prot
|= DMA_PTE_WRITE
;
2089 start_addr
= iova
->pfn_lo
<< PAGE_SHIFT_4K
;
2091 for_each_sg(sglist
, sg
, nelems
, i
) {
2092 addr
= SG_ENT_VIRT_ADDRESS(sg
);
2093 addr
= (void *)virt_to_phys(addr
);
2094 size
= aligned_size((u64
)addr
, sg
->length
);
2095 ret
= domain_page_mapping(domain
, start_addr
+ offset
,
2096 ((u64
)addr
) & PAGE_MASK_4K
,
2099 /* clear the page */
2100 dma_pte_clear_range(domain
, start_addr
,
2101 start_addr
+ offset
);
2102 /* free page tables */
2103 dma_pte_free_pagetable(domain
, start_addr
,
2104 start_addr
+ offset
);
2106 __free_iova(&domain
->iovad
, iova
);
2109 sg
->dma_address
= start_addr
+ offset
+
2110 ((u64
)addr
& (~PAGE_MASK_4K
));
2111 sg
->dma_length
= sg
->length
;
2115 /* it's a non-present to present mapping */
2116 if (iommu_flush_iotlb_psi(domain
->iommu
, domain
->id
,
2117 start_addr
, offset
>> PAGE_SHIFT_4K
, 1))
2118 iommu_flush_write_buffer(domain
->iommu
);
static struct dma_mapping_ops intel_dma_ops = {
	.alloc_coherent = intel_alloc_coherent,
	.free_coherent = intel_free_coherent,
	.map_single = intel_map_single,
	.unmap_single = intel_unmap_single,
	.map_sg = intel_map_sg,
	.unmap_sg = intel_unmap_sg,
};
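/*
 * These ops are installed as the global dma_ops from intel_iommu_init()
 * ("dma_ops = &intel_dma_ops" at the end of this file), so once the
 * IOMMU is enabled every PCI DMA mapping request is routed through the
 * routines above.
 */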
2131 static inline int iommu_domain_cache_init(void)
2135 iommu_domain_cache
= kmem_cache_create("iommu_domain",
2136 sizeof(struct dmar_domain
),
2141 if (!iommu_domain_cache
) {
2142 printk(KERN_ERR
"Couldn't create iommu_domain cache\n");
2149 static inline int iommu_devinfo_cache_init(void)
2153 iommu_devinfo_cache
= kmem_cache_create("iommu_devinfo",
2154 sizeof(struct device_domain_info
),
2159 if (!iommu_devinfo_cache
) {
2160 printk(KERN_ERR
"Couldn't create devinfo cache\n");
2167 static inline int iommu_iova_cache_init(void)
2171 iommu_iova_cache
= kmem_cache_create("iommu_iova",
2172 sizeof(struct iova
),
2177 if (!iommu_iova_cache
) {
2178 printk(KERN_ERR
"Couldn't create iova cache\n");
2185 static int __init
iommu_init_mempool(void)
2188 ret
= iommu_iova_cache_init();
2192 ret
= iommu_domain_cache_init();
2196 ret
= iommu_devinfo_cache_init();
2200 kmem_cache_destroy(iommu_domain_cache
);
2202 kmem_cache_destroy(iommu_iova_cache
);
2207 static void __init
iommu_exit_mempool(void)
2209 kmem_cache_destroy(iommu_devinfo_cache
);
2210 kmem_cache_destroy(iommu_domain_cache
);
2211 kmem_cache_destroy(iommu_iova_cache
);
2215 void __init
detect_intel_iommu(void)
2217 if (swiotlb
|| no_iommu
|| iommu_detected
|| dmar_disabled
)
2219 if (early_dmar_detect()) {
2224 static void __init
init_no_remapping_devices(void)
2226 struct dmar_drhd_unit
*drhd
;
2228 for_each_drhd_unit(drhd
) {
2229 if (!drhd
->include_all
) {
2231 for (i
= 0; i
< drhd
->devices_cnt
; i
++)
2232 if (drhd
->devices
[i
] != NULL
)
2234 /* ignore DMAR unit if no pci devices exist */
2235 if (i
== drhd
->devices_cnt
)
2243 for_each_drhd_unit(drhd
) {
2245 if (drhd
->ignored
|| drhd
->include_all
)
2248 for (i
= 0; i
< drhd
->devices_cnt
; i
++)
2249 if (drhd
->devices
[i
] &&
2250 !IS_GFX_DEVICE(drhd
->devices
[i
]))
2253 if (i
< drhd
->devices_cnt
)
2256 /* bypass IOMMU if it is just for gfx devices */
2258 for (i
= 0; i
< drhd
->devices_cnt
; i
++) {
2259 if (!drhd
->devices
[i
])
2261 drhd
->devices
[i
]->dev
.archdata
.iommu
= DUMMY_DEVICE_DOMAIN_INFO
;
2266 int __init
intel_iommu_init(void)
2270 if (no_iommu
|| swiotlb
|| dmar_disabled
)
2273 if (dmar_table_init())
2276 iommu_init_mempool();
2277 dmar_init_reserved_ranges();
2279 init_no_remapping_devices();
2283 printk(KERN_ERR
"IOMMU: dmar init failed\n");
2284 put_iova_domain(&reserved_iova_list
);
2285 iommu_exit_mempool();
2289 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2292 dma_ops
= &intel_dma_ops
;