drivers/pci/intel-iommu.c (from wrt350n-kernel.git)
1 /*
2 * Copyright (c) 2006, Intel Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
22  * Copyright (C) 2006-2008 Intel Corporation
23  * Author: Ashok Raj <ashok.raj@intel.com>
24  * Author: Shaohua Li <shaohua.li@intel.com>
25  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
29 #include <linux/init.h>
30 #include <linux/bitmap.h>
31 #include <linux/slab.h>
32 #include <linux/irq.h>
33 #include <linux/interrupt.h>
34 #include <linux/sysdev.h>
35 #include <linux/spinlock.h>
36 #include <linux/pci.h>
37 #include <linux/dmar.h>
38 #include <linux/dma-mapping.h>
39 #include <linux/mempool.h>
40 #include "iova.h"
41 #include "intel-iommu.h"
42 #include <asm/proto.h> /* force_iommu is declared in this header on x86-64 */
43 #include <asm/cacheflush.h>
44 #include <asm/gart.h>
45 #include "pci.h"
47 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
48 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
50 #define IOAPIC_RANGE_START (0xfee00000)
51 #define IOAPIC_RANGE_END (0xfeefffff)
52 #define IOVA_START_ADDR (0x1000)
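/* Default to a 48-bit guest address width, i.e. agaw 2 / a 4-level page table. */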
54 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
56 #define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */
58 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
60 static void domain_remove_dev_info(struct dmar_domain *domain);
62 static int dmar_disabled;
63 static int __initdata dmar_map_gfx = 1;
64 static int dmar_forcedac;
66 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
67 static DEFINE_SPINLOCK(device_domain_lock);
68 static LIST_HEAD(device_domain_list);
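/*
 * Parse the intel_iommu= boot option: "off" disables DMAR entirely,
 * "igfx_off" skips IOMMU mapping for graphics devices, and "forcedac"
 * always allocates IOVAs from the device's full DMA mask instead of
 * trying the 32-bit range first.
 */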
70 static int __init intel_iommu_setup(char *str)
72 if (!str)
73 return -EINVAL;
74 while (*str) {
75 if (!strncmp(str, "off", 3)) {
76 dmar_disabled = 1;
77 printk(KERN_INFO"Intel-IOMMU: disabled\n");
78 } else if (!strncmp(str, "igfx_off", 8)) {
79 dmar_map_gfx = 0;
80 printk(KERN_INFO
81 "Intel-IOMMU: disable GFX device mapping\n");
82 } else if (!strncmp(str, "forcedac", 8)) {
83 printk (KERN_INFO
84 "Intel-IOMMU: Forcing DAC for PCI devices\n");
85 dmar_forcedac = 1;
88 str += strcspn(str, ",");
89 while (*str == ',')
90 str++;
92 return 0;
94 __setup("intel_iommu=", intel_iommu_setup);
96 static struct kmem_cache *iommu_domain_cache;
97 static struct kmem_cache *iommu_devinfo_cache;
98 static struct kmem_cache *iommu_iova_cache;
100 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
102 unsigned int flags;
103 void *vaddr;
105 /* trying to avoid low memory issues */
106 flags = current->flags & PF_MEMALLOC;
107 current->flags |= PF_MEMALLOC;
108 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
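/*
 * Restore the caller's original PF_MEMALLOC bit: if it was set,
 * (~PF_MEMALLOC | flags) is all ones and the mask below is a no-op;
 * if it was clear, the bit is cleared again.
 */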
109 current->flags &= (~PF_MEMALLOC | flags);
110 return vaddr;
114 static inline void *alloc_pgtable_page(void)
116 unsigned int flags;
117 void *vaddr;
119 /* trying to avoid low memory issues */
120 flags = current->flags & PF_MEMALLOC;
121 current->flags |= PF_MEMALLOC;
122 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
123 current->flags &= (~PF_MEMALLOC | flags);
124 return vaddr;
127 static inline void free_pgtable_page(void *vaddr)
129 free_page((unsigned long)vaddr);
132 static inline void *alloc_domain_mem(void)
134 return iommu_kmem_cache_alloc(iommu_domain_cache);
137 static inline void free_domain_mem(void *vaddr)
139 kmem_cache_free(iommu_domain_cache, vaddr);
142 static inline void * alloc_devinfo_mem(void)
144 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
147 static inline void free_devinfo_mem(void *vaddr)
149 kmem_cache_free(iommu_devinfo_cache, vaddr);
152 struct iova *alloc_iova_mem(void)
154 return iommu_kmem_cache_alloc(iommu_iova_cache);
157 void free_iova_mem(struct iova *iova)
159 kmem_cache_free(iommu_iova_cache, iova);
162 static inline void __iommu_flush_cache(
163 struct intel_iommu *iommu, void *addr, int size)
165 if (!ecap_coherent(iommu->ecap))
166 clflush_cache_range(addr, size);
169 /* Gets context entry for a given bus and devfn */
170 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
171 u8 bus, u8 devfn)
173 struct root_entry *root;
174 struct context_entry *context;
175 unsigned long phy_addr;
176 unsigned long flags;
178 spin_lock_irqsave(&iommu->lock, flags);
179 root = &iommu->root_entry[bus];
180 context = get_context_addr_from_root(root);
181 if (!context) {
182 context = (struct context_entry *)alloc_pgtable_page();
183 if (!context) {
184 spin_unlock_irqrestore(&iommu->lock, flags);
185 return NULL;
187 __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
188 phy_addr = virt_to_phys((void *)context);
189 set_root_value(root, phy_addr);
190 set_root_present(root);
191 __iommu_flush_cache(iommu, root, sizeof(*root));
193 spin_unlock_irqrestore(&iommu->lock, flags);
194 return &context[devfn];
197 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
199 struct root_entry *root;
200 struct context_entry *context;
201 int ret;
202 unsigned long flags;
204 spin_lock_irqsave(&iommu->lock, flags);
205 root = &iommu->root_entry[bus];
206 context = get_context_addr_from_root(root);
207 if (!context) {
208 ret = 0;
209 goto out;
211 ret = context_present(context[devfn]);
212 out:
213 spin_unlock_irqrestore(&iommu->lock, flags);
214 return ret;
217 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
219 struct root_entry *root;
220 struct context_entry *context;
221 unsigned long flags;
223 spin_lock_irqsave(&iommu->lock, flags);
224 root = &iommu->root_entry[bus];
225 context = get_context_addr_from_root(root);
226 if (context) {
227 context_clear_entry(context[devfn]);
228 __iommu_flush_cache(iommu, &context[devfn], \
229 sizeof(*context));
231 spin_unlock_irqrestore(&iommu->lock, flags);
234 static void free_context_table(struct intel_iommu *iommu)
236 struct root_entry *root;
237 int i;
238 unsigned long flags;
239 struct context_entry *context;
241 spin_lock_irqsave(&iommu->lock, flags);
242 if (!iommu->root_entry) {
243 goto out;
245 for (i = 0; i < ROOT_ENTRY_NR; i++) {
246 root = &iommu->root_entry[i];
247 context = get_context_addr_from_root(root);
248 if (context)
249 free_pgtable_page(context);
251 free_pgtable_page(iommu->root_entry);
252 iommu->root_entry = NULL;
253 out:
254 spin_unlock_irqrestore(&iommu->lock, flags);
257 /* page table handling */
258 #define LEVEL_STRIDE (9)
259 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
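/*
 * agaw (adjusted guest address width) encodes the page-table depth:
 * width = 30 + 9 * agaw bits and level = agaw + 2, so agaw 1 is a
 * 39-bit/3-level table and agaw 2 is a 48-bit/4-level table.
 */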
261 static inline int agaw_to_level(int agaw)
263 return agaw + 2;
266 static inline int agaw_to_width(int agaw)
268 return 30 + agaw * LEVEL_STRIDE;
272 static inline int width_to_agaw(int width)
274 return (width - 30) / LEVEL_STRIDE;
277 static inline unsigned int level_to_offset_bits(int level)
279 return (12 + (level - 1) * LEVEL_STRIDE);
282 static inline int address_level_offset(u64 addr, int level)
284 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
287 static inline u64 level_mask(int level)
289 return ((u64)-1 << level_to_offset_bits(level));
292 static inline u64 level_size(int level)
294 return ((u64)1 << level_to_offset_bits(level));
297 static inline u64 align_to_level(u64 addr, int level)
299 return ((addr + level_size(level) - 1) & level_mask(level));
302 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
304 int addr_width = agaw_to_width(domain->agaw);
305 struct dma_pte *parent, *pte = NULL;
306 int level = agaw_to_level(domain->agaw);
307 int offset;
308 unsigned long flags;
310 BUG_ON(!domain->pgd);
312 addr &= (((u64)1) << addr_width) - 1;
313 parent = domain->pgd;
315 spin_lock_irqsave(&domain->mapping_lock, flags);
316 while (level > 0) {
317 void *tmp_page;
319 offset = address_level_offset(addr, level);
320 pte = &parent[offset];
321 if (level == 1)
322 break;
324 if (!dma_pte_present(*pte)) {
325 tmp_page = alloc_pgtable_page();
327 if (!tmp_page) {
328 spin_unlock_irqrestore(&domain->mapping_lock,
329 flags);
330 return NULL;
332 __iommu_flush_cache(domain->iommu, tmp_page,
333 PAGE_SIZE_4K);
334 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
336 * high level table always sets r/w, last level page
337 * table control read/write
339 dma_set_pte_readable(*pte);
340 dma_set_pte_writable(*pte);
341 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
343 parent = phys_to_virt(dma_pte_addr(*pte));
344 level--;
347 spin_unlock_irqrestore(&domain->mapping_lock, flags);
348 return pte;
351 /* return address's pte at specific level */
352 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
353 int level)
355 struct dma_pte *parent, *pte = NULL;
356 int total = agaw_to_level(domain->agaw);
357 int offset;
359 parent = domain->pgd;
360 while (level <= total) {
361 offset = address_level_offset(addr, total);
362 pte = &parent[offset];
363 if (level == total)
364 return pte;
366 if (!dma_pte_present(*pte))
367 break;
368 parent = phys_to_virt(dma_pte_addr(*pte));
369 total--;
371 return NULL;
374 /* clear one page's page table */
375 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
377 struct dma_pte *pte = NULL;
379 /* get last level pte */
380 pte = dma_addr_level_pte(domain, addr, 1);
382 if (pte) {
383 dma_clear_pte(*pte);
384 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
388 /* clear last level pte, a tlb flush should be followed */
389 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
391 int addr_width = agaw_to_width(domain->agaw);
393 start &= (((u64)1) << addr_width) - 1;
394 end &= (((u64)1) << addr_width) - 1;
395 /* in case it's partial page */
396 start = PAGE_ALIGN_4K(start);
397 end &= PAGE_MASK_4K;
399 /* we don't need lock here, nobody else touches the iova range */
400 while (start < end) {
401 dma_pte_clear_one(domain, start);
402 start += PAGE_SIZE_4K;
406 /* free page table pages. last level pte should already be cleared */
407 static void dma_pte_free_pagetable(struct dmar_domain *domain,
408 u64 start, u64 end)
410 int addr_width = agaw_to_width(domain->agaw);
411 struct dma_pte *pte;
412 int total = agaw_to_level(domain->agaw);
413 int level;
414 u64 tmp;
416 start &= (((u64)1) << addr_width) - 1;
417 end &= (((u64)1) << addr_width) - 1;
419 /* we don't need lock here, nobody else touches the iova range */
420 level = 2;
421 while (level <= total) {
422 tmp = align_to_level(start, level);
423 if (tmp >= end || (tmp + level_size(level) > end))
424 return;
426 while (tmp < end) {
427 pte = dma_addr_level_pte(domain, tmp, level);
428 if (pte) {
429 free_pgtable_page(
430 phys_to_virt(dma_pte_addr(*pte)));
431 dma_clear_pte(*pte);
432 __iommu_flush_cache(domain->iommu,
433 pte, sizeof(*pte));
435 tmp += level_size(level);
437 level++;
439 /* free pgd */
440 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
441 free_pgtable_page(domain->pgd);
442 domain->pgd = NULL;
446 /* iommu handling */
447 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
449 struct root_entry *root;
450 unsigned long flags;
452 root = (struct root_entry *)alloc_pgtable_page();
453 if (!root)
454 return -ENOMEM;
456 __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);
458 spin_lock_irqsave(&iommu->lock, flags);
459 iommu->root_entry = root;
460 spin_unlock_irqrestore(&iommu->lock, flags);
462 return 0;
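/*
 * Poll an IOMMU register: read (iommu->reg + offset) with 'op' into 'sts'
 * until 'cond' becomes true, and panic if the hardware has not responded
 * within DMAR_OPERATION_TIMEOUT.
 */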
465 #define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
467 unsigned long start_time = jiffies;\
468 while (1) {\
469 sts = op (iommu->reg + offset);\
470 if (cond)\
471 break;\
472 if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
473 panic("DMAR hardware is malfunctioning\n");\
474 cpu_relax();\
478 static void iommu_set_root_entry(struct intel_iommu *iommu)
480 void *addr;
481 u32 cmd, sts;
482 unsigned long flag;
484 addr = iommu->root_entry;
486 spin_lock_irqsave(&iommu->register_lock, flag);
487 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
489 cmd = iommu->gcmd | DMA_GCMD_SRTP;
490 writel(cmd, iommu->reg + DMAR_GCMD_REG);
492 /* Make sure hardware complete it */
493 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
494 readl, (sts & DMA_GSTS_RTPS), sts);
496 spin_unlock_irqrestore(&iommu->register_lock, flag);
499 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
501 u32 val;
502 unsigned long flag;
504 if (!cap_rwbf(iommu->cap))
505 return;
506 val = iommu->gcmd | DMA_GCMD_WBF;
508 spin_lock_irqsave(&iommu->register_lock, flag);
509 writel(val, iommu->reg + DMAR_GCMD_REG);
511 /* Make sure hardware complete it */
512 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
513 readl, (!(val & DMA_GSTS_WBFS)), val);
515 spin_unlock_irqrestore(&iommu->register_lock, flag);
518 /* return value determines whether we need a write buffer flush */
519 static int __iommu_flush_context(struct intel_iommu *iommu,
520 u16 did, u16 source_id, u8 function_mask, u64 type,
521 int non_present_entry_flush)
523 u64 val = 0;
524 unsigned long flag;
527  * In the non-present entry flush case, if hardware doesn't cache
528  * non-present entries we do nothing; if hardware does cache non-present
529  * entries, we flush entries of domain 0 (the domain id is used to cache
530  * any non-present entries)
532 if (non_present_entry_flush) {
533 if (!cap_caching_mode(iommu->cap))
534 return 1;
535 else
536 did = 0;
539 switch (type) {
540 case DMA_CCMD_GLOBAL_INVL:
541 val = DMA_CCMD_GLOBAL_INVL;
542 break;
543 case DMA_CCMD_DOMAIN_INVL:
544 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
545 break;
546 case DMA_CCMD_DEVICE_INVL:
547 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
548 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
549 break;
550 default:
551 BUG();
553 val |= DMA_CCMD_ICC;
555 spin_lock_irqsave(&iommu->register_lock, flag);
556 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
558 /* Make sure hardware complete it */
559 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
560 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
562 spin_unlock_irqrestore(&iommu->register_lock, flag);
564 /* flush context entry will implicitly flush write buffer */
565 return 0;
568 static inline int iommu_flush_context_global(struct intel_iommu *iommu,
569 int non_present_entry_flush)
571 return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
572 non_present_entry_flush);
575 static inline int iommu_flush_context_domain(struct intel_iommu *iommu, u16 did,
576 int non_present_entry_flush)
578 return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
579 non_present_entry_flush);
582 static inline int iommu_flush_context_device(struct intel_iommu *iommu,
583 u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
585 return __iommu_flush_context(iommu, did, source_id, function_mask,
586 DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
589 /* return value determines whether we need a write buffer flush */
590 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
591 u64 addr, unsigned int size_order, u64 type,
592 int non_present_entry_flush)
594 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
595 u64 val = 0, val_iva = 0;
596 unsigned long flag;
599  * In the non-present entry flush case, if hardware doesn't cache
600  * non-present entries we do nothing; if hardware does cache non-present
601  * entries, we flush entries of domain 0 (the domain id is used to cache
602  * any non-present entries)
604 if (non_present_entry_flush) {
605 if (!cap_caching_mode(iommu->cap))
606 return 1;
607 else
608 did = 0;
611 switch (type) {
612 case DMA_TLB_GLOBAL_FLUSH:
613 /* global flush doesn't need set IVA_REG */
614 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
615 break;
616 case DMA_TLB_DSI_FLUSH:
617 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
618 break;
619 case DMA_TLB_PSI_FLUSH:
620 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
621 /* Note: always flush non-leaf currently */
622 val_iva = size_order | addr;
623 break;
624 default:
625 BUG();
627 /* Note: set drain read/write */
628 #if 0
630  * This is probably only here to be extra safe; it looks like we can
631  * ignore it without any impact.
633 if (cap_read_drain(iommu->cap))
634 val |= DMA_TLB_READ_DRAIN;
635 #endif
636 if (cap_write_drain(iommu->cap))
637 val |= DMA_TLB_WRITE_DRAIN;
639 spin_lock_irqsave(&iommu->register_lock, flag);
640 /* Note: Only uses first TLB reg currently */
641 if (val_iva)
642 dmar_writeq(iommu->reg + tlb_offset, val_iva);
643 dmar_writeq(iommu->reg + tlb_offset + 8, val);
645 /* Make sure hardware complete it */
646 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
647 dmar_readq, (!(val & DMA_TLB_IVT)), val);
649 spin_unlock_irqrestore(&iommu->register_lock, flag);
651 /* check IOTLB invalidation granularity */
652 if (DMA_TLB_IAIG(val) == 0)
653 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
654 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
655 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
656 DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
657 /* flush context entry will implicitly flush write buffer */
658 return 0;
661 static inline int iommu_flush_iotlb_global(struct intel_iommu *iommu,
662 int non_present_entry_flush)
664 return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
665 non_present_entry_flush);
668 static inline int iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did,
669 int non_present_entry_flush)
671 return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
672 non_present_entry_flush);
675 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
676 u64 addr, unsigned int pages, int non_present_entry_flush)
678 unsigned int mask;
680 BUG_ON(addr & (~PAGE_MASK_4K));
681 BUG_ON(pages == 0);
683 /* Fallback to domain selective flush if no PSI support */
684 if (!cap_pgsel_inv(iommu->cap))
685 return iommu_flush_iotlb_dsi(iommu, did,
686 non_present_entry_flush);
689 * PSI requires page size to be 2 ^ x, and the base address is naturally
690 * aligned to the size
692 mask = ilog2(__roundup_pow_of_two(pages));
693 /* Fallback to domain selective flush if size is too big */
694 if (mask > cap_max_amask_val(iommu->cap))
695 return iommu_flush_iotlb_dsi(iommu, did,
696 non_present_entry_flush);
698 return __iommu_flush_iotlb(iommu, did, addr, mask,
699 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
702 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
704 u32 pmen;
705 unsigned long flags;
707 spin_lock_irqsave(&iommu->register_lock, flags);
708 pmen = readl(iommu->reg + DMAR_PMEN_REG);
709 pmen &= ~DMA_PMEN_EPM;
710 writel(pmen, iommu->reg + DMAR_PMEN_REG);
712 /* wait for the protected region status bit to clear */
713 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
714 readl, !(pmen & DMA_PMEN_PRS), pmen);
716 spin_unlock_irqrestore(&iommu->register_lock, flags);
719 static int iommu_enable_translation(struct intel_iommu *iommu)
721 u32 sts;
722 unsigned long flags;
724 spin_lock_irqsave(&iommu->register_lock, flags);
725 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
727 /* Make sure hardware complete it */
728 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
729 readl, (sts & DMA_GSTS_TES), sts);
731 iommu->gcmd |= DMA_GCMD_TE;
732 spin_unlock_irqrestore(&iommu->register_lock, flags);
733 return 0;
736 static int iommu_disable_translation(struct intel_iommu *iommu)
738 u32 sts;
739 unsigned long flag;
741 spin_lock_irqsave(&iommu->register_lock, flag);
742 iommu->gcmd &= ~DMA_GCMD_TE;
743 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
745 /* Make sure hardware complete it */
746 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
747 readl, (!(sts & DMA_GSTS_TES)), sts);
749 spin_unlock_irqrestore(&iommu->register_lock, flag);
750 return 0;
753 /* iommu interrupt handling. Most of it is MSI-like. */
755 static const char *fault_reason_strings[] =
757 "Software",
758 "Present bit in root entry is clear",
759 "Present bit in context entry is clear",
760 "Invalid context entry",
761 "Access beyond MGAW",
762 "PTE Write access is not set",
763 "PTE Read access is not set",
764 "Next page table ptr is invalid",
765 "Root table address invalid",
766 "Context table ptr is invalid",
767 "non-zero reserved fields in RTP",
768 "non-zero reserved fields in CTP",
769 "non-zero reserved fields in PTE",
771 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
773 const char *dmar_get_fault_reason(u8 fault_reason)
775 if (fault_reason > MAX_FAULT_REASON_IDX)
776 return "Unknown";
777 else
778 return fault_reason_strings[fault_reason];
781 void dmar_msi_unmask(unsigned int irq)
783 struct intel_iommu *iommu = get_irq_data(irq);
784 unsigned long flag;
786 /* unmask it */
787 spin_lock_irqsave(&iommu->register_lock, flag);
788 writel(0, iommu->reg + DMAR_FECTL_REG);
789 /* Read a reg to force flush the post write */
790 readl(iommu->reg + DMAR_FECTL_REG);
791 spin_unlock_irqrestore(&iommu->register_lock, flag);
794 void dmar_msi_mask(unsigned int irq)
796 unsigned long flag;
797 struct intel_iommu *iommu = get_irq_data(irq);
799 /* mask it */
800 spin_lock_irqsave(&iommu->register_lock, flag);
801 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
802 /* Read a reg to force flush the post write */
803 readl(iommu->reg + DMAR_FECTL_REG);
804 spin_unlock_irqrestore(&iommu->register_lock, flag);
807 void dmar_msi_write(int irq, struct msi_msg *msg)
809 struct intel_iommu *iommu = get_irq_data(irq);
810 unsigned long flag;
812 spin_lock_irqsave(&iommu->register_lock, flag);
813 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
814 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
815 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
816 spin_unlock_irqrestore(&iommu->register_lock, flag);
819 void dmar_msi_read(int irq, struct msi_msg *msg)
821 struct intel_iommu *iommu = get_irq_data(irq);
822 unsigned long flag;
824 spin_lock_irqsave(&iommu->register_lock, flag);
825 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
826 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
827 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
828 spin_unlock_irqrestore(&iommu->register_lock, flag);
831 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
832 u8 fault_reason, u16 source_id, u64 addr)
834 const char *reason;
836 reason = dmar_get_fault_reason(fault_reason);
838 printk(KERN_ERR
839 "DMAR:[%s] Request device [%02x:%02x.%d] "
840 "fault addr %llx \n"
841 "DMAR:[fault reason %02d] %s\n",
842 (type ? "DMA Read" : "DMA Write"),
843 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
844 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
845 return 0;
848 #define PRIMARY_FAULT_REG_LEN (16)
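/*
 * Each primary fault record is 16 bytes: a 64-bit faulting page address,
 * the source id in the dword at offset 8, and the fault/reason/type bits
 * in the dword at offset 12.
 */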
849 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
851 struct intel_iommu *iommu = dev_id;
852 int reg, fault_index;
853 u32 fault_status;
854 unsigned long flag;
856 spin_lock_irqsave(&iommu->register_lock, flag);
857 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
859 /* TBD: ignore advanced fault log currently */
860 if (!(fault_status & DMA_FSTS_PPF))
861 goto clear_overflow;
863 fault_index = dma_fsts_fault_record_index(fault_status);
864 reg = cap_fault_reg_offset(iommu->cap);
865 while (1) {
866 u8 fault_reason;
867 u16 source_id;
868 u64 guest_addr;
869 int type;
870 u32 data;
872 /* highest 32 bits */
873 data = readl(iommu->reg + reg +
874 fault_index * PRIMARY_FAULT_REG_LEN + 12);
875 if (!(data & DMA_FRCD_F))
876 break;
878 fault_reason = dma_frcd_fault_reason(data);
879 type = dma_frcd_type(data);
881 data = readl(iommu->reg + reg +
882 fault_index * PRIMARY_FAULT_REG_LEN + 8);
883 source_id = dma_frcd_source_id(data);
885 guest_addr = dmar_readq(iommu->reg + reg +
886 fault_index * PRIMARY_FAULT_REG_LEN);
887 guest_addr = dma_frcd_page_addr(guest_addr);
888 /* clear the fault */
889 writel(DMA_FRCD_F, iommu->reg + reg +
890 fault_index * PRIMARY_FAULT_REG_LEN + 12);
892 spin_unlock_irqrestore(&iommu->register_lock, flag);
894 iommu_page_fault_do_one(iommu, type, fault_reason,
895 source_id, guest_addr);
897 fault_index++;
898 if (fault_index > cap_num_fault_regs(iommu->cap))
899 fault_index = 0;
900 spin_lock_irqsave(&iommu->register_lock, flag);
902 clear_overflow:
903 /* clear primary fault overflow */
904 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
905 if (fault_status & DMA_FSTS_PFO)
906 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
908 spin_unlock_irqrestore(&iommu->register_lock, flag);
909 return IRQ_HANDLED;
912 int dmar_set_interrupt(struct intel_iommu *iommu)
914 int irq, ret;
916 irq = create_irq();
917 if (!irq) {
918 printk(KERN_ERR "IOMMU: no free vectors\n");
919 return -EINVAL;
922 set_irq_data(irq, iommu);
923 iommu->irq = irq;
925 ret = arch_setup_dmar_msi(irq);
926 if (ret) {
927 set_irq_data(irq, NULL);
928 iommu->irq = 0;
929 destroy_irq(irq);
930 return 0;
933 /* Force fault register is cleared */
934 iommu_page_fault(irq, iommu);
936 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
937 if (ret)
938 printk(KERN_ERR "IOMMU: can't request irq\n");
939 return ret;
942 static int iommu_init_domains(struct intel_iommu *iommu)
944 unsigned long ndomains;
945 unsigned long nlongs;
947 ndomains = cap_ndoms(iommu->cap);
948 pr_debug("Number of Domains supported <%ld>\n", ndomains);
949 nlongs = BITS_TO_LONGS(ndomains);
951 /* TBD: there might be 64K domains,
952 * consider other allocation for future chip
954 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
955 if (!iommu->domain_ids) {
956 printk(KERN_ERR "Allocating domain id array failed\n");
957 return -ENOMEM;
959 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
960 GFP_KERNEL);
961 if (!iommu->domains) {
962 printk(KERN_ERR "Allocating domain array failed\n");
963 kfree(iommu->domain_ids);
964 return -ENOMEM;
968 * if Caching mode is set, then invalid translations are tagged
969 * with domainid 0. Hence we need to pre-allocate it.
971 if (cap_caching_mode(iommu->cap))
972 set_bit(0, iommu->domain_ids);
973 return 0;
976 static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
978 struct intel_iommu *iommu;
979 int ret;
980 int map_size;
981 u32 ver;
983 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
984 if (!iommu)
985 return NULL;
986 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
987 if (!iommu->reg) {
988 printk(KERN_ERR "IOMMU: can't map the region\n");
989 goto error;
991 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
992 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
994 /* the registers might be more than one page */
995 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
996 cap_max_fault_reg_offset(iommu->cap));
997 map_size = PAGE_ALIGN_4K(map_size);
998 if (map_size > PAGE_SIZE_4K) {
999 iounmap(iommu->reg);
1000 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
1001 if (!iommu->reg) {
1002 printk(KERN_ERR "IOMMU: can't map the region\n");
1003 goto error;
1007 ver = readl(iommu->reg + DMAR_VER_REG);
1008 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
1009 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1010 iommu->cap, iommu->ecap);
1011 ret = iommu_init_domains(iommu);
1012 if (ret)
1013 goto error_unmap;
1014 spin_lock_init(&iommu->lock);
1015 spin_lock_init(&iommu->register_lock);
1017 drhd->iommu = iommu;
1018 return iommu;
1019 error_unmap:
1020 iounmap(iommu->reg);
1021 error:
1022 kfree(iommu);
1023 return NULL;
1026 static void domain_exit(struct dmar_domain *domain);
1027 static void free_iommu(struct intel_iommu *iommu)
1029 struct dmar_domain *domain;
1030 int i;
1032 if (!iommu)
1033 return;
1035 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1036 for (; i < cap_ndoms(iommu->cap); ) {
1037 domain = iommu->domains[i];
1038 clear_bit(i, iommu->domain_ids);
1039 domain_exit(domain);
1040 i = find_next_bit(iommu->domain_ids,
1041 cap_ndoms(iommu->cap), i+1);
1044 if (iommu->gcmd & DMA_GCMD_TE)
1045 iommu_disable_translation(iommu);
1047 if (iommu->irq) {
1048 set_irq_data(iommu->irq, NULL);
1049 /* This will mask the irq */
1050 free_irq(iommu->irq, iommu);
1051 destroy_irq(iommu->irq);
1054 kfree(iommu->domains);
1055 kfree(iommu->domain_ids);
1057 /* free context mapping */
1058 free_context_table(iommu);
1060 if (iommu->reg)
1061 iounmap(iommu->reg);
1062 kfree(iommu);
1065 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1067 unsigned long num;
1068 unsigned long ndomains;
1069 struct dmar_domain *domain;
1070 unsigned long flags;
1072 domain = alloc_domain_mem();
1073 if (!domain)
1074 return NULL;
1076 ndomains = cap_ndoms(iommu->cap);
1078 spin_lock_irqsave(&iommu->lock, flags);
1079 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1080 if (num >= ndomains) {
1081 spin_unlock_irqrestore(&iommu->lock, flags);
1082 free_domain_mem(domain);
1083 printk(KERN_ERR "IOMMU: no free domain ids\n");
1084 return NULL;
1087 set_bit(num, iommu->domain_ids);
1088 domain->id = num;
1089 domain->iommu = iommu;
1090 iommu->domains[num] = domain;
1091 spin_unlock_irqrestore(&iommu->lock, flags);
1093 return domain;
1096 static void iommu_free_domain(struct dmar_domain *domain)
1098 unsigned long flags;
1100 spin_lock_irqsave(&domain->iommu->lock, flags);
1101 clear_bit(domain->id, domain->iommu->domain_ids);
1102 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1105 static struct iova_domain reserved_iova_list;
1107 static void dmar_init_reserved_ranges(void)
1109 struct pci_dev *pdev = NULL;
1110 struct iova *iova;
1111 int i;
1112 u64 addr, size;
1114 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1116 /* IOAPIC ranges shouldn't be accessed by DMA */
1117 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1118 IOVA_PFN(IOAPIC_RANGE_END));
1119 if (!iova)
1120 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1122 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1123 for_each_pci_dev(pdev) {
1124 struct resource *r;
1126 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1127 r = &pdev->resource[i];
1128 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1129 continue;
1130 addr = r->start;
1131 addr &= PAGE_MASK_4K;
1132 size = r->end - addr;
1133 size = PAGE_ALIGN_4K(size);
1134 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1135 IOVA_PFN(size + addr) - 1);
1136 if (!iova)
1137 printk(KERN_ERR "Reserve iova failed\n");
1143 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1145 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
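/*
 * Round the guest address width up so that (width - 12) is a multiple of
 * LEVEL_STRIDE, i.e. the width above the 12-bit page offset covers a whole
 * number of page-table levels; the result is capped at 64 bits.
 */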
1148 static inline int guestwidth_to_adjustwidth(int gaw)
1150 int agaw;
1151 int r = (gaw - 12) % 9;
1153 if (r == 0)
1154 agaw = gaw;
1155 else
1156 agaw = gaw + 9 - r;
1157 if (agaw > 64)
1158 agaw = 64;
1159 return agaw;
1162 static int domain_init(struct dmar_domain *domain, int guest_width)
1164 struct intel_iommu *iommu;
1165 int adjust_width, agaw;
1166 unsigned long sagaw;
1168 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1169 spin_lock_init(&domain->mapping_lock);
1171 domain_reserve_special_ranges(domain);
1173 /* calculate AGAW */
1174 iommu = domain->iommu;
1175 if (guest_width > cap_mgaw(iommu->cap))
1176 guest_width = cap_mgaw(iommu->cap);
1177 domain->gaw = guest_width;
1178 adjust_width = guestwidth_to_adjustwidth(guest_width);
1179 agaw = width_to_agaw(adjust_width);
1180 sagaw = cap_sagaw(iommu->cap);
1181 if (!test_bit(agaw, &sagaw)) {
1182 /* hardware doesn't support it, choose a bigger one */
1183 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1184 agaw = find_next_bit(&sagaw, 5, agaw);
1185 if (agaw >= 5)
1186 return -ENODEV;
1188 domain->agaw = agaw;
1189 INIT_LIST_HEAD(&domain->devices);
1191 /* always allocate the top pgd */
1192 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1193 if (!domain->pgd)
1194 return -ENOMEM;
1195 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
1196 return 0;
1199 static void domain_exit(struct dmar_domain *domain)
1201 u64 end;
1203 /* Domain 0 is reserved, so don't process it */
1204 if (!domain)
1205 return;
1207 domain_remove_dev_info(domain);
1208 /* destroy iovas */
1209 put_iova_domain(&domain->iovad);
1210 end = DOMAIN_MAX_ADDR(domain->gaw);
1211 end = end & (~PAGE_MASK_4K);
1213 /* clear ptes */
1214 dma_pte_clear_range(domain, 0, end);
1216 /* free page tables */
1217 dma_pte_free_pagetable(domain, 0, end);
1219 iommu_free_domain(domain);
1220 free_domain_mem(domain);
1223 static int domain_context_mapping_one(struct dmar_domain *domain,
1224 u8 bus, u8 devfn)
1226 struct context_entry *context;
1227 struct intel_iommu *iommu = domain->iommu;
1228 unsigned long flags;
1230 pr_debug("Set context mapping for %02x:%02x.%d\n",
1231 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1232 BUG_ON(!domain->pgd);
1233 context = device_to_context_entry(iommu, bus, devfn);
1234 if (!context)
1235 return -ENOMEM;
1236 spin_lock_irqsave(&iommu->lock, flags);
1237 if (context_present(*context)) {
1238 spin_unlock_irqrestore(&iommu->lock, flags);
1239 return 0;
1242 context_set_domain_id(*context, domain->id);
1243 context_set_address_width(*context, domain->agaw);
1244 context_set_address_root(*context, virt_to_phys(domain->pgd));
1245 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1246 context_set_fault_enable(*context);
1247 context_set_present(*context);
1248 __iommu_flush_cache(iommu, context, sizeof(*context));
1250 /* it's a non-present to present mapping */
1251 if (iommu_flush_context_device(iommu, domain->id,
1252 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
1253 iommu_flush_write_buffer(iommu);
1254 else
1255 iommu_flush_iotlb_dsi(iommu, 0, 0);
1256 spin_unlock_irqrestore(&iommu->lock, flags);
1257 return 0;
1260 static int
1261 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1263 int ret;
1264 struct pci_dev *tmp, *parent;
1266 ret = domain_context_mapping_one(domain, pdev->bus->number,
1267 pdev->devfn);
1268 if (ret)
1269 return ret;
1271 /* dependent device mapping */
1272 tmp = pci_find_upstream_pcie_bridge(pdev);
1273 if (!tmp)
1274 return 0;
1275 /* Secondary interface's bus number and devfn 0 */
1276 parent = pdev->bus->self;
1277 while (parent != tmp) {
1278 ret = domain_context_mapping_one(domain, parent->bus->number,
1279 parent->devfn);
1280 if (ret)
1281 return ret;
1282 parent = parent->bus->self;
1284 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1285 return domain_context_mapping_one(domain,
1286 tmp->subordinate->number, 0);
1287 else /* this is a legacy PCI bridge */
1288 return domain_context_mapping_one(domain,
1289 tmp->bus->number, tmp->devfn);
1292 static int domain_context_mapped(struct dmar_domain *domain,
1293 struct pci_dev *pdev)
1295 int ret;
1296 struct pci_dev *tmp, *parent;
1298 ret = device_context_mapped(domain->iommu,
1299 pdev->bus->number, pdev->devfn);
1300 if (!ret)
1301 return ret;
1302 /* dependent device mapping */
1303 tmp = pci_find_upstream_pcie_bridge(pdev);
1304 if (!tmp)
1305 return ret;
1306 /* Secondary interface's bus number and devfn 0 */
1307 parent = pdev->bus->self;
1308 while (parent != tmp) {
1309 ret = device_context_mapped(domain->iommu, parent->bus->number,
1310 parent->devfn);
1311 if (!ret)
1312 return ret;
1313 parent = parent->bus->self;
1315 if (tmp->is_pcie)
1316 return device_context_mapped(domain->iommu,
1317 tmp->subordinate->number, 0);
1318 else
1319 return device_context_mapped(domain->iommu,
1320 tmp->bus->number, tmp->devfn);
1323 static int
1324 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1325 u64 hpa, size_t size, int prot)
1327 u64 start_pfn, end_pfn;
1328 struct dma_pte *pte;
1329 int index;
1331 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1332 return -EINVAL;
1333 iova &= PAGE_MASK_4K;
1334 start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
1335 end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
1336 index = 0;
1337 while (start_pfn < end_pfn) {
1338 pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
1339 if (!pte)
1340 return -ENOMEM;
1341 /* We don't need lock here, nobody else
1342 * touches the iova range
1344 BUG_ON(dma_pte_addr(*pte));
1345 dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
1346 dma_set_pte_prot(*pte, prot);
1347 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1348 start_pfn++;
1349 index++;
1351 return 0;
1354 static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1356 clear_context_table(domain->iommu, bus, devfn);
1357 iommu_flush_context_global(domain->iommu, 0);
1358 iommu_flush_iotlb_global(domain->iommu, 0);
1361 static void domain_remove_dev_info(struct dmar_domain *domain)
1363 struct device_domain_info *info;
1364 unsigned long flags;
1366 spin_lock_irqsave(&device_domain_lock, flags);
1367 while (!list_empty(&domain->devices)) {
1368 info = list_entry(domain->devices.next,
1369 struct device_domain_info, link);
1370 list_del(&info->link);
1371 list_del(&info->global);
1372 if (info->dev)
1373 info->dev->dev.archdata.iommu = NULL;
1374 spin_unlock_irqrestore(&device_domain_lock, flags);
1376 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1377 free_devinfo_mem(info);
1379 spin_lock_irqsave(&device_domain_lock, flags);
1381 spin_unlock_irqrestore(&device_domain_lock, flags);
1385 * find_domain
1386  * Note: we use struct pci_dev->dev.archdata.iommu to store the domain info
1388 struct dmar_domain *
1389 find_domain(struct pci_dev *pdev)
1391 struct device_domain_info *info;
1393 /* No lock here, assumes no domain exit in normal case */
1394 info = pdev->dev.archdata.iommu;
1395 if (info)
1396 return info->domain;
1397 return NULL;
1400 static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
1401 struct pci_dev *dev)
1403 int index;
1405 while (dev) {
1406 for (index = 0; index < cnt; index ++)
1407 if (dev == devices[index])
1408 return 1;
1410 /* Check our parent */
1411 dev = dev->bus->self;
1414 return 0;
1417 static struct dmar_drhd_unit *
1418 dmar_find_matched_drhd_unit(struct pci_dev *dev)
1420 struct dmar_drhd_unit *drhd = NULL;
1422 list_for_each_entry(drhd, &dmar_drhd_units, list) {
1423 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
1424 drhd->devices_cnt, dev))
1425 return drhd;
1428 return NULL;
1431 /* domain is initialized */
1432 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1434 struct dmar_domain *domain, *found = NULL;
1435 struct intel_iommu *iommu;
1436 struct dmar_drhd_unit *drhd;
1437 struct device_domain_info *info, *tmp;
1438 struct pci_dev *dev_tmp;
1439 unsigned long flags;
1440 int bus = 0, devfn = 0;
1442 domain = find_domain(pdev);
1443 if (domain)
1444 return domain;
1446 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1447 if (dev_tmp) {
1448 if (dev_tmp->is_pcie) {
1449 bus = dev_tmp->subordinate->number;
1450 devfn = 0;
1451 } else {
1452 bus = dev_tmp->bus->number;
1453 devfn = dev_tmp->devfn;
1455 spin_lock_irqsave(&device_domain_lock, flags);
1456 list_for_each_entry(info, &device_domain_list, global) {
1457 if (info->bus == bus && info->devfn == devfn) {
1458 found = info->domain;
1459 break;
1462 spin_unlock_irqrestore(&device_domain_lock, flags);
1463 /* pcie-pci bridge already has a domain, use it */
1464 if (found) {
1465 domain = found;
1466 goto found_domain;
1470 /* Allocate new domain for the device */
1471 drhd = dmar_find_matched_drhd_unit(pdev);
1472 if (!drhd) {
1473 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1474 pci_name(pdev));
1475 return NULL;
1477 iommu = drhd->iommu;
1479 domain = iommu_alloc_domain(iommu);
1480 if (!domain)
1481 goto error;
1483 if (domain_init(domain, gaw)) {
1484 domain_exit(domain);
1485 goto error;
1488 /* register pcie-to-pci device */
1489 if (dev_tmp) {
1490 info = alloc_devinfo_mem();
1491 if (!info) {
1492 domain_exit(domain);
1493 goto error;
1495 info->bus = bus;
1496 info->devfn = devfn;
1497 info->dev = NULL;
1498 info->domain = domain;
1499 /* This domain is shared by devices under p2p bridge */
1500 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1502 /* pcie-to-pci bridge already has a domain, use it */
1503 found = NULL;
1504 spin_lock_irqsave(&device_domain_lock, flags);
1505 list_for_each_entry(tmp, &device_domain_list, global) {
1506 if (tmp->bus == bus && tmp->devfn == devfn) {
1507 found = tmp->domain;
1508 break;
1511 if (found) {
1512 free_devinfo_mem(info);
1513 domain_exit(domain);
1514 domain = found;
1515 } else {
1516 list_add(&info->link, &domain->devices);
1517 list_add(&info->global, &device_domain_list);
1519 spin_unlock_irqrestore(&device_domain_lock, flags);
1522 found_domain:
1523 info = alloc_devinfo_mem();
1524 if (!info)
1525 goto error;
1526 info->bus = pdev->bus->number;
1527 info->devfn = pdev->devfn;
1528 info->dev = pdev;
1529 info->domain = domain;
1530 spin_lock_irqsave(&device_domain_lock, flags);
1531 /* somebody is fast */
1532 found = find_domain(pdev);
1533 if (found != NULL) {
1534 spin_unlock_irqrestore(&device_domain_lock, flags);
1535 if (found != domain) {
1536 domain_exit(domain);
1537 domain = found;
1539 free_devinfo_mem(info);
1540 return domain;
1542 list_add(&info->link, &domain->devices);
1543 list_add(&info->global, &device_domain_list);
1544 pdev->dev.archdata.iommu = info;
1545 spin_unlock_irqrestore(&device_domain_lock, flags);
1546 return domain;
1547 error:
1548 /* recheck it here, maybe others set it */
1549 return find_domain(pdev);
1552 static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
1554 struct dmar_domain *domain;
1555 unsigned long size;
1556 u64 base;
1557 int ret;
1559 printk(KERN_INFO
1560 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1561 pci_name(pdev), start, end);
1562 /* page table init */
1563 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1564 if (!domain)
1565 return -ENOMEM;
1567 /* The address might not be aligned */
1568 base = start & PAGE_MASK_4K;
1569 size = end - base;
1570 size = PAGE_ALIGN_4K(size);
1571 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1572 IOVA_PFN(base + size) - 1)) {
1573 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1574 ret = -ENOMEM;
1575 goto error;
1578 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1579 size, base, pci_name(pdev));
1581  * The RMRR range might overlap with the physical memory range,
1582  * so clear it first
1584 dma_pte_clear_range(domain, base, base + size);
1586 ret = domain_page_mapping(domain, base, base, size,
1587 DMA_PTE_READ|DMA_PTE_WRITE);
1588 if (ret)
1589 goto error;
1591 /* context entry init */
1592 ret = domain_context_mapping(domain, pdev);
1593 if (!ret)
1594 return 0;
1595 error:
1596 domain_exit(domain);
1597 return ret;
1601 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1602 struct pci_dev *pdev)
1604 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1605 return 0;
1606 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1607 rmrr->end_address + 1);
1610 #ifdef CONFIG_DMAR_GFX_WA
1611 extern int arch_get_ram_range(int slot, u64 *addr, u64 *size);
1612 static void __init iommu_prepare_gfx_mapping(void)
1614 struct pci_dev *pdev = NULL;
1615 u64 base, size;
1616 int slot;
1617 int ret;
1619 for_each_pci_dev(pdev) {
1620 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1621 !IS_GFX_DEVICE(pdev))
1622 continue;
1623 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1624 pci_name(pdev));
1625 slot = arch_get_ram_range(0, &base, &size);
1626 while (slot >= 0) {
1627 ret = iommu_prepare_identity_map(pdev,
1628 base, base + size);
1629 if (ret)
1630 goto error;
1631 slot = arch_get_ram_range(slot, &base, &size);
1633 continue;
1634 error:
1635 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1638 #endif
1640 #ifdef CONFIG_DMAR_FLOPPY_WA
1641 static inline void iommu_prepare_isa(void)
1643 struct pci_dev *pdev;
1644 int ret;
1646 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1647 if (!pdev)
1648 return;
1650 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1651 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1653 if (ret)
1654 printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
1655 "floppy might not work\n");
1658 #else
1659 static inline void iommu_prepare_isa(void)
1661 return;
1663 #endif /* CONFIG_DMAR_FLOPPY_WA */
1665 int __init init_dmars(void)
1667 struct dmar_drhd_unit *drhd;
1668 struct dmar_rmrr_unit *rmrr;
1669 struct pci_dev *pdev;
1670 struct intel_iommu *iommu;
1671 int ret, unit = 0;
1674 * for each drhd
1675 * allocate root
1676 * initialize and program root entry to not present
1677 * endfor
1679 for_each_drhd_unit(drhd) {
1680 if (drhd->ignored)
1681 continue;
1682 iommu = alloc_iommu(drhd);
1683 if (!iommu) {
1684 ret = -ENOMEM;
1685 goto error;
1689 * TBD:
1690 * we could share the same root & context tables
1691  * among all IOMMUs. Need to split it later.
1693 ret = iommu_alloc_root_entry(iommu);
1694 if (ret) {
1695 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1696 goto error;
1701 * For each rmrr
1702 * for each dev attached to rmrr
1703 * do
1704 * locate drhd for dev, alloc domain for dev
1705 * allocate free domain
1706 * allocate page table entries for rmrr
1707 * if context not allocated for bus
1708 * allocate and init context
1709 * set present in root table for this bus
1710 * init context with domain, translation etc
1711 * endfor
1712 * endfor
1714 for_each_rmrr_units(rmrr) {
1715 int i;
1716 for (i = 0; i < rmrr->devices_cnt; i++) {
1717 pdev = rmrr->devices[i];
1718 /* some BIOSes list non-existent devices in the DMAR table */
1719 if (!pdev)
1720 continue;
1721 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1722 if (ret)
1723 printk(KERN_ERR
1724 "IOMMU: mapping reserved region failed\n");
1728 iommu_prepare_gfx_mapping();
1730 iommu_prepare_isa();
1733 * for each drhd
1734 * enable fault log
1735 * global invalidate context cache
1736 * global invalidate iotlb
1737 * enable translation
1739 for_each_drhd_unit(drhd) {
1740 if (drhd->ignored)
1741 continue;
1742 iommu = drhd->iommu;
1743 sprintf (iommu->name, "dmar%d", unit++);
1745 iommu_flush_write_buffer(iommu);
1747 ret = dmar_set_interrupt(iommu);
1748 if (ret)
1749 goto error;
1751 iommu_set_root_entry(iommu);
1753 iommu_flush_context_global(iommu, 0);
1754 iommu_flush_iotlb_global(iommu, 0);
1756 iommu_disable_protect_mem_regions(iommu);
1758 ret = iommu_enable_translation(iommu);
1759 if (ret)
1760 goto error;
1763 return 0;
1764 error:
1765 for_each_drhd_unit(drhd) {
1766 if (drhd->ignored)
1767 continue;
1768 iommu = drhd->iommu;
1769 free_iommu(iommu);
1771 return ret;
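/*
 * Size of the mapping needed to cover [host_addr, host_addr + size),
 * i.e. the in-page offset plus the length, rounded up to whole 4K pages.
 */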
1774 static inline u64 aligned_size(u64 host_addr, size_t size)
1776 u64 addr;
1777 addr = (host_addr & (~PAGE_MASK_4K)) + size;
1778 return PAGE_ALIGN_4K(addr);
1781 struct iova *
1782 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1784 struct iova *piova;
1786 /* Make sure it's in range */
1787 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1788 if (!size || (IOVA_START_ADDR + size > end))
1789 return NULL;
1791 piova = alloc_iova(&domain->iovad,
1792 size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1);
1793 return piova;
1796 static struct iova *
1797 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1798 size_t size)
1800 struct pci_dev *pdev = to_pci_dev(dev);
1801 struct iova *iova = NULL;
1803 if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) {
1804 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1805 } else {
1807 * First try to allocate an io virtual address in
1808 * DMA_32BIT_MASK and if that fails then try allocating
1809 * from higher range
1811 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1812 if (!iova)
1813 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1816 if (!iova) {
1817 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1818 return NULL;
1821 return iova;
1824 static struct dmar_domain *
1825 get_valid_domain_for_dev(struct pci_dev *pdev)
1827 struct dmar_domain *domain;
1828 int ret;
1830 domain = get_domain_for_dev(pdev,
1831 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1832 if (!domain) {
1833 printk(KERN_ERR
1834 "Allocating domain for %s failed", pci_name(pdev));
1835 return NULL;
1838 /* make sure context mapping is ok */
1839 if (unlikely(!domain_context_mapped(domain, pdev))) {
1840 ret = domain_context_mapping(domain, pdev);
1841 if (ret) {
1842 printk(KERN_ERR
1843 "Domain context map for %s failed",
1844 pci_name(pdev));
1845 return NULL;
1849 return domain;
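/*
 * Map a single buffer for DMA: allocate an IOVA range (preferring the
 * 32-bit space unless the device mask is 32-bit or forcedac is set),
 * install the page-table entries, then do a page-selective IOTLB flush
 * for the new range, or just flush the write buffer when the hardware
 * does not cache not-present entries.
 */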
1852 static dma_addr_t intel_map_single(struct device *hwdev, void *addr,
1853 size_t size, int dir)
1855 struct pci_dev *pdev = to_pci_dev(hwdev);
1856 int ret;
1857 struct dmar_domain *domain;
1858 unsigned long start_addr;
1859 struct iova *iova;
1860 int prot = 0;
1862 BUG_ON(dir == DMA_NONE);
1863 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1864 return virt_to_bus(addr);
1866 domain = get_valid_domain_for_dev(pdev);
1867 if (!domain)
1868 return 0;
1870 addr = (void *)virt_to_phys(addr);
1871 size = aligned_size((u64)addr, size);
1873 iova = __intel_alloc_iova(hwdev, domain, size);
1874 if (!iova)
1875 goto error;
1877 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1880 * Check if DMAR supports zero-length reads on write only
1881 * mappings..
1883 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1884 !cap_zlr(domain->iommu->cap))
1885 prot |= DMA_PTE_READ;
1886 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1887 prot |= DMA_PTE_WRITE;
1889  * addr to (addr + size) might span a partial page, so we map the whole
1890  * page. Note: if two parts of one page are separately mapped, we
1891  * might have two guest addresses mapping to the same host addr, but this
1892  * is not a big problem
1894 ret = domain_page_mapping(domain, start_addr,
1895 ((u64)addr) & PAGE_MASK_4K, size, prot);
1896 if (ret)
1897 goto error;
1899 pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
1900 pci_name(pdev), size, (u64)addr,
1901 size, (u64)start_addr, dir);
1903 /* it's a non-present to present mapping */
1904 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
1905 start_addr, size >> PAGE_SHIFT_4K, 1);
1906 if (ret)
1907 iommu_flush_write_buffer(domain->iommu);
1909 return (start_addr + ((u64)addr & (~PAGE_MASK_4K)));
1911 error:
1912 if (iova)
1913 __free_iova(&domain->iovad, iova);
1914 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
1915 pci_name(pdev), size, (u64)addr, dir);
1916 return 0;
1919 static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
1920 size_t size, int dir)
1922 struct pci_dev *pdev = to_pci_dev(dev);
1923 struct dmar_domain *domain;
1924 unsigned long start_addr;
1925 struct iova *iova;
1927 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1928 return;
1929 domain = find_domain(pdev);
1930 BUG_ON(!domain);
1932 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
1933 if (!iova)
1934 return;
1936 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1937 size = aligned_size((u64)dev_addr, size);
1939 pr_debug("Device %s unmapping: %lx@%llx\n",
1940 pci_name(pdev), size, (u64)start_addr);
1942 /* clear the whole page */
1943 dma_pte_clear_range(domain, start_addr, start_addr + size);
1944 /* free page tables */
1945 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1947 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
1948 size >> PAGE_SHIFT_4K, 0))
1949 iommu_flush_write_buffer(domain->iommu);
1951 /* free iova */
1952 __free_iova(&domain->iovad, iova);
1955 static void * intel_alloc_coherent(struct device *hwdev, size_t size,
1956 dma_addr_t *dma_handle, gfp_t flags)
1958 void *vaddr;
1959 int order;
1961 size = PAGE_ALIGN_4K(size);
1962 order = get_order(size);
1963 flags &= ~(GFP_DMA | GFP_DMA32);
1965 vaddr = (void *)__get_free_pages(flags, order);
1966 if (!vaddr)
1967 return NULL;
1968 memset(vaddr, 0, size);
1970 *dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL);
1971 if (*dma_handle)
1972 return vaddr;
1973 free_pages((unsigned long)vaddr, order);
1974 return NULL;
1977 static void intel_free_coherent(struct device *hwdev, size_t size,
1978 void *vaddr, dma_addr_t dma_handle)
1980 int order;
1982 size = PAGE_ALIGN_4K(size);
1983 order = get_order(size);
1985 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
1986 free_pages((unsigned long)vaddr, order);
1989 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
1990 static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
1991 int nelems, int dir)
1993 int i;
1994 struct pci_dev *pdev = to_pci_dev(hwdev);
1995 struct dmar_domain *domain;
1996 unsigned long start_addr;
1997 struct iova *iova;
1998 size_t size = 0;
1999 void *addr;
2000 struct scatterlist *sg;
2002 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2003 return;
2005 domain = find_domain(pdev);
2007 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2008 if (!iova)
2009 return;
2010 for_each_sg(sglist, sg, nelems, i) {
2011 addr = SG_ENT_VIRT_ADDRESS(sg);
2012 size += aligned_size((u64)addr, sg->length);
2015 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2017 /* clear the whole page */
2018 dma_pte_clear_range(domain, start_addr, start_addr + size);
2019 /* free page tables */
2020 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2022 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
2023 size >> PAGE_SHIFT_4K, 0))
2024 iommu_flush_write_buffer(domain->iommu);
2026 /* free iova */
2027 __free_iova(&domain->iovad, iova);
2030 static int intel_nontranslate_map_sg(struct device *hwdev,
2031 struct scatterlist *sglist, int nelems, int dir)
2033 int i;
2034 struct scatterlist *sg;
2036 for_each_sg(sglist, sg, nelems, i) {
2037 BUG_ON(!sg_page(sg));
2038 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2039 sg->dma_length = sg->length;
2041 return nelems;
2044 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
2045 int nelems, int dir)
2047 void *addr;
2048 int i;
2049 struct pci_dev *pdev = to_pci_dev(hwdev);
2050 struct dmar_domain *domain;
2051 size_t size = 0;
2052 int prot = 0;
2053 size_t offset = 0;
2054 struct iova *iova = NULL;
2055 int ret;
2056 struct scatterlist *sg;
2057 unsigned long start_addr;
2059 BUG_ON(dir == DMA_NONE);
2060 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2061 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2063 domain = get_valid_domain_for_dev(pdev);
2064 if (!domain)
2065 return 0;
2067 for_each_sg(sglist, sg, nelems, i) {
2068 addr = SG_ENT_VIRT_ADDRESS(sg);
2069 addr = (void *)virt_to_phys(addr);
2070 size += aligned_size((u64)addr, sg->length);
2073 iova = __intel_alloc_iova(hwdev, domain, size);
2074 if (!iova) {
2075 sglist->dma_length = 0;
2076 return 0;
2080 * Check if DMAR supports zero-length reads on write only
2081 * mappings..
2083 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2084 !cap_zlr(domain->iommu->cap))
2085 prot |= DMA_PTE_READ;
2086 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2087 prot |= DMA_PTE_WRITE;
2089 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2090 offset = 0;
2091 for_each_sg(sglist, sg, nelems, i) {
2092 addr = SG_ENT_VIRT_ADDRESS(sg);
2093 addr = (void *)virt_to_phys(addr);
2094 size = aligned_size((u64)addr, sg->length);
2095 ret = domain_page_mapping(domain, start_addr + offset,
2096 ((u64)addr) & PAGE_MASK_4K,
2097 size, prot);
2098 if (ret) {
2099 /* clear the page */
2100 dma_pte_clear_range(domain, start_addr,
2101 start_addr + offset);
2102 /* free page tables */
2103 dma_pte_free_pagetable(domain, start_addr,
2104 start_addr + offset);
2105 /* free iova */
2106 __free_iova(&domain->iovad, iova);
2107 return 0;
2109 sg->dma_address = start_addr + offset +
2110 ((u64)addr & (~PAGE_MASK_4K));
2111 sg->dma_length = sg->length;
2112 offset += size;
2115 /* it's a non-present to present mapping */
2116 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2117 start_addr, offset >> PAGE_SHIFT_4K, 1))
2118 iommu_flush_write_buffer(domain->iommu);
2119 return nelems;
2122 static struct dma_mapping_ops intel_dma_ops = {
2123 .alloc_coherent = intel_alloc_coherent,
2124 .free_coherent = intel_free_coherent,
2125 .map_single = intel_map_single,
2126 .unmap_single = intel_unmap_single,
2127 .map_sg = intel_map_sg,
2128 .unmap_sg = intel_unmap_sg,
2131 static inline int iommu_domain_cache_init(void)
2133 int ret = 0;
2135 iommu_domain_cache = kmem_cache_create("iommu_domain",
2136 sizeof(struct dmar_domain),
2138 SLAB_HWCACHE_ALIGN,
2140 NULL);
2141 if (!iommu_domain_cache) {
2142 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2143 ret = -ENOMEM;
2146 return ret;
2149 static inline int iommu_devinfo_cache_init(void)
2151 int ret = 0;
2153 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2154 sizeof(struct device_domain_info),
2156 SLAB_HWCACHE_ALIGN,
2158 NULL);
2159 if (!iommu_devinfo_cache) {
2160 printk(KERN_ERR "Couldn't create devinfo cache\n");
2161 ret = -ENOMEM;
2164 return ret;
2167 static inline int iommu_iova_cache_init(void)
2169 int ret = 0;
2171 iommu_iova_cache = kmem_cache_create("iommu_iova",
2172 sizeof(struct iova),
2174 SLAB_HWCACHE_ALIGN,
2176 NULL);
2177 if (!iommu_iova_cache) {
2178 printk(KERN_ERR "Couldn't create iova cache\n");
2179 ret = -ENOMEM;
2182 return ret;
2185 static int __init iommu_init_mempool(void)
2187 int ret;
2188 ret = iommu_iova_cache_init();
2189 if (ret)
2190 return ret;
2192 ret = iommu_domain_cache_init();
2193 if (ret)
2194 goto domain_error;
2196 ret = iommu_devinfo_cache_init();
2197 if (!ret)
2198 return ret;
2200 kmem_cache_destroy(iommu_domain_cache);
2201 domain_error:
2202 kmem_cache_destroy(iommu_iova_cache);
2204 return -ENOMEM;
2207 static void __init iommu_exit_mempool(void)
2209 kmem_cache_destroy(iommu_devinfo_cache);
2210 kmem_cache_destroy(iommu_domain_cache);
2211 kmem_cache_destroy(iommu_iova_cache);
2215 void __init detect_intel_iommu(void)
2217 if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
2218 return;
2219 if (early_dmar_detect()) {
2220 iommu_detected = 1;
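/*
 * Mark DRHD units whose device scope contains no PCI devices as ignored.
 * With intel_iommu=igfx_off, also bypass units that cover only graphics
 * devices and point their devices at the dummy domain.
 */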
2224 static void __init init_no_remapping_devices(void)
2226 struct dmar_drhd_unit *drhd;
2228 for_each_drhd_unit(drhd) {
2229 if (!drhd->include_all) {
2230 int i;
2231 for (i = 0; i < drhd->devices_cnt; i++)
2232 if (drhd->devices[i] != NULL)
2233 break;
2234 /* ignore DMAR unit if no pci devices exist */
2235 if (i == drhd->devices_cnt)
2236 drhd->ignored = 1;
2240 if (dmar_map_gfx)
2241 return;
2243 for_each_drhd_unit(drhd) {
2244 int i;
2245 if (drhd->ignored || drhd->include_all)
2246 continue;
2248 for (i = 0; i < drhd->devices_cnt; i++)
2249 if (drhd->devices[i] &&
2250 !IS_GFX_DEVICE(drhd->devices[i]))
2251 break;
2253 if (i < drhd->devices_cnt)
2254 continue;
2256 /* bypass IOMMU if it is just for gfx devices */
2257 drhd->ignored = 1;
2258 for (i = 0; i < drhd->devices_cnt; i++) {
2259 if (!drhd->devices[i])
2260 continue;
2261 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2266 int __init intel_iommu_init(void)
2268 int ret = 0;
2270 if (no_iommu || swiotlb || dmar_disabled)
2271 return -ENODEV;
2273 if (dmar_table_init())
2274 return -ENODEV;
2276 iommu_init_mempool();
2277 dmar_init_reserved_ranges();
2279 init_no_remapping_devices();
2281 ret = init_dmars();
2282 if (ret) {
2283 printk(KERN_ERR "IOMMU: dmar init failed\n");
2284 put_iova_domain(&reserved_iova_list);
2285 iommu_exit_mempool();
2286 return ret;
2288 printk(KERN_INFO
2289 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2291 force_iommu = 1;
2292 dma_ops = &intel_dma_ops;
2293 return 0;