Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
[linux-2.6/linux-mips/linux-dm7025.git] / drivers / pci / intel-iommu.c
blob4cb949f0ebd9746e8e0c5d29a6c5fba034bbf370
1 /*
2 * Copyright (c) 2006, Intel Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
23 #include <linux/init.h>
24 #include <linux/bitmap.h>
25 #include <linux/slab.h>
26 #include <linux/irq.h>
27 #include <linux/interrupt.h>
28 #include <linux/sysdev.h>
29 #include <linux/spinlock.h>
30 #include <linux/pci.h>
31 #include <linux/dmar.h>
32 #include <linux/dma-mapping.h>
33 #include <linux/mempool.h>
34 #include "iova.h"
35 #include "intel-iommu.h"
36 #include <asm/proto.h> /* force_iommu in this header in x86-64*/
37 #include <asm/cacheflush.h>
38 #include <asm/gart.h>
39 #include "pci.h"
/*
 * PCI class tests.  The argument is a pointer to a struct with a ->class
 * member; it is parenthesized so that expressions such as &dev expand
 * correctly.
 */
#define IS_GFX_DEVICE(pdev) (((pdev)->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) (((pdev)->class >> 8) == PCI_CLASS_BRIDGE_ISA)

/* Address window reserved for the IOAPIC range (inclusive bounds). */
#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */

/*
 * Highest address representable with a guest address width of @gaw bits.
 * @gaw is parenthesized because '<<' binds tighter than '&', '|' and '?:'.
 */
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << (gaw)) - 1)
54 static void domain_remove_dev_info(struct dmar_domain *domain);
56 static int dmar_disabled;
57 static int __initdata dmar_map_gfx = 1;
58 static int dmar_forcedac;
60 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
61 static DEFINE_SPINLOCK(device_domain_lock);
62 static LIST_HEAD(device_domain_list);
64 static int __init intel_iommu_setup(char *str)
66 if (!str)
67 return -EINVAL;
68 while (*str) {
69 if (!strncmp(str, "off", 3)) {
70 dmar_disabled = 1;
71 printk(KERN_INFO"Intel-IOMMU: disabled\n");
72 } else if (!strncmp(str, "igfx_off", 8)) {
73 dmar_map_gfx = 0;
74 printk(KERN_INFO
75 "Intel-IOMMU: disable GFX device mapping\n");
76 } else if (!strncmp(str, "forcedac", 8)) {
77 printk (KERN_INFO
78 "Intel-IOMMU: Forcing DAC for PCI devices\n");
79 dmar_forcedac = 1;
82 str += strcspn(str, ",");
83 while (*str == ',')
84 str++;
86 return 0;
88 __setup("intel_iommu=", intel_iommu_setup);
/* Slab caches backing the alloc_*_mem()/free_*_mem() helpers below. */
static struct kmem_cache *iommu_domain_cache;	/* domain objects */
static struct kmem_cache *iommu_devinfo_cache;	/* device info objects */
static struct kmem_cache *iommu_iova_cache;	/* struct iova objects */
94 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
96 unsigned int flags;
97 void *vaddr;
99 /* trying to avoid low memory issues */
100 flags = current->flags & PF_MEMALLOC;
101 current->flags |= PF_MEMALLOC;
102 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
103 current->flags &= (~PF_MEMALLOC | flags);
104 return vaddr;
108 static inline void *alloc_pgtable_page(void)
110 unsigned int flags;
111 void *vaddr;
113 /* trying to avoid low memory issues */
114 flags = current->flags & PF_MEMALLOC;
115 current->flags |= PF_MEMALLOC;
116 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
117 current->flags &= (~PF_MEMALLOC | flags);
118 return vaddr;
/* Hand a page-table page back to the page allocator via free_page(). */
static inline void free_pgtable_page(void *vaddr)
{
	unsigned long page = (unsigned long)vaddr;

	free_page(page);
}
126 static inline void *alloc_domain_mem(void)
128 return iommu_kmem_cache_alloc(iommu_domain_cache);
131 static inline void free_domain_mem(void *vaddr)
133 kmem_cache_free(iommu_domain_cache, vaddr);
136 static inline void * alloc_devinfo_mem(void)
138 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
141 static inline void free_devinfo_mem(void *vaddr)
143 kmem_cache_free(iommu_devinfo_cache, vaddr);
146 struct iova *alloc_iova_mem(void)
148 return iommu_kmem_cache_alloc(iommu_iova_cache);
151 void free_iova_mem(struct iova *iova)
153 kmem_cache_free(iommu_iova_cache, iova);
156 static inline void __iommu_flush_cache(
157 struct intel_iommu *iommu, void *addr, int size)
159 if (!ecap_coherent(iommu->ecap))
160 clflush_cache_range(addr, size);
163 /* Gets context entry for a given bus and devfn */
164 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
165 u8 bus, u8 devfn)
167 struct root_entry *root;
168 struct context_entry *context;
169 unsigned long phy_addr;
170 unsigned long flags;
172 spin_lock_irqsave(&iommu->lock, flags);
173 root = &iommu->root_entry[bus];
174 context = get_context_addr_from_root(root);
175 if (!context) {
176 context = (struct context_entry *)alloc_pgtable_page();
177 if (!context) {
178 spin_unlock_irqrestore(&iommu->lock, flags);
179 return NULL;
181 __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
182 phy_addr = virt_to_phys((void *)context);
183 set_root_value(root, phy_addr);
184 set_root_present(root);
185 __iommu_flush_cache(iommu, root, sizeof(*root));
187 spin_unlock_irqrestore(&iommu->lock, flags);
188 return &context[devfn];
191 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
193 struct root_entry *root;
194 struct context_entry *context;
195 int ret;
196 unsigned long flags;
198 spin_lock_irqsave(&iommu->lock, flags);
199 root = &iommu->root_entry[bus];
200 context = get_context_addr_from_root(root);
201 if (!context) {
202 ret = 0;
203 goto out;
205 ret = context_present(context[devfn]);
206 out:
207 spin_unlock_irqrestore(&iommu->lock, flags);
208 return ret;
211 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
213 struct root_entry *root;
214 struct context_entry *context;
215 unsigned long flags;
217 spin_lock_irqsave(&iommu->lock, flags);
218 root = &iommu->root_entry[bus];
219 context = get_context_addr_from_root(root);
220 if (context) {
221 context_clear_entry(context[devfn]);
222 __iommu_flush_cache(iommu, &context[devfn], \
223 sizeof(*context));
225 spin_unlock_irqrestore(&iommu->lock, flags);
228 static void free_context_table(struct intel_iommu *iommu)
230 struct root_entry *root;
231 int i;
232 unsigned long flags;
233 struct context_entry *context;
235 spin_lock_irqsave(&iommu->lock, flags);
236 if (!iommu->root_entry) {
237 goto out;
239 for (i = 0; i < ROOT_ENTRY_NR; i++) {
240 root = &iommu->root_entry[i];
241 context = get_context_addr_from_root(root);
242 if (context)
243 free_pgtable_page(context);
245 free_pgtable_page(iommu->root_entry);
246 iommu->root_entry = NULL;
247 out:
248 spin_unlock_irqrestore(&iommu->lock, flags);
/* page table handling */

/* Number of address bits consumed by each page-table level. */
#define LEVEL_STRIDE		(9)
/* Mask selecting one level's index out of a shifted address. */
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
/* Number of page-table levels for an AGAW value: agaw + 2. */
static inline int agaw_to_level(int agaw)
{
	int levels = agaw + 2;

	return levels;
}
260 static inline int agaw_to_width(int agaw)
262 return 30 + agaw * LEVEL_STRIDE;
266 static inline int width_to_agaw(int width)
268 return (width - 30) / LEVEL_STRIDE;
271 static inline unsigned int level_to_offset_bits(int level)
273 return (12 + (level - 1) * LEVEL_STRIDE);
276 static inline int address_level_offset(u64 addr, int level)
278 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
281 static inline u64 level_mask(int level)
283 return ((u64)-1 << level_to_offset_bits(level));
286 static inline u64 level_size(int level)
288 return ((u64)1 << level_to_offset_bits(level));
291 static inline u64 align_to_level(u64 addr, int level)
293 return ((addr + level_size(level) - 1) & level_mask(level));
296 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
298 int addr_width = agaw_to_width(domain->agaw);
299 struct dma_pte *parent, *pte = NULL;
300 int level = agaw_to_level(domain->agaw);
301 int offset;
302 unsigned long flags;
304 BUG_ON(!domain->pgd);
306 addr &= (((u64)1) << addr_width) - 1;
307 parent = domain->pgd;
309 spin_lock_irqsave(&domain->mapping_lock, flags);
310 while (level > 0) {
311 void *tmp_page;
313 offset = address_level_offset(addr, level);
314 pte = &parent[offset];
315 if (level == 1)
316 break;
318 if (!dma_pte_present(*pte)) {
319 tmp_page = alloc_pgtable_page();
321 if (!tmp_page) {
322 spin_unlock_irqrestore(&domain->mapping_lock,
323 flags);
324 return NULL;
326 __iommu_flush_cache(domain->iommu, tmp_page,
327 PAGE_SIZE_4K);
328 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
330 * high level table always sets r/w, last level page
331 * table control read/write
333 dma_set_pte_readable(*pte);
334 dma_set_pte_writable(*pte);
335 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
337 parent = phys_to_virt(dma_pte_addr(*pte));
338 level--;
341 spin_unlock_irqrestore(&domain->mapping_lock, flags);
342 return pte;
345 /* return address's pte at specific level */
346 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
347 int level)
349 struct dma_pte *parent, *pte = NULL;
350 int total = agaw_to_level(domain->agaw);
351 int offset;
353 parent = domain->pgd;
354 while (level <= total) {
355 offset = address_level_offset(addr, total);
356 pte = &parent[offset];
357 if (level == total)
358 return pte;
360 if (!dma_pte_present(*pte))
361 break;
362 parent = phys_to_virt(dma_pte_addr(*pte));
363 total--;
365 return NULL;
368 /* clear one page's page table */
369 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
371 struct dma_pte *pte = NULL;
373 /* get last level pte */
374 pte = dma_addr_level_pte(domain, addr, 1);
376 if (pte) {
377 dma_clear_pte(*pte);
378 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
382 /* clear last level pte, a tlb flush should be followed */
383 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
385 int addr_width = agaw_to_width(domain->agaw);
387 start &= (((u64)1) << addr_width) - 1;
388 end &= (((u64)1) << addr_width) - 1;
389 /* in case it's partial page */
390 start = PAGE_ALIGN_4K(start);
391 end &= PAGE_MASK_4K;
393 /* we don't need lock here, nobody else touches the iova range */
394 while (start < end) {
395 dma_pte_clear_one(domain, start);
396 start += PAGE_SIZE_4K;
400 /* free page table pages. last level pte should already be cleared */
401 static void dma_pte_free_pagetable(struct dmar_domain *domain,
402 u64 start, u64 end)
404 int addr_width = agaw_to_width(domain->agaw);
405 struct dma_pte *pte;
406 int total = agaw_to_level(domain->agaw);
407 int level;
408 u64 tmp;
410 start &= (((u64)1) << addr_width) - 1;
411 end &= (((u64)1) << addr_width) - 1;
413 /* we don't need lock here, nobody else touches the iova range */
414 level = 2;
415 while (level <= total) {
416 tmp = align_to_level(start, level);
417 if (tmp >= end || (tmp + level_size(level) > end))
418 return;
420 while (tmp < end) {
421 pte = dma_addr_level_pte(domain, tmp, level);
422 if (pte) {
423 free_pgtable_page(
424 phys_to_virt(dma_pte_addr(*pte)));
425 dma_clear_pte(*pte);
426 __iommu_flush_cache(domain->iommu,
427 pte, sizeof(*pte));
429 tmp += level_size(level);
431 level++;
433 /* free pgd */
434 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
435 free_pgtable_page(domain->pgd);
436 domain->pgd = NULL;
440 /* iommu handling */
441 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
443 struct root_entry *root;
444 unsigned long flags;
446 root = (struct root_entry *)alloc_pgtable_page();
447 if (!root)
448 return -ENOMEM;
450 __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);
452 spin_lock_irqsave(&iommu->lock, flags);
453 iommu->root_entry = root;
454 spin_unlock_irqrestore(&iommu->lock, flags);
456 return 0;
/*
 * Busy-wait on an IOMMU register.
 *
 * @iommu:  struct intel_iommu pointer; its ->reg is the register base
 * @offset: register offset from ->reg
 * @op:     accessor used to read the register (e.g. readl, dmar_readq)
 * @cond:   expression, normally written in terms of @sts, that ends the wait
 * @sts:    lvalue that receives each value read
 *
 * Panics once DMAR_OPERATION_TIMEOUT jiffies have elapsed without @cond
 * becoming true.  Wrapped in do { } while (0) so that the macro behaves as a
 * single statement (safe in an unbraced if/else); @iommu and @offset are
 * parenthesized so compound expressions expand as intended.
 */
#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
do {\
	unsigned long start_time = jiffies;\
	while (1) {\
		sts = op((iommu)->reg + (offset));\
		if (cond)\
			break;\
		if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
			panic("DMAR hardware is malfunctioning\n");\
		cpu_relax();\
	}\
} while (0)
472 static void iommu_set_root_entry(struct intel_iommu *iommu)
474 void *addr;
475 u32 cmd, sts;
476 unsigned long flag;
478 addr = iommu->root_entry;
480 spin_lock_irqsave(&iommu->register_lock, flag);
481 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
483 cmd = iommu->gcmd | DMA_GCMD_SRTP;
484 writel(cmd, iommu->reg + DMAR_GCMD_REG);
486 /* Make sure hardware complete it */
487 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
488 readl, (sts & DMA_GSTS_RTPS), sts);
490 spin_unlock_irqrestore(&iommu->register_lock, flag);
493 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
495 u32 val;
496 unsigned long flag;
498 if (!cap_rwbf(iommu->cap))
499 return;
500 val = iommu->gcmd | DMA_GCMD_WBF;
502 spin_lock_irqsave(&iommu->register_lock, flag);
503 writel(val, iommu->reg + DMAR_GCMD_REG);
505 /* Make sure hardware complete it */
506 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
507 readl, (!(val & DMA_GSTS_WBFS)), val);
509 spin_unlock_irqrestore(&iommu->register_lock, flag);
512 /* return value determine if we need a write buffer flush */
513 static int __iommu_flush_context(struct intel_iommu *iommu,
514 u16 did, u16 source_id, u8 function_mask, u64 type,
515 int non_present_entry_flush)
517 u64 val = 0;
518 unsigned long flag;
521 * In the non-present entry flush case, if hardware doesn't cache
522 * non-present entry we do nothing and if hardware cache non-present
523 * entry, we flush entries of domain 0 (the domain id is used to cache
524 * any non-present entries)
526 if (non_present_entry_flush) {
527 if (!cap_caching_mode(iommu->cap))
528 return 1;
529 else
530 did = 0;
533 switch (type) {
534 case DMA_CCMD_GLOBAL_INVL:
535 val = DMA_CCMD_GLOBAL_INVL;
536 break;
537 case DMA_CCMD_DOMAIN_INVL:
538 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
539 break;
540 case DMA_CCMD_DEVICE_INVL:
541 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
542 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
543 break;
544 default:
545 BUG();
547 val |= DMA_CCMD_ICC;
549 spin_lock_irqsave(&iommu->register_lock, flag);
550 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
552 /* Make sure hardware complete it */
553 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
554 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
556 spin_unlock_irqrestore(&iommu->register_lock, flag);
558 /* flush context entry will implictly flush write buffer */
559 return 0;
562 static int inline iommu_flush_context_global(struct intel_iommu *iommu,
563 int non_present_entry_flush)
565 return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
566 non_present_entry_flush);
569 static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did,
570 int non_present_entry_flush)
572 return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
573 non_present_entry_flush);
576 static int inline iommu_flush_context_device(struct intel_iommu *iommu,
577 u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
579 return __iommu_flush_context(iommu, did, source_id, function_mask,
580 DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
583 /* return value determine if we need a write buffer flush */
584 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
585 u64 addr, unsigned int size_order, u64 type,
586 int non_present_entry_flush)
588 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
589 u64 val = 0, val_iva = 0;
590 unsigned long flag;
593 * In the non-present entry flush case, if hardware doesn't cache
594 * non-present entry we do nothing and if hardware cache non-present
595 * entry, we flush entries of domain 0 (the domain id is used to cache
596 * any non-present entries)
598 if (non_present_entry_flush) {
599 if (!cap_caching_mode(iommu->cap))
600 return 1;
601 else
602 did = 0;
605 switch (type) {
606 case DMA_TLB_GLOBAL_FLUSH:
607 /* global flush doesn't need set IVA_REG */
608 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
609 break;
610 case DMA_TLB_DSI_FLUSH:
611 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
612 break;
613 case DMA_TLB_PSI_FLUSH:
614 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
615 /* Note: always flush non-leaf currently */
616 val_iva = size_order | addr;
617 break;
618 default:
619 BUG();
621 /* Note: set drain read/write */
622 #if 0
624 * This is probably to be super secure.. Looks like we can
625 * ignore it without any impact.
627 if (cap_read_drain(iommu->cap))
628 val |= DMA_TLB_READ_DRAIN;
629 #endif
630 if (cap_write_drain(iommu->cap))
631 val |= DMA_TLB_WRITE_DRAIN;
633 spin_lock_irqsave(&iommu->register_lock, flag);
634 /* Note: Only uses first TLB reg currently */
635 if (val_iva)
636 dmar_writeq(iommu->reg + tlb_offset, val_iva);
637 dmar_writeq(iommu->reg + tlb_offset + 8, val);
639 /* Make sure hardware complete it */
640 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
641 dmar_readq, (!(val & DMA_TLB_IVT)), val);
643 spin_unlock_irqrestore(&iommu->register_lock, flag);
645 /* check IOTLB invalidation granularity */
646 if (DMA_TLB_IAIG(val) == 0)
647 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
648 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
649 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
650 DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
651 /* flush context entry will implictly flush write buffer */
652 return 0;
655 static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu,
656 int non_present_entry_flush)
658 return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
659 non_present_entry_flush);
662 static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did,
663 int non_present_entry_flush)
665 return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
666 non_present_entry_flush);
669 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
670 u64 addr, unsigned int pages, int non_present_entry_flush)
672 unsigned int mask;
674 BUG_ON(addr & (~PAGE_MASK_4K));
675 BUG_ON(pages == 0);
677 /* Fallback to domain selective flush if no PSI support */
678 if (!cap_pgsel_inv(iommu->cap))
679 return iommu_flush_iotlb_dsi(iommu, did,
680 non_present_entry_flush);
683 * PSI requires page size to be 2 ^ x, and the base address is naturally
684 * aligned to the size
686 mask = ilog2(__roundup_pow_of_two(pages));
687 /* Fallback to domain selective flush if size is too big */
688 if (mask > cap_max_amask_val(iommu->cap))
689 return iommu_flush_iotlb_dsi(iommu, did,
690 non_present_entry_flush);
692 return __iommu_flush_iotlb(iommu, did, addr, mask,
693 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
696 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
698 u32 pmen;
699 unsigned long flags;
701 spin_lock_irqsave(&iommu->register_lock, flags);
702 pmen = readl(iommu->reg + DMAR_PMEN_REG);
703 pmen &= ~DMA_PMEN_EPM;
704 writel(pmen, iommu->reg + DMAR_PMEN_REG);
706 /* wait for the protected region status bit to clear */
707 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
708 readl, !(pmen & DMA_PMEN_PRS), pmen);
710 spin_unlock_irqrestore(&iommu->register_lock, flags);
713 static int iommu_enable_translation(struct intel_iommu *iommu)
715 u32 sts;
716 unsigned long flags;
718 spin_lock_irqsave(&iommu->register_lock, flags);
719 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
721 /* Make sure hardware complete it */
722 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
723 readl, (sts & DMA_GSTS_TES), sts);
725 iommu->gcmd |= DMA_GCMD_TE;
726 spin_unlock_irqrestore(&iommu->register_lock, flags);
727 return 0;
730 static int iommu_disable_translation(struct intel_iommu *iommu)
732 u32 sts;
733 unsigned long flag;
735 spin_lock_irqsave(&iommu->register_lock, flag);
736 iommu->gcmd &= ~DMA_GCMD_TE;
737 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
739 /* Make sure hardware complete it */
740 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
741 readl, (!(sts & DMA_GSTS_TES)), sts);
743 spin_unlock_irqrestore(&iommu->register_lock, flag);
744 return 0;
747 /* iommu interrupt handling. Most stuff are MSI-like. */
/* Human-readable text for each fault reason code, indexed by the code. */
static const char *fault_reason_strings[] =
{
	"Software",					/*  0 */
	"Present bit in root entry is clear",		/*  1 */
	"Present bit in context entry is clear",	/*  2 */
	"Invalid context entry",			/*  3 */
	"Access beyond MGAW",				/*  4 */
	"PTE Write access is not set",			/*  5 */
	"PTE Read access is not set",			/*  6 */
	"Next page table ptr is invalid",		/*  7 */
	"Root table address invalid",			/*  8 */
	"Context table ptr is invalid",			/*  9 */
	"non-zero reserved fields in RTP",		/* 10 */
	"non-zero reserved fields in CTP",		/* 11 */
	"non-zero reserved fields in PTE",		/* 12 */
};
/* Largest valid index into fault_reason_strings[]. */
#define MAX_FAULT_REASON_IDX 	(ARRAY_SIZE(fault_reason_strings) - 1)
767 const char *dmar_get_fault_reason(u8 fault_reason)
769 if (fault_reason > MAX_FAULT_REASON_IDX)
770 return "Unknown";
771 else
772 return fault_reason_strings[fault_reason];
775 void dmar_msi_unmask(unsigned int irq)
777 struct intel_iommu *iommu = get_irq_data(irq);
778 unsigned long flag;
780 /* unmask it */
781 spin_lock_irqsave(&iommu->register_lock, flag);
782 writel(0, iommu->reg + DMAR_FECTL_REG);
783 /* Read a reg to force flush the post write */
784 readl(iommu->reg + DMAR_FECTL_REG);
785 spin_unlock_irqrestore(&iommu->register_lock, flag);
788 void dmar_msi_mask(unsigned int irq)
790 unsigned long flag;
791 struct intel_iommu *iommu = get_irq_data(irq);
793 /* mask it */
794 spin_lock_irqsave(&iommu->register_lock, flag);
795 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
796 /* Read a reg to force flush the post write */
797 readl(iommu->reg + DMAR_FECTL_REG);
798 spin_unlock_irqrestore(&iommu->register_lock, flag);
801 void dmar_msi_write(int irq, struct msi_msg *msg)
803 struct intel_iommu *iommu = get_irq_data(irq);
804 unsigned long flag;
806 spin_lock_irqsave(&iommu->register_lock, flag);
807 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
808 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
809 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
810 spin_unlock_irqrestore(&iommu->register_lock, flag);
813 void dmar_msi_read(int irq, struct msi_msg *msg)
815 struct intel_iommu *iommu = get_irq_data(irq);
816 unsigned long flag;
818 spin_lock_irqsave(&iommu->register_lock, flag);
819 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
820 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
821 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
822 spin_unlock_irqrestore(&iommu->register_lock, flag);
825 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
826 u8 fault_reason, u16 source_id, u64 addr)
828 const char *reason;
830 reason = dmar_get_fault_reason(fault_reason);
832 printk(KERN_ERR
833 "DMAR:[%s] Request device [%02x:%02x.%d] "
834 "fault addr %llx \n"
835 "DMAR:[fault reason %02d] %s\n",
836 (type ? "DMA Read" : "DMA Write"),
837 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
838 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
839 return 0;
842 #define PRIMARY_FAULT_REG_LEN (16)
843 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
845 struct intel_iommu *iommu = dev_id;
846 int reg, fault_index;
847 u32 fault_status;
848 unsigned long flag;
850 spin_lock_irqsave(&iommu->register_lock, flag);
851 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
853 /* TBD: ignore advanced fault log currently */
854 if (!(fault_status & DMA_FSTS_PPF))
855 goto clear_overflow;
857 fault_index = dma_fsts_fault_record_index(fault_status);
858 reg = cap_fault_reg_offset(iommu->cap);
859 while (1) {
860 u8 fault_reason;
861 u16 source_id;
862 u64 guest_addr;
863 int type;
864 u32 data;
866 /* highest 32 bits */
867 data = readl(iommu->reg + reg +
868 fault_index * PRIMARY_FAULT_REG_LEN + 12);
869 if (!(data & DMA_FRCD_F))
870 break;
872 fault_reason = dma_frcd_fault_reason(data);
873 type = dma_frcd_type(data);
875 data = readl(iommu->reg + reg +
876 fault_index * PRIMARY_FAULT_REG_LEN + 8);
877 source_id = dma_frcd_source_id(data);
879 guest_addr = dmar_readq(iommu->reg + reg +
880 fault_index * PRIMARY_FAULT_REG_LEN);
881 guest_addr = dma_frcd_page_addr(guest_addr);
882 /* clear the fault */
883 writel(DMA_FRCD_F, iommu->reg + reg +
884 fault_index * PRIMARY_FAULT_REG_LEN + 12);
886 spin_unlock_irqrestore(&iommu->register_lock, flag);
888 iommu_page_fault_do_one(iommu, type, fault_reason,
889 source_id, guest_addr);
891 fault_index++;
892 if (fault_index > cap_num_fault_regs(iommu->cap))
893 fault_index = 0;
894 spin_lock_irqsave(&iommu->register_lock, flag);
896 clear_overflow:
897 /* clear primary fault overflow */
898 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
899 if (fault_status & DMA_FSTS_PFO)
900 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
902 spin_unlock_irqrestore(&iommu->register_lock, flag);
903 return IRQ_HANDLED;
906 int dmar_set_interrupt(struct intel_iommu *iommu)
908 int irq, ret;
910 irq = create_irq();
911 if (!irq) {
912 printk(KERN_ERR "IOMMU: no free vectors\n");
913 return -EINVAL;
916 set_irq_data(irq, iommu);
917 iommu->irq = irq;
919 ret = arch_setup_dmar_msi(irq);
920 if (ret) {
921 set_irq_data(irq, NULL);
922 iommu->irq = 0;
923 destroy_irq(irq);
924 return 0;
927 /* Force fault register is cleared */
928 iommu_page_fault(irq, iommu);
930 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
931 if (ret)
932 printk(KERN_ERR "IOMMU: can't request irq\n");
933 return ret;
936 static int iommu_init_domains(struct intel_iommu *iommu)
938 unsigned long ndomains;
939 unsigned long nlongs;
941 ndomains = cap_ndoms(iommu->cap);
942 pr_debug("Number of Domains supportd <%ld>\n", ndomains);
943 nlongs = BITS_TO_LONGS(ndomains);
945 /* TBD: there might be 64K domains,
946 * consider other allocation for future chip
948 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
949 if (!iommu->domain_ids) {
950 printk(KERN_ERR "Allocating domain id array failed\n");
951 return -ENOMEM;
953 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
954 GFP_KERNEL);
955 if (!iommu->domains) {
956 printk(KERN_ERR "Allocating domain array failed\n");
957 kfree(iommu->domain_ids);
958 return -ENOMEM;
962 * if Caching mode is set, then invalid translations are tagged
963 * with domainid 0. Hence we need to pre-allocate it.
965 if (cap_caching_mode(iommu->cap))
966 set_bit(0, iommu->domain_ids);
967 return 0;
970 static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
972 struct intel_iommu *iommu;
973 int ret;
974 int map_size;
975 u32 ver;
977 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
978 if (!iommu)
979 return NULL;
980 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
981 if (!iommu->reg) {
982 printk(KERN_ERR "IOMMU: can't map the region\n");
983 goto error;
985 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
986 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
988 /* the registers might be more than one page */
989 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
990 cap_max_fault_reg_offset(iommu->cap));
991 map_size = PAGE_ALIGN_4K(map_size);
992 if (map_size > PAGE_SIZE_4K) {
993 iounmap(iommu->reg);
994 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
995 if (!iommu->reg) {
996 printk(KERN_ERR "IOMMU: can't map the region\n");
997 goto error;
1001 ver = readl(iommu->reg + DMAR_VER_REG);
1002 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
1003 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1004 iommu->cap, iommu->ecap);
1005 ret = iommu_init_domains(iommu);
1006 if (ret)
1007 goto error_unmap;
1008 spin_lock_init(&iommu->lock);
1009 spin_lock_init(&iommu->register_lock);
1011 drhd->iommu = iommu;
1012 return iommu;
1013 error_unmap:
1014 iounmap(iommu->reg);
1015 error:
1016 kfree(iommu);
1017 return NULL;
1020 static void domain_exit(struct dmar_domain *domain);
1021 static void free_iommu(struct intel_iommu *iommu)
1023 struct dmar_domain *domain;
1024 int i;
1026 if (!iommu)
1027 return;
1029 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1030 for (; i < cap_ndoms(iommu->cap); ) {
1031 domain = iommu->domains[i];
1032 clear_bit(i, iommu->domain_ids);
1033 domain_exit(domain);
1034 i = find_next_bit(iommu->domain_ids,
1035 cap_ndoms(iommu->cap), i+1);
1038 if (iommu->gcmd & DMA_GCMD_TE)
1039 iommu_disable_translation(iommu);
1041 if (iommu->irq) {
1042 set_irq_data(iommu->irq, NULL);
1043 /* This will mask the irq */
1044 free_irq(iommu->irq, iommu);
1045 destroy_irq(iommu->irq);
1048 kfree(iommu->domains);
1049 kfree(iommu->domain_ids);
1051 /* free context mapping */
1052 free_context_table(iommu);
1054 if (iommu->reg)
1055 iounmap(iommu->reg);
1056 kfree(iommu);
1059 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1061 unsigned long num;
1062 unsigned long ndomains;
1063 struct dmar_domain *domain;
1064 unsigned long flags;
1066 domain = alloc_domain_mem();
1067 if (!domain)
1068 return NULL;
1070 ndomains = cap_ndoms(iommu->cap);
1072 spin_lock_irqsave(&iommu->lock, flags);
1073 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1074 if (num >= ndomains) {
1075 spin_unlock_irqrestore(&iommu->lock, flags);
1076 free_domain_mem(domain);
1077 printk(KERN_ERR "IOMMU: no free domain ids\n");
1078 return NULL;
1081 set_bit(num, iommu->domain_ids);
1082 domain->id = num;
1083 domain->iommu = iommu;
1084 iommu->domains[num] = domain;
1085 spin_unlock_irqrestore(&iommu->lock, flags);
1087 return domain;
1090 static void iommu_free_domain(struct dmar_domain *domain)
1092 unsigned long flags;
1094 spin_lock_irqsave(&domain->iommu->lock, flags);
1095 clear_bit(domain->id, domain->iommu->domain_ids);
1096 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1099 static struct iova_domain reserved_iova_list;
1100 static struct lock_class_key reserved_alloc_key;
1101 static struct lock_class_key reserved_rbtree_key;
1103 static void dmar_init_reserved_ranges(void)
1105 struct pci_dev *pdev = NULL;
1106 struct iova *iova;
1107 int i;
1108 u64 addr, size;
1110 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1112 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1113 &reserved_alloc_key);
1114 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1115 &reserved_rbtree_key);
1117 /* IOAPIC ranges shouldn't be accessed by DMA */
1118 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1119 IOVA_PFN(IOAPIC_RANGE_END));
1120 if (!iova)
1121 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1123 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1124 for_each_pci_dev(pdev) {
1125 struct resource *r;
1127 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1128 r = &pdev->resource[i];
1129 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1130 continue;
1131 addr = r->start;
1132 addr &= PAGE_MASK_4K;
1133 size = r->end - addr;
1134 size = PAGE_ALIGN_4K(size);
1135 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1136 IOVA_PFN(size + addr) - 1);
1137 if (!iova)
1138 printk(KERN_ERR "Reserve iova failed\n");
1144 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1146 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
/*
 * Round a guest address width up to the next value of the form 12 + 9 * n,
 * capped at 64.
 */
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int rem = (gaw - 12) % 9;
	int width = rem ? gaw + 9 - rem : gaw;

	return width > 64 ? 64 : width;
}
1163 static int domain_init(struct dmar_domain *domain, int guest_width)
1165 struct intel_iommu *iommu;
1166 int adjust_width, agaw;
1167 unsigned long sagaw;
1169 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1170 spin_lock_init(&domain->mapping_lock);
1172 domain_reserve_special_ranges(domain);
1174 /* calculate AGAW */
1175 iommu = domain->iommu;
1176 if (guest_width > cap_mgaw(iommu->cap))
1177 guest_width = cap_mgaw(iommu->cap);
1178 domain->gaw = guest_width;
1179 adjust_width = guestwidth_to_adjustwidth(guest_width);
1180 agaw = width_to_agaw(adjust_width);
1181 sagaw = cap_sagaw(iommu->cap);
1182 if (!test_bit(agaw, &sagaw)) {
1183 /* hardware doesn't support it, choose a bigger one */
1184 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1185 agaw = find_next_bit(&sagaw, 5, agaw);
1186 if (agaw >= 5)
1187 return -ENODEV;
1189 domain->agaw = agaw;
1190 INIT_LIST_HEAD(&domain->devices);
1192 /* always allocate the top pgd */
1193 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1194 if (!domain->pgd)
1195 return -ENOMEM;
1196 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
1197 return 0;
1200 static void domain_exit(struct dmar_domain *domain)
1202 u64 end;
1204 /* Domain 0 is reserved, so dont process it */
1205 if (!domain)
1206 return;
1208 domain_remove_dev_info(domain);
1209 /* destroy iovas */
1210 put_iova_domain(&domain->iovad);
1211 end = DOMAIN_MAX_ADDR(domain->gaw);
1212 end = end & (~PAGE_MASK_4K);
1214 /* clear ptes */
1215 dma_pte_clear_range(domain, 0, end);
1217 /* free page tables */
1218 dma_pte_free_pagetable(domain, 0, end);
1220 iommu_free_domain(domain);
1221 free_domain_mem(domain);
1224 static int domain_context_mapping_one(struct dmar_domain *domain,
1225 u8 bus, u8 devfn)
1227 struct context_entry *context;
1228 struct intel_iommu *iommu = domain->iommu;
1229 unsigned long flags;
1231 pr_debug("Set context mapping for %02x:%02x.%d\n",
1232 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1233 BUG_ON(!domain->pgd);
1234 context = device_to_context_entry(iommu, bus, devfn);
1235 if (!context)
1236 return -ENOMEM;
1237 spin_lock_irqsave(&iommu->lock, flags);
1238 if (context_present(*context)) {
1239 spin_unlock_irqrestore(&iommu->lock, flags);
1240 return 0;
1243 context_set_domain_id(*context, domain->id);
1244 context_set_address_width(*context, domain->agaw);
1245 context_set_address_root(*context, virt_to_phys(domain->pgd));
1246 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1247 context_set_fault_enable(*context);
1248 context_set_present(*context);
1249 __iommu_flush_cache(iommu, context, sizeof(*context));
1251 /* it's a non-present to present mapping */
1252 if (iommu_flush_context_device(iommu, domain->id,
1253 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
1254 iommu_flush_write_buffer(iommu);
1255 else
1256 iommu_flush_iotlb_dsi(iommu, 0, 0);
1257 spin_unlock_irqrestore(&iommu->lock, flags);
1258 return 0;
1261 static int
1262 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1264 int ret;
1265 struct pci_dev *tmp, *parent;
1267 ret = domain_context_mapping_one(domain, pdev->bus->number,
1268 pdev->devfn);
1269 if (ret)
1270 return ret;
1272 /* dependent device mapping */
1273 tmp = pci_find_upstream_pcie_bridge(pdev);
1274 if (!tmp)
1275 return 0;
1276 /* Secondary interface's bus number and devfn 0 */
1277 parent = pdev->bus->self;
1278 while (parent != tmp) {
1279 ret = domain_context_mapping_one(domain, parent->bus->number,
1280 parent->devfn);
1281 if (ret)
1282 return ret;
1283 parent = parent->bus->self;
1285 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1286 return domain_context_mapping_one(domain,
1287 tmp->subordinate->number, 0);
1288 else /* this is a legacy PCI bridge */
1289 return domain_context_mapping_one(domain,
1290 tmp->bus->number, tmp->devfn);
1293 static int domain_context_mapped(struct dmar_domain *domain,
1294 struct pci_dev *pdev)
1296 int ret;
1297 struct pci_dev *tmp, *parent;
1299 ret = device_context_mapped(domain->iommu,
1300 pdev->bus->number, pdev->devfn);
1301 if (!ret)
1302 return ret;
1303 /* dependent device mapping */
1304 tmp = pci_find_upstream_pcie_bridge(pdev);
1305 if (!tmp)
1306 return ret;
1307 /* Secondary interface's bus number and devfn 0 */
1308 parent = pdev->bus->self;
1309 while (parent != tmp) {
1310 ret = device_context_mapped(domain->iommu, parent->bus->number,
1311 parent->devfn);
1312 if (!ret)
1313 return ret;
1314 parent = parent->bus->self;
1316 if (tmp->is_pcie)
1317 return device_context_mapped(domain->iommu,
1318 tmp->subordinate->number, 0);
1319 else
1320 return device_context_mapped(domain->iommu,
1321 tmp->bus->number, tmp->devfn);
1324 static int
1325 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1326 u64 hpa, size_t size, int prot)
1328 u64 start_pfn, end_pfn;
1329 struct dma_pte *pte;
1330 int index;
1332 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1333 return -EINVAL;
1334 iova &= PAGE_MASK_4K;
1335 start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
1336 end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
1337 index = 0;
1338 while (start_pfn < end_pfn) {
1339 pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
1340 if (!pte)
1341 return -ENOMEM;
1342 /* We don't need lock here, nobody else
1343 * touches the iova range
1345 BUG_ON(dma_pte_addr(*pte));
1346 dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
1347 dma_set_pte_prot(*pte, prot);
1348 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1349 start_pfn++;
1350 index++;
1352 return 0;
1355 static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1357 clear_context_table(domain->iommu, bus, devfn);
1358 iommu_flush_context_global(domain->iommu, 0);
1359 iommu_flush_iotlb_global(domain->iommu, 0);
1362 static void domain_remove_dev_info(struct dmar_domain *domain)
1364 struct device_domain_info *info;
1365 unsigned long flags;
1367 spin_lock_irqsave(&device_domain_lock, flags);
1368 while (!list_empty(&domain->devices)) {
1369 info = list_entry(domain->devices.next,
1370 struct device_domain_info, link);
1371 list_del(&info->link);
1372 list_del(&info->global);
1373 if (info->dev)
1374 info->dev->dev.archdata.iommu = NULL;
1375 spin_unlock_irqrestore(&device_domain_lock, flags);
1377 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1378 free_devinfo_mem(info);
1380 spin_lock_irqsave(&device_domain_lock, flags);
1382 spin_unlock_irqrestore(&device_domain_lock, flags);
1386 * find_domain
1387 * Note: we use struct pci_dev->dev.archdata.iommu stores the info
1389 struct dmar_domain *
1390 find_domain(struct pci_dev *pdev)
1392 struct device_domain_info *info;
1394 /* No lock here, assumes no domain exit in normal case */
1395 info = pdev->dev.archdata.iommu;
1396 if (info)
1397 return info->domain;
1398 return NULL;
1401 static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
1402 struct pci_dev *dev)
1404 int index;
1406 while (dev) {
1407 for (index = 0; index < cnt; index ++)
1408 if (dev == devices[index])
1409 return 1;
1411 /* Check our parent */
1412 dev = dev->bus->self;
1415 return 0;
1418 static struct dmar_drhd_unit *
1419 dmar_find_matched_drhd_unit(struct pci_dev *dev)
1421 struct dmar_drhd_unit *drhd = NULL;
1423 list_for_each_entry(drhd, &dmar_drhd_units, list) {
1424 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
1425 drhd->devices_cnt, dev))
1426 return drhd;
1429 return NULL;
1432 /* domain is initialized */
1433 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1435 struct dmar_domain *domain, *found = NULL;
1436 struct intel_iommu *iommu;
1437 struct dmar_drhd_unit *drhd;
1438 struct device_domain_info *info, *tmp;
1439 struct pci_dev *dev_tmp;
1440 unsigned long flags;
1441 int bus = 0, devfn = 0;
1443 domain = find_domain(pdev);
1444 if (domain)
1445 return domain;
1447 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1448 if (dev_tmp) {
1449 if (dev_tmp->is_pcie) {
1450 bus = dev_tmp->subordinate->number;
1451 devfn = 0;
1452 } else {
1453 bus = dev_tmp->bus->number;
1454 devfn = dev_tmp->devfn;
1456 spin_lock_irqsave(&device_domain_lock, flags);
1457 list_for_each_entry(info, &device_domain_list, global) {
1458 if (info->bus == bus && info->devfn == devfn) {
1459 found = info->domain;
1460 break;
1463 spin_unlock_irqrestore(&device_domain_lock, flags);
1464 /* pcie-pci bridge already has a domain, uses it */
1465 if (found) {
1466 domain = found;
1467 goto found_domain;
1471 /* Allocate new domain for the device */
1472 drhd = dmar_find_matched_drhd_unit(pdev);
1473 if (!drhd) {
1474 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1475 pci_name(pdev));
1476 return NULL;
1478 iommu = drhd->iommu;
1480 domain = iommu_alloc_domain(iommu);
1481 if (!domain)
1482 goto error;
1484 if (domain_init(domain, gaw)) {
1485 domain_exit(domain);
1486 goto error;
1489 /* register pcie-to-pci device */
1490 if (dev_tmp) {
1491 info = alloc_devinfo_mem();
1492 if (!info) {
1493 domain_exit(domain);
1494 goto error;
1496 info->bus = bus;
1497 info->devfn = devfn;
1498 info->dev = NULL;
1499 info->domain = domain;
1500 /* This domain is shared by devices under p2p bridge */
1501 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1503 /* pcie-to-pci bridge already has a domain, uses it */
1504 found = NULL;
1505 spin_lock_irqsave(&device_domain_lock, flags);
1506 list_for_each_entry(tmp, &device_domain_list, global) {
1507 if (tmp->bus == bus && tmp->devfn == devfn) {
1508 found = tmp->domain;
1509 break;
1512 if (found) {
1513 free_devinfo_mem(info);
1514 domain_exit(domain);
1515 domain = found;
1516 } else {
1517 list_add(&info->link, &domain->devices);
1518 list_add(&info->global, &device_domain_list);
1520 spin_unlock_irqrestore(&device_domain_lock, flags);
1523 found_domain:
1524 info = alloc_devinfo_mem();
1525 if (!info)
1526 goto error;
1527 info->bus = pdev->bus->number;
1528 info->devfn = pdev->devfn;
1529 info->dev = pdev;
1530 info->domain = domain;
1531 spin_lock_irqsave(&device_domain_lock, flags);
1532 /* somebody is fast */
1533 found = find_domain(pdev);
1534 if (found != NULL) {
1535 spin_unlock_irqrestore(&device_domain_lock, flags);
1536 if (found != domain) {
1537 domain_exit(domain);
1538 domain = found;
1540 free_devinfo_mem(info);
1541 return domain;
1543 list_add(&info->link, &domain->devices);
1544 list_add(&info->global, &device_domain_list);
1545 pdev->dev.archdata.iommu = info;
1546 spin_unlock_irqrestore(&device_domain_lock, flags);
1547 return domain;
1548 error:
1549 /* recheck it here, maybe others set it */
1550 return find_domain(pdev);
1553 static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
1555 struct dmar_domain *domain;
1556 unsigned long size;
1557 u64 base;
1558 int ret;
1560 printk(KERN_INFO
1561 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1562 pci_name(pdev), start, end);
1563 /* page table init */
1564 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1565 if (!domain)
1566 return -ENOMEM;
1568 /* The address might not be aligned */
1569 base = start & PAGE_MASK_4K;
1570 size = end - base;
1571 size = PAGE_ALIGN_4K(size);
1572 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1573 IOVA_PFN(base + size) - 1)) {
1574 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1575 ret = -ENOMEM;
1576 goto error;
1579 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1580 size, base, pci_name(pdev));
1582 * RMRR range might have overlap with physical memory range,
1583 * clear it first
1585 dma_pte_clear_range(domain, base, base + size);
1587 ret = domain_page_mapping(domain, base, base, size,
1588 DMA_PTE_READ|DMA_PTE_WRITE);
1589 if (ret)
1590 goto error;
1592 /* context entry init */
1593 ret = domain_context_mapping(domain, pdev);
1594 if (!ret)
1595 return 0;
1596 error:
1597 domain_exit(domain);
1598 return ret;
1602 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1603 struct pci_dev *pdev)
1605 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1606 return 0;
1607 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1608 rmrr->end_address + 1);
#ifdef CONFIG_DMAR_GFX_WA
extern int arch_get_ram_range(int slot, u64 *addr, u64 *size);

/*
 * Graphics workaround: give every graphics device that is not tagged with
 * DUMMY_DEVICE_DOMAIN_INFO a 1:1 mapping of each RAM range reported by
 * arch_get_ram_range(). A failure is logged and stops the work for that
 * device only; the remaining devices are still processed.
 */
static void __init iommu_prepare_gfx_mapping(void)
{
	struct pci_dev *pdev = NULL;
	u64 base, size;
	int slot;
	int ret;

	for_each_pci_dev(pdev) {
		if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
			continue;
		if (!IS_GFX_DEVICE(pdev))
			continue;

		printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
			pci_name(pdev));

		ret = 0;
		for (slot = arch_get_ram_range(0, &base, &size);
		     slot >= 0;
		     slot = arch_get_ram_range(slot, &base, &size)) {
			ret = iommu_prepare_identity_map(pdev,
					base, base + size);
			if (ret)
				break;
		}

		if (ret)
			printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
	}
}
#endif
#ifdef CONFIG_DMAR_FLOPPY_WA
/*
 * Floppy workaround: identity-map the first 16MB for the ISA bridge found
 * via pci_get_class(), so that legacy ISA DMA keeps working. Does nothing
 * when no ISA bridge is present. A mapping failure is only logged.
 */
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *pdev;
	int ret;

	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (!pdev)
		return;

	printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);

	/* The range attempted above is 0-16M; report exactly that. */
	if (ret)
		printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
			"floppy might not work\n");
}
#else
/* Workaround disabled: nothing to prepare. */
static inline void iommu_prepare_isa(void)
{
	return;
}
#endif /* !CONFIG_DMAR_FLOPPY_WA */
1666 int __init init_dmars(void)
1668 struct dmar_drhd_unit *drhd;
1669 struct dmar_rmrr_unit *rmrr;
1670 struct pci_dev *pdev;
1671 struct intel_iommu *iommu;
1672 int ret, unit = 0;
1675 * for each drhd
1676 * allocate root
1677 * initialize and program root entry to not present
1678 * endfor
1680 for_each_drhd_unit(drhd) {
1681 if (drhd->ignored)
1682 continue;
1683 iommu = alloc_iommu(drhd);
1684 if (!iommu) {
1685 ret = -ENOMEM;
1686 goto error;
1690 * TBD:
1691 * we could share the same root & context tables
1692 * amoung all IOMMU's. Need to Split it later.
1694 ret = iommu_alloc_root_entry(iommu);
1695 if (ret) {
1696 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1697 goto error;
1702 * For each rmrr
1703 * for each dev attached to rmrr
1704 * do
1705 * locate drhd for dev, alloc domain for dev
1706 * allocate free domain
1707 * allocate page table entries for rmrr
1708 * if context not allocated for bus
1709 * allocate and init context
1710 * set present in root table for this bus
1711 * init context with domain, translation etc
1712 * endfor
1713 * endfor
1715 for_each_rmrr_units(rmrr) {
1716 int i;
1717 for (i = 0; i < rmrr->devices_cnt; i++) {
1718 pdev = rmrr->devices[i];
1719 /* some BIOS lists non-exist devices in DMAR table */
1720 if (!pdev)
1721 continue;
1722 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1723 if (ret)
1724 printk(KERN_ERR
1725 "IOMMU: mapping reserved region failed\n");
1729 iommu_prepare_gfx_mapping();
1731 iommu_prepare_isa();
1734 * for each drhd
1735 * enable fault log
1736 * global invalidate context cache
1737 * global invalidate iotlb
1738 * enable translation
1740 for_each_drhd_unit(drhd) {
1741 if (drhd->ignored)
1742 continue;
1743 iommu = drhd->iommu;
1744 sprintf (iommu->name, "dmar%d", unit++);
1746 iommu_flush_write_buffer(iommu);
1748 ret = dmar_set_interrupt(iommu);
1749 if (ret)
1750 goto error;
1752 iommu_set_root_entry(iommu);
1754 iommu_flush_context_global(iommu, 0);
1755 iommu_flush_iotlb_global(iommu, 0);
1757 iommu_disable_protect_mem_regions(iommu);
1759 ret = iommu_enable_translation(iommu);
1760 if (ret)
1761 goto error;
1764 return 0;
1765 error:
1766 for_each_drhd_unit(drhd) {
1767 if (drhd->ignored)
1768 continue;
1769 iommu = drhd->iommu;
1770 free_iommu(iommu);
1772 return ret;
1775 static inline u64 aligned_size(u64 host_addr, size_t size)
1777 u64 addr;
1778 addr = (host_addr & (~PAGE_MASK_4K)) + size;
1779 return PAGE_ALIGN_4K(addr);
1782 struct iova *
1783 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1785 struct iova *piova;
1787 /* Make sure it's in range */
1788 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1789 if (!size || (IOVA_START_ADDR + size > end))
1790 return NULL;
1792 piova = alloc_iova(&domain->iovad,
1793 size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1);
1794 return piova;
1797 static struct iova *
1798 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1799 size_t size)
1801 struct pci_dev *pdev = to_pci_dev(dev);
1802 struct iova *iova = NULL;
1804 if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) {
1805 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1806 } else {
1808 * First try to allocate an io virtual address in
1809 * DMA_32BIT_MASK and if that fails then try allocating
1810 * from higher range
1812 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1813 if (!iova)
1814 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1817 if (!iova) {
1818 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1819 return NULL;
1822 return iova;
1825 static struct dmar_domain *
1826 get_valid_domain_for_dev(struct pci_dev *pdev)
1828 struct dmar_domain *domain;
1829 int ret;
1831 domain = get_domain_for_dev(pdev,
1832 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1833 if (!domain) {
1834 printk(KERN_ERR
1835 "Allocating domain for %s failed", pci_name(pdev));
1836 return NULL;
1839 /* make sure context mapping is ok */
1840 if (unlikely(!domain_context_mapped(domain, pdev))) {
1841 ret = domain_context_mapping(domain, pdev);
1842 if (ret) {
1843 printk(KERN_ERR
1844 "Domain context map for %s failed",
1845 pci_name(pdev));
1846 return NULL;
1850 return domain;
1853 static dma_addr_t intel_map_single(struct device *hwdev, void *addr,
1854 size_t size, int dir)
1856 struct pci_dev *pdev = to_pci_dev(hwdev);
1857 int ret;
1858 struct dmar_domain *domain;
1859 unsigned long start_addr;
1860 struct iova *iova;
1861 int prot = 0;
1863 BUG_ON(dir == DMA_NONE);
1864 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1865 return virt_to_bus(addr);
1867 domain = get_valid_domain_for_dev(pdev);
1868 if (!domain)
1869 return 0;
1871 addr = (void *)virt_to_phys(addr);
1872 size = aligned_size((u64)addr, size);
1874 iova = __intel_alloc_iova(hwdev, domain, size);
1875 if (!iova)
1876 goto error;
1878 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1881 * Check if DMAR supports zero-length reads on write only
1882 * mappings..
1884 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1885 !cap_zlr(domain->iommu->cap))
1886 prot |= DMA_PTE_READ;
1887 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1888 prot |= DMA_PTE_WRITE;
1890 * addr - (addr + size) might be partial page, we should map the whole
1891 * page. Note: if two part of one page are separately mapped, we
1892 * might have two guest_addr mapping to the same host addr, but this
1893 * is not a big problem
1895 ret = domain_page_mapping(domain, start_addr,
1896 ((u64)addr) & PAGE_MASK_4K, size, prot);
1897 if (ret)
1898 goto error;
1900 pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
1901 pci_name(pdev), size, (u64)addr,
1902 size, (u64)start_addr, dir);
1904 /* it's a non-present to present mapping */
1905 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
1906 start_addr, size >> PAGE_SHIFT_4K, 1);
1907 if (ret)
1908 iommu_flush_write_buffer(domain->iommu);
1910 return (start_addr + ((u64)addr & (~PAGE_MASK_4K)));
1912 error:
1913 if (iova)
1914 __free_iova(&domain->iovad, iova);
1915 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
1916 pci_name(pdev), size, (u64)addr, dir);
1917 return 0;
1920 static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
1921 size_t size, int dir)
1923 struct pci_dev *pdev = to_pci_dev(dev);
1924 struct dmar_domain *domain;
1925 unsigned long start_addr;
1926 struct iova *iova;
1928 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1929 return;
1930 domain = find_domain(pdev);
1931 BUG_ON(!domain);
1933 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
1934 if (!iova)
1935 return;
1937 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1938 size = aligned_size((u64)dev_addr, size);
1940 pr_debug("Device %s unmapping: %lx@%llx\n",
1941 pci_name(pdev), size, (u64)start_addr);
1943 /* clear the whole page */
1944 dma_pte_clear_range(domain, start_addr, start_addr + size);
1945 /* free page tables */
1946 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1948 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
1949 size >> PAGE_SHIFT_4K, 0))
1950 iommu_flush_write_buffer(domain->iommu);
1952 /* free iova */
1953 __free_iova(&domain->iovad, iova);
1956 static void * intel_alloc_coherent(struct device *hwdev, size_t size,
1957 dma_addr_t *dma_handle, gfp_t flags)
1959 void *vaddr;
1960 int order;
1962 size = PAGE_ALIGN_4K(size);
1963 order = get_order(size);
1964 flags &= ~(GFP_DMA | GFP_DMA32);
1966 vaddr = (void *)__get_free_pages(flags, order);
1967 if (!vaddr)
1968 return NULL;
1969 memset(vaddr, 0, size);
1971 *dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL);
1972 if (*dma_handle)
1973 return vaddr;
1974 free_pages((unsigned long)vaddr, order);
1975 return NULL;
1978 static void intel_free_coherent(struct device *hwdev, size_t size,
1979 void *vaddr, dma_addr_t dma_handle)
1981 int order;
1983 size = PAGE_ALIGN_4K(size);
1984 order = get_order(size);
1986 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
1987 free_pages((unsigned long)vaddr, order);
1990 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
1991 static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
1992 int nelems, int dir)
1994 int i;
1995 struct pci_dev *pdev = to_pci_dev(hwdev);
1996 struct dmar_domain *domain;
1997 unsigned long start_addr;
1998 struct iova *iova;
1999 size_t size = 0;
2000 void *addr;
2001 struct scatterlist *sg;
2003 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2004 return;
2006 domain = find_domain(pdev);
2008 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2009 if (!iova)
2010 return;
2011 for_each_sg(sglist, sg, nelems, i) {
2012 addr = SG_ENT_VIRT_ADDRESS(sg);
2013 size += aligned_size((u64)addr, sg->length);
2016 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2018 /* clear the whole page */
2019 dma_pte_clear_range(domain, start_addr, start_addr + size);
2020 /* free page tables */
2021 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2023 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
2024 size >> PAGE_SHIFT_4K, 0))
2025 iommu_flush_write_buffer(domain->iommu);
2027 /* free iova */
2028 __free_iova(&domain->iovad, iova);
2031 static int intel_nontranslate_map_sg(struct device *hddev,
2032 struct scatterlist *sglist, int nelems, int dir)
2034 int i;
2035 struct scatterlist *sg;
2037 for_each_sg(sglist, sg, nelems, i) {
2038 BUG_ON(!sg_page(sg));
2039 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2040 sg->dma_length = sg->length;
2042 return nelems;
2045 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
2046 int nelems, int dir)
2048 void *addr;
2049 int i;
2050 struct pci_dev *pdev = to_pci_dev(hwdev);
2051 struct dmar_domain *domain;
2052 size_t size = 0;
2053 int prot = 0;
2054 size_t offset = 0;
2055 struct iova *iova = NULL;
2056 int ret;
2057 struct scatterlist *sg;
2058 unsigned long start_addr;
2060 BUG_ON(dir == DMA_NONE);
2061 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2062 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2064 domain = get_valid_domain_for_dev(pdev);
2065 if (!domain)
2066 return 0;
2068 for_each_sg(sglist, sg, nelems, i) {
2069 addr = SG_ENT_VIRT_ADDRESS(sg);
2070 addr = (void *)virt_to_phys(addr);
2071 size += aligned_size((u64)addr, sg->length);
2074 iova = __intel_alloc_iova(hwdev, domain, size);
2075 if (!iova) {
2076 sglist->dma_length = 0;
2077 return 0;
2081 * Check if DMAR supports zero-length reads on write only
2082 * mappings..
2084 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2085 !cap_zlr(domain->iommu->cap))
2086 prot |= DMA_PTE_READ;
2087 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2088 prot |= DMA_PTE_WRITE;
2090 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2091 offset = 0;
2092 for_each_sg(sglist, sg, nelems, i) {
2093 addr = SG_ENT_VIRT_ADDRESS(sg);
2094 addr = (void *)virt_to_phys(addr);
2095 size = aligned_size((u64)addr, sg->length);
2096 ret = domain_page_mapping(domain, start_addr + offset,
2097 ((u64)addr) & PAGE_MASK_4K,
2098 size, prot);
2099 if (ret) {
2100 /* clear the page */
2101 dma_pte_clear_range(domain, start_addr,
2102 start_addr + offset);
2103 /* free page tables */
2104 dma_pte_free_pagetable(domain, start_addr,
2105 start_addr + offset);
2106 /* free iova */
2107 __free_iova(&domain->iovad, iova);
2108 return 0;
2110 sg->dma_address = start_addr + offset +
2111 ((u64)addr & (~PAGE_MASK_4K));
2112 sg->dma_length = sg->length;
2113 offset += size;
2116 /* it's a non-present to present mapping */
2117 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2118 start_addr, offset >> PAGE_SHIFT_4K, 1))
2119 iommu_flush_write_buffer(domain->iommu);
2120 return nelems;
2123 static struct dma_mapping_ops intel_dma_ops = {
2124 .alloc_coherent = intel_alloc_coherent,
2125 .free_coherent = intel_free_coherent,
2126 .map_single = intel_map_single,
2127 .unmap_single = intel_unmap_single,
2128 .map_sg = intel_map_sg,
2129 .unmap_sg = intel_unmap_sg,
2132 static inline int iommu_domain_cache_init(void)
2134 int ret = 0;
2136 iommu_domain_cache = kmem_cache_create("iommu_domain",
2137 sizeof(struct dmar_domain),
2139 SLAB_HWCACHE_ALIGN,
2141 NULL);
2142 if (!iommu_domain_cache) {
2143 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2144 ret = -ENOMEM;
2147 return ret;
2150 static inline int iommu_devinfo_cache_init(void)
2152 int ret = 0;
2154 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2155 sizeof(struct device_domain_info),
2157 SLAB_HWCACHE_ALIGN,
2159 NULL);
2160 if (!iommu_devinfo_cache) {
2161 printk(KERN_ERR "Couldn't create devinfo cache\n");
2162 ret = -ENOMEM;
2165 return ret;
2168 static inline int iommu_iova_cache_init(void)
2170 int ret = 0;
2172 iommu_iova_cache = kmem_cache_create("iommu_iova",
2173 sizeof(struct iova),
2175 SLAB_HWCACHE_ALIGN,
2177 NULL);
2178 if (!iommu_iova_cache) {
2179 printk(KERN_ERR "Couldn't create iova cache\n");
2180 ret = -ENOMEM;
2183 return ret;
2186 static int __init iommu_init_mempool(void)
2188 int ret;
2189 ret = iommu_iova_cache_init();
2190 if (ret)
2191 return ret;
2193 ret = iommu_domain_cache_init();
2194 if (ret)
2195 goto domain_error;
2197 ret = iommu_devinfo_cache_init();
2198 if (!ret)
2199 return ret;
2201 kmem_cache_destroy(iommu_domain_cache);
2202 domain_error:
2203 kmem_cache_destroy(iommu_iova_cache);
2205 return -ENOMEM;
2208 static void __init iommu_exit_mempool(void)
2210 kmem_cache_destroy(iommu_devinfo_cache);
2211 kmem_cache_destroy(iommu_domain_cache);
2212 kmem_cache_destroy(iommu_iova_cache);
2216 void __init detect_intel_iommu(void)
2218 if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
2219 return;
2220 if (early_dmar_detect()) {
2221 iommu_detected = 1;
2225 static void __init init_no_remapping_devices(void)
2227 struct dmar_drhd_unit *drhd;
2229 for_each_drhd_unit(drhd) {
2230 if (!drhd->include_all) {
2231 int i;
2232 for (i = 0; i < drhd->devices_cnt; i++)
2233 if (drhd->devices[i] != NULL)
2234 break;
2235 /* ignore DMAR unit if no pci devices exist */
2236 if (i == drhd->devices_cnt)
2237 drhd->ignored = 1;
2241 if (dmar_map_gfx)
2242 return;
2244 for_each_drhd_unit(drhd) {
2245 int i;
2246 if (drhd->ignored || drhd->include_all)
2247 continue;
2249 for (i = 0; i < drhd->devices_cnt; i++)
2250 if (drhd->devices[i] &&
2251 !IS_GFX_DEVICE(drhd->devices[i]))
2252 break;
2254 if (i < drhd->devices_cnt)
2255 continue;
2257 /* bypass IOMMU if it is just for gfx devices */
2258 drhd->ignored = 1;
2259 for (i = 0; i < drhd->devices_cnt; i++) {
2260 if (!drhd->devices[i])
2261 continue;
2262 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2267 int __init intel_iommu_init(void)
2269 int ret = 0;
2271 if (no_iommu || swiotlb || dmar_disabled)
2272 return -ENODEV;
2274 if (dmar_table_init())
2275 return -ENODEV;
2277 iommu_init_mempool();
2278 dmar_init_reserved_ranges();
2280 init_no_remapping_devices();
2282 ret = init_dmars();
2283 if (ret) {
2284 printk(KERN_ERR "IOMMU: dmar init failed\n");
2285 put_iova_domain(&reserved_iova_list);
2286 iommu_exit_mempool();
2287 return ret;
2289 printk(KERN_INFO
2290 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2292 force_iommu = 1;
2293 dma_ops = &intel_dma_ops;
2294 return 0;