/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *
 * This file implements early detection/parsing of Remapping Devices
 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
 * tables.
 *
 * These routines are used by both DMA-remapping and Interrupt-remapping
 */
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/iova.h>
#include <linux/intel-iommu.h>
#include <linux/timer.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <asm/iommu_table.h>

#define PREFIX "DMAR: "
/* No locks are needed as DMA remapping hardware unit
 * list is constructed at boot time and hotplug of
 * these units are not supported by the architecture.
 */
LIST_HEAD(dmar_drhd_units);

static struct acpi_table_header * __initdata dmar_tbl;
static acpi_size dmar_tbl_size;
static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
{
        /*
         * add INCLUDE_ALL at the tail, so scan the list will find it at
         * the very end.
         */
        if (drhd->include_all)
                list_add_tail(&drhd->list, &dmar_drhd_units);
        else
                list_add(&drhd->list, &dmar_drhd_units);
}
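/*
 * Resolve one ACPI device scope entry to a pci_dev: start from the bus
 * number in the entry and walk the (device, function) path elements,
 * descending through subordinate buses, until the target is reached.
 */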
static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
                                           struct pci_dev **dev, u16 segment)
{
        struct pci_bus *bus;
        struct pci_dev *pdev = NULL;
        struct acpi_dmar_pci_path *path;
        int count;

        bus = pci_find_bus(segment, scope->bus);
        path = (struct acpi_dmar_pci_path *)(scope + 1);
        count = (scope->length - sizeof(struct acpi_dmar_device_scope))
                / sizeof(struct acpi_dmar_pci_path);

        while (count) {
                if (pdev)
                        pci_dev_put(pdev);
                /*
                 * Some BIOSes list non-exist devices in DMAR table, just
                 * ignore it
                 */
                if (!bus) {
                        printk(KERN_WARNING
                        PREFIX "Device scope bus [%d] not found\n",
                        scope->bus);
                        break;
                }
                pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
                if (!pdev) {
                        printk(KERN_WARNING PREFIX
                        "Device scope device [%04x:%02x:%02x.%02x] not found\n",
                                segment, bus->number, path->dev, path->fn);
                        break;
                }
                path++;
                count--;
                bus = pdev->subordinate;
        }
        if (!pdev) {
                printk(KERN_WARNING PREFIX
                "Device scope device [%04x:%02x:%02x.%02x] not found\n",
                        segment, scope->bus, path->dev, path->fn);
                *dev = NULL;
                return 0;
        }
        if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
                        pdev->subordinate) || (scope->entry_type ==
                        ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
                pci_dev_put(pdev);
                printk(KERN_WARNING PREFIX
                        "Device scope type does not match for %s\n",
                        pci_name(pdev));
                return -EINVAL;
        }

        *dev = pdev;
        return 0;
}
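/*
 * Walk a device scope list twice: first count the endpoint and bridge
 * entries, then allocate a pci_dev array and resolve each entry into it.
 */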
static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
                                       struct pci_dev ***devices, u16 segment)
{
        struct acpi_dmar_device_scope *scope;
        void *tmp = start;
        int index;
        int ret;

        *cnt = 0;
        while (start < end) {
                scope = start;
                if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
                    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
                        (*cnt)++;
                else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
                        printk(KERN_WARNING PREFIX
                               "Unsupported device scope\n");
                }
                start += scope->length;
        }
        if (*cnt == 0)
                return 0;

        *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
        if (!*devices)
                return -ENOMEM;

        start = tmp;
        index = 0;
        while (start < end) {
                scope = start;
                if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
                    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
                        ret = dmar_parse_one_dev_scope(scope,
                                &(*devices)[index], segment);
                        if (ret) {
                                kfree(*devices);
                                return ret;
                        }
                        index++;
                }
                start += scope->length;
        }

        return 0;
}
/**
 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
 * structure which uniquely represent one DMA remapping hardware unit
 * present in the platform
 */
static int __init
dmar_parse_one_drhd(struct acpi_dmar_header *header)
{
        struct acpi_dmar_hardware_unit *drhd;
        struct dmar_drhd_unit *dmaru;
        int ret = 0;

        drhd = (struct acpi_dmar_hardware_unit *)header;
        dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
        if (!dmaru)
                return -ENOMEM;

        dmaru->hdr = header;
        dmaru->reg_base_addr = drhd->address;
        dmaru->segment = drhd->segment;
        dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */

        ret = alloc_iommu(dmaru);
        if (ret) {
                kfree(dmaru);
                return ret;
        }
        dmar_register_drhd_unit(dmaru);
        return 0;
}
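/*
 * Resolve the device scope of one DRHD unit; INCLUDE_ALL units carry no
 * explicit scope, so there is nothing to parse for them.
 */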
static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
{
        struct acpi_dmar_hardware_unit *drhd;
        int ret = 0;

        drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;

        if (dmaru->include_all)
                return 0;

        ret = dmar_parse_dev_scope((void *)(drhd + 1),
                                ((void *)drhd) + drhd->header.length,
                                &dmaru->devices_cnt, &dmaru->devices,
                                drhd->segment);
        if (ret) {
                list_del(&dmaru->list);
                kfree(dmaru);
        }
        return ret;
}
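/*
 * RMRR (reserved memory regions) and ATSR (root ports supporting ATS)
 * structures are only consumed by DMA remapping, hence the CONFIG_DMAR
 * guard below.
 */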
#ifdef CONFIG_DMAR
LIST_HEAD(dmar_rmrr_units);

static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
{
        list_add(&rmrr->list, &dmar_rmrr_units);
}

static int __init
dmar_parse_one_rmrr(struct acpi_dmar_header *header)
{
        struct acpi_dmar_reserved_memory *rmrr;
        struct dmar_rmrr_unit *rmrru;

        rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
        if (!rmrru)
                return -ENOMEM;

        rmrru->hdr = header;
        rmrr = (struct acpi_dmar_reserved_memory *)header;
        rmrru->base_address = rmrr->base_address;
        rmrru->end_address = rmrr->end_address;

        dmar_register_rmrr_unit(rmrru);
        return 0;
}

static int __init
rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
{
        struct acpi_dmar_reserved_memory *rmrr;
        int ret;

        rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
        ret = dmar_parse_dev_scope((void *)(rmrr + 1),
                ((void *)rmrr) + rmrr->header.length,
                &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);

        if (ret || (rmrru->devices_cnt == 0)) {
                list_del(&rmrru->list);
                kfree(rmrru);
        }
        return ret;
}
LIST_HEAD(dmar_atsr_units);

static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
{
        struct acpi_dmar_atsr *atsr;
        struct dmar_atsr_unit *atsru;

        atsr = container_of(hdr, struct acpi_dmar_atsr, header);
        atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
        if (!atsru)
                return -ENOMEM;

        atsru->hdr = hdr;
        atsru->include_all = atsr->flags & 0x1;
        atsru->segment = atsr->segment;

        list_add(&atsru->list, &dmar_atsr_units);

        return 0;
}

static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
{
        int rc;
        struct acpi_dmar_atsr *atsr;

        if (atsru->include_all)
                return 0;

        atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
        rc = dmar_parse_dev_scope((void *)(atsr + 1),
                                (void *)atsr + atsr->header.length,
                                &atsru->devices_cnt, &atsru->devices,
                                atsr->segment);
        if (rc || !atsru->devices_cnt) {
                list_del(&atsru->list);
                kfree(atsru);
        }

        return rc;
}
int dmar_find_matched_atsr_unit(struct pci_dev *dev)
{
        int i;
        struct pci_bus *bus;
        struct dmar_atsr_unit *atsru;

        dev = pci_physfn(dev);

        list_for_each_entry(atsru, &dmar_atsr_units, list) {
                if (atsru->segment == pci_domain_nr(dev->bus))
                        goto found;
        }

        return 0;

found:
        for (bus = dev->bus; bus; bus = bus->parent) {
                struct pci_dev *bridge = bus->self;

                if (!bridge || !pci_is_pcie(bridge) ||
                    bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
                        return 0;

                if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
                        for (i = 0; i < atsru->devices_cnt; i++)
                                if (atsru->devices[i] == bridge)
                                        return 1;
                        break;
                }
        }

        if (atsru->include_all)
                return 1;

        return 0;
}
#endif
#ifdef CONFIG_ACPI_NUMA
static int __init
dmar_parse_one_rhsa(struct acpi_dmar_header *header)
{
        struct acpi_dmar_rhsa *rhsa;
        struct dmar_drhd_unit *drhd;

        rhsa = (struct acpi_dmar_rhsa *)header;
        for_each_drhd_unit(drhd) {
                if (drhd->reg_base_addr == rhsa->base_address) {
                        int node = acpi_map_pxm_to_node(rhsa->proximity_domain);

                        if (!node_online(node))
                                node = -1;
                        drhd->iommu->node = node;
                        return 0;
                }
        }
        WARN_TAINT(
                1, TAINT_FIRMWARE_WORKAROUND,
                "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
                "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
                drhd->reg_base_addr,
                dmi_get_system_info(DMI_BIOS_VENDOR),
                dmi_get_system_info(DMI_BIOS_VERSION),
                dmi_get_system_info(DMI_PRODUCT_VERSION));

        return 0;
}
#endif
static void __init
dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
{
        struct acpi_dmar_hardware_unit *drhd;
        struct acpi_dmar_reserved_memory *rmrr;
        struct acpi_dmar_atsr *atsr;
        struct acpi_dmar_rhsa *rhsa;

        switch (header->type) {
        case ACPI_DMAR_TYPE_HARDWARE_UNIT:
                drhd = container_of(header, struct acpi_dmar_hardware_unit,
                                    header);
                printk (KERN_INFO PREFIX
                        "DRHD base: %#016Lx flags: %#x\n",
                        (unsigned long long)drhd->address, drhd->flags);
                break;
        case ACPI_DMAR_TYPE_RESERVED_MEMORY:
                rmrr = container_of(header, struct acpi_dmar_reserved_memory,
                                    header);
                printk (KERN_INFO PREFIX
                        "RMRR base: %#016Lx end: %#016Lx\n",
                        (unsigned long long)rmrr->base_address,
                        (unsigned long long)rmrr->end_address);
                break;
        case ACPI_DMAR_TYPE_ATSR:
                atsr = container_of(header, struct acpi_dmar_atsr, header);
                printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
                break;
        case ACPI_DMAR_HARDWARE_AFFINITY:
                rhsa = container_of(header, struct acpi_dmar_rhsa, header);
                printk(KERN_INFO PREFIX "RHSA base: %#016Lx proximity domain: %#x\n",
                       (unsigned long long)rhsa->base_address,
                       rhsa->proximity_domain);
                break;
        }
}
/**
 * dmar_table_detect - checks to see if the platform supports DMAR devices
 */
static int __init dmar_table_detect(void)
{
        acpi_status status = AE_OK;

        /* if we could find DMAR table, then there are DMAR devices */
        status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
                                (struct acpi_table_header **)&dmar_tbl,
                                &dmar_tbl_size);

        if (ACPI_SUCCESS(status) && !dmar_tbl) {
                printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
                status = AE_NOT_FOUND;
        }

        return (ACPI_SUCCESS(status) ? 1 : 0);
}
/**
 * parse_dmar_table - parses the DMA reporting table
 */
static int __init
parse_dmar_table(void)
{
        struct acpi_table_dmar *dmar;
        struct acpi_dmar_header *entry_header;
        int ret = 0;

        /*
         * Do it again, earlier dmar_tbl mapping could be mapped with
         * fixed map.
         */
        dmar_table_detect();

        /*
         * ACPI tables may not be DMA protected by tboot, so use DMAR copy
         * SINIT saved in SinitMleData in TXT heap (which is DMA protected)
         */
        dmar_tbl = tboot_get_dmar_table(dmar_tbl);

        dmar = (struct acpi_table_dmar *)dmar_tbl;
        if (!dmar)
                return -ENODEV;

        if (dmar->width < PAGE_SHIFT - 1) {
                printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
                return -EINVAL;
        }

        printk (KERN_INFO PREFIX "Host address width %d\n",
                dmar->width + 1);

        entry_header = (struct acpi_dmar_header *)(dmar + 1);
        while (((unsigned long)entry_header) <
                        (((unsigned long)dmar) + dmar_tbl->length)) {
                /* Avoid looping forever on bad ACPI tables */
                if (entry_header->length == 0) {
                        printk(KERN_WARNING PREFIX
                                "Invalid 0-length structure\n");
                        ret = -EINVAL;
                        break;
                }

                dmar_table_print_dmar_entry(entry_header);

                switch (entry_header->type) {
                case ACPI_DMAR_TYPE_HARDWARE_UNIT:
                        ret = dmar_parse_one_drhd(entry_header);
                        break;
                case ACPI_DMAR_TYPE_RESERVED_MEMORY:
#ifdef CONFIG_DMAR
                        ret = dmar_parse_one_rmrr(entry_header);
#endif
                        break;
                case ACPI_DMAR_TYPE_ATSR:
#ifdef CONFIG_DMAR
                        ret = dmar_parse_one_atsr(entry_header);
#endif
                        break;
                case ACPI_DMAR_HARDWARE_AFFINITY:
#ifdef CONFIG_ACPI_NUMA
                        ret = dmar_parse_one_rhsa(entry_header);
#endif
                        break;
                default:
                        printk(KERN_WARNING PREFIX
                                "Unknown DMAR structure type %d\n",
                                entry_header->type);
                        ret = 0; /* for forward compatibility */
                        break;
                }
                if (ret)
                        break;

                entry_header = ((void *)entry_header + entry_header->length);
        }
        return ret;
}
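/*
 * Return 1 if @dev, or any bridge on the path from @dev towards the root,
 * appears in the given device array.
 */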
static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
                          struct pci_dev *dev)
{
        int index;

        while (dev) {
                for (index = 0; index < cnt; index++)
                        if (dev == devices[index])
                                return 1;

                /* Check our parent */
                dev = dev->bus->self;
        }

        return 0;
}
struct dmar_drhd_unit *
dmar_find_matched_drhd_unit(struct pci_dev *dev)
{
        struct dmar_drhd_unit *dmaru = NULL;
        struct acpi_dmar_hardware_unit *drhd;

        dev = pci_physfn(dev);

        list_for_each_entry(dmaru, &dmar_drhd_units, list) {
                drhd = container_of(dmaru->hdr,
                                    struct acpi_dmar_hardware_unit,
                                    header);

                if (dmaru->include_all &&
                    drhd->segment == pci_domain_nr(dev->bus))
                        return dmaru;

                if (dmar_pci_device_match(dmaru->devices,
                                          dmaru->devices_cnt, dev))
                        return dmaru;
        }

        return NULL;
}
int __init dmar_dev_scope_init(void)
{
        struct dmar_drhd_unit *drhd, *drhd_n;
        int ret = -ENODEV;

        list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
                ret = dmar_parse_dev(drhd);
                if (ret)
                        return ret;
        }

#ifdef CONFIG_DMAR
        {
                struct dmar_rmrr_unit *rmrr, *rmrr_n;
                struct dmar_atsr_unit *atsr, *atsr_n;

                list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
                        ret = rmrr_parse_dev(rmrr);
                        if (ret)
                                return ret;
                }

                list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
                        ret = atsr_parse_dev(atsr);
                        if (ret)
                                return ret;
                }
        }
#endif

        return ret;
}
int __init dmar_table_init(void)
{
        static int dmar_table_initialized;
        int ret;

        if (dmar_table_initialized)
                return 0;

        dmar_table_initialized = 1;

        ret = parse_dmar_table();
        if (ret) {
                if (ret != -ENODEV)
                        printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
                return ret;
        }

        if (list_empty(&dmar_drhd_units)) {
                printk(KERN_INFO PREFIX "No DMAR devices found\n");
                return -ENODEV;
        }

#ifdef CONFIG_DMAR
        if (list_empty(&dmar_rmrr_units))
                printk(KERN_INFO PREFIX "No RMRR found\n");

        if (list_empty(&dmar_atsr_units))
                printk(KERN_INFO PREFIX "No ATSR found\n");
#endif

        return 0;
}
static void warn_invalid_dmar(u64 addr, const char *message)
{
        WARN_TAINT_ONCE(
                1, TAINT_FIRMWARE_WORKAROUND,
                "Your BIOS is broken; DMAR reported at address %llx%s!\n"
                "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
                addr, message,
                dmi_get_system_info(DMI_BIOS_VENDOR),
                dmi_get_system_info(DMI_BIOS_VERSION),
                dmi_get_system_info(DMI_PRODUCT_VERSION));
}
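/*
 * Sanity check every DRHD entry before trusting the table: a register
 * base of zero, or capability registers that read back as all ones,
 * indicate broken firmware; in that case DMA remapping is disabled.
 */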
int __init check_zero_address(void)
{
        struct acpi_table_dmar *dmar;
        struct acpi_dmar_header *entry_header;
        struct acpi_dmar_hardware_unit *drhd;

        dmar = (struct acpi_table_dmar *)dmar_tbl;
        entry_header = (struct acpi_dmar_header *)(dmar + 1);

        while (((unsigned long)entry_header) <
                        (((unsigned long)dmar) + dmar_tbl->length)) {
                /* Avoid looping forever on bad ACPI tables */
                if (entry_header->length == 0) {
                        printk(KERN_WARNING PREFIX
                                "Invalid 0-length structure\n");
                        return 0;
                }

                if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
                        void __iomem *addr;
                        u64 cap, ecap;

                        drhd = (void *)entry_header;
                        if (!drhd->address) {
                                warn_invalid_dmar(0, "");
                                goto failed;
                        }

                        addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
                        if (!addr) {
                                printk("IOMMU: can't validate: %llx\n", drhd->address);
                                goto failed;
                        }
                        cap = dmar_readq(addr + DMAR_CAP_REG);
                        ecap = dmar_readq(addr + DMAR_ECAP_REG);
                        early_iounmap(addr, VTD_PAGE_SIZE);
                        if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
                                warn_invalid_dmar(drhd->address,
                                                  " returns all ones");
                                goto failed;
                        }
                }

                entry_header = ((void *)entry_header + entry_header->length);
        }
        return 1;

failed:
#ifdef CONFIG_DMAR
        dmar_disabled = 1;
#endif
        return 0;
}
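/*
 * Early detection entry point, run via IOMMU_INIT_POST at the bottom of
 * this file: map and validate the DMAR table and, if the platform looks
 * usable, register intel_iommu_init as the x86 IOMMU init routine.
 */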
int __init detect_intel_iommu(void)
{
        int ret;

        ret = dmar_table_detect();
        if (ret)
                ret = check_zero_address();
        {
#ifdef CONFIG_INTR_REMAP
                struct acpi_table_dmar *dmar;

                dmar = (struct acpi_table_dmar *) dmar_tbl;
                if (ret && cpu_has_x2apic && dmar->flags & 0x1)
                        printk(KERN_INFO
                               "Queued invalidation will be enabled to support "
                               "x2apic and Intr-remapping.\n");
#endif
#ifdef CONFIG_DMAR
                if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
                        iommu_detected = 1;
                        /* Make sure ACS will be enabled */
                        pci_request_acs();
                }
#endif
#ifdef CONFIG_X86
                if (ret)
                        x86_init.iommu.iommu_init = intel_iommu_init;
#endif
        }
        early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
        dmar_tbl = NULL;

        return ret ? 1 : -ENODEV;
}
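/*
 * Allocate and minimally initialise the intel_iommu for one DRHD unit:
 * map its register space, read the capability and extended capability
 * registers and compute the supported address widths.
 */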
int alloc_iommu(struct dmar_drhd_unit *drhd)
{
        struct intel_iommu *iommu;
        int map_size;
        u32 ver;
        static int iommu_allocated = 0;
        int agaw = 0;
        int msagaw = 0;

        if (!drhd->reg_base_addr) {
                warn_invalid_dmar(0, "");
                return -EINVAL;
        }

        iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
        if (!iommu)
                return -ENOMEM;

        iommu->seq_id = iommu_allocated++;
        sprintf (iommu->name, "dmar%d", iommu->seq_id);

        iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
        if (!iommu->reg) {
                printk(KERN_ERR "IOMMU: can't map the region\n");
                goto error;
        }
        iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
        iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);

        if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
                warn_invalid_dmar(drhd->reg_base_addr, " returns all ones");
                goto err_unmap;
        }

#ifdef CONFIG_DMAR
        agaw = iommu_calculate_agaw(iommu);
        if (agaw < 0) {
                printk(KERN_ERR
                       "Cannot get a valid agaw for iommu (seq_id = %d)\n",
                       iommu->seq_id);
                goto err_unmap;
        }
        msagaw = iommu_calculate_max_sagaw(iommu);
        if (msagaw < 0) {
                printk(KERN_ERR
                        "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
                        iommu->seq_id);
                goto err_unmap;
        }
#endif
        iommu->agaw = agaw;
        iommu->msagaw = msagaw;

        iommu->node = -1;

        /* the registers might be more than one page */
        map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
                cap_max_fault_reg_offset(iommu->cap));
        map_size = VTD_PAGE_ALIGN(map_size);
        if (map_size > VTD_PAGE_SIZE) {
                iounmap(iommu->reg);
                iommu->reg = ioremap(drhd->reg_base_addr, map_size);
                if (!iommu->reg) {
                        printk(KERN_ERR "IOMMU: can't map the region\n");
                        goto error;
                }
        }

        ver = readl(iommu->reg + DMAR_VER_REG);
        pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
                iommu->seq_id,
                (unsigned long long)drhd->reg_base_addr,
                DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
                (unsigned long long)iommu->cap,
                (unsigned long long)iommu->ecap);

        spin_lock_init(&iommu->register_lock);

        drhd->iommu = iommu;
        return 0;

 err_unmap:
        iounmap(iommu->reg);
 error:
        kfree(iommu);
        return -1;
}
void free_iommu(struct intel_iommu *iommu)
{
        if (!iommu)
                return;

#ifdef CONFIG_DMAR
        free_dmar_iommu(iommu);
#endif

        if (iommu->reg)
                iounmap(iommu->reg);
        kfree(iommu);
}
/*
 * Reclaim all the submitted descriptors which have completed its work.
 */
static inline void reclaim_free_desc(struct q_inval *qi)
{
        while (qi->desc_status[qi->free_tail] == QI_DONE ||
               qi->desc_status[qi->free_tail] == QI_ABORT) {
                qi->desc_status[qi->free_tail] = QI_FREE;
                qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
                qi->free_cnt++;
        }
}
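/*
 * Inspect the fault status register while waiting for a descriptor to
 * complete: return -EINVAL if hardware rejected this descriptor (IQE),
 * -EAGAIN if a timeout (ITE) aborted our wait descriptor, 0 otherwise.
 */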
static int qi_check_fault(struct intel_iommu *iommu, int index)
{
        u32 fault;
        int head, tail;
        struct q_inval *qi = iommu->qi;
        int wait_index = (index + 1) % QI_LENGTH;

        if (qi->desc_status[wait_index] == QI_ABORT)
                return -EAGAIN;

        fault = readl(iommu->reg + DMAR_FSTS_REG);

        /*
         * If IQE happens, the head points to the descriptor associated
         * with the error. No new descriptors are fetched until the IQE
         * is cleared.
         */
        if (fault & DMA_FSTS_IQE) {
                head = readl(iommu->reg + DMAR_IQH_REG);
                if ((head >> DMAR_IQ_SHIFT) == index) {
                        printk(KERN_ERR "VT-d detected invalid descriptor: "
                                "low=%llx, high=%llx\n",
                                (unsigned long long)qi->desc[index].low,
                                (unsigned long long)qi->desc[index].high);
                        memcpy(&qi->desc[index], &qi->desc[wait_index],
                                        sizeof(struct qi_desc));
                        __iommu_flush_cache(iommu, &qi->desc[index],
                                        sizeof(struct qi_desc));
                        writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
                        return -EINVAL;
                }
        }

        /*
         * If ITE happens, all pending wait_desc commands are aborted.
         * No new descriptors are fetched until the ITE is cleared.
         */
        if (fault & DMA_FSTS_ITE) {
                head = readl(iommu->reg + DMAR_IQH_REG);
                head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
                head |= 1;
                tail = readl(iommu->reg + DMAR_IQT_REG);
                tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;

                writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);

                do {
                        if (qi->desc_status[head] == QI_IN_USE)
                                qi->desc_status[head] = QI_ABORT;
                        head = (head - 2 + QI_LENGTH) % QI_LENGTH;
                } while (head != tail);

                if (qi->desc_status[wait_index] == QI_ABORT)
                        return -EAGAIN;
        }

        if (fault & DMA_FSTS_ICE)
                writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);

        return 0;
}
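/*
 * Each submission occupies two queue slots: the caller's descriptor and
 * a wait descriptor whose status write signals completion, which is why
 * at least three free entries are required before queueing.
 */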
/*
 * Submit the queued invalidation descriptor to the remapping
 * hardware unit and wait for its completion.
 */
int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
{
        int rc;
        struct q_inval *qi = iommu->qi;
        struct qi_desc *hw, wait_desc;
        int wait_index, index;
        unsigned long flags;

        if (!qi)
                return 0;

        hw = qi->desc;

restart:
        rc = 0;

        spin_lock_irqsave(&qi->q_lock, flags);
        while (qi->free_cnt < 3) {
                spin_unlock_irqrestore(&qi->q_lock, flags);
                cpu_relax();
                spin_lock_irqsave(&qi->q_lock, flags);
        }

        index = qi->free_head;
        wait_index = (index + 1) % QI_LENGTH;

        qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;

        hw[index] = *desc;

        wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
                        QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
        wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);

        hw[wait_index] = wait_desc;

        __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
        __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));

        qi->free_head = (qi->free_head + 2) % QI_LENGTH;
        qi->free_cnt -= 2;

        /*
         * update the HW tail register indicating the presence of
         * new descriptors.
         */
        writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);

        while (qi->desc_status[wait_index] != QI_DONE) {
                /*
                 * We will leave the interrupts disabled, to prevent interrupt
                 * context to queue another cmd while a cmd is already submitted
                 * and waiting for completion on this cpu. This is to avoid
                 * a deadlock where the interrupt context can wait indefinitely
                 * for free slots in the queue.
                 */
                rc = qi_check_fault(iommu, index);
                if (rc)
                        break;

                spin_unlock(&qi->q_lock);
                cpu_relax();
                spin_lock(&qi->q_lock);
        }

        qi->desc_status[index] = QI_DONE;

        reclaim_free_desc(qi);
        spin_unlock_irqrestore(&qi->q_lock, flags);

        if (rc == -EAGAIN)
                goto restart;

        return rc;
}
/*
 * Flush the global interrupt entry cache.
 */
void qi_global_iec(struct intel_iommu *iommu)
{
        struct qi_desc desc;

        desc.low = QI_IEC_TYPE;
        desc.high = 0;

        /* should never fail */
        qi_submit_sync(&desc, iommu);
}
void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
                      u64 type)
{
        struct qi_desc desc;

        desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
                        | QI_CC_GRAN(type) | QI_CC_TYPE;
        desc.high = 0;

        qi_submit_sync(&desc, iommu);
}
void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
                    unsigned int size_order, u64 type)
{
        u8 dw = 0, dr = 0;

        struct qi_desc desc;
        int ih = 0;

        if (cap_write_drain(iommu->cap))
                dw = 1;

        if (cap_read_drain(iommu->cap))
                dr = 1;

        desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
                | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
        desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
                | QI_IOTLB_AM(size_order);

        qi_submit_sync(&desc, iommu);
}
void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
                        u64 addr, unsigned mask)
{
        struct qi_desc desc;

        if (mask) {
                BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
                addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
                desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
        } else
                desc.high = QI_DEV_IOTLB_ADDR(addr);

        if (qdep >= QI_DEV_IOTLB_MAX_INVS)
                qdep = 0;

        desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
                   QI_DIOTLB_TYPE;

        qi_submit_sync(&desc, iommu);
}
/*
 * Disable Queued Invalidation interface.
 */
void dmar_disable_qi(struct intel_iommu *iommu)
{
        unsigned long flags;
        u32 sts;
        cycles_t start_time = get_cycles();

        if (!ecap_qis(iommu->ecap))
                return;

        spin_lock_irqsave(&iommu->register_lock, flags);

        sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
        if (!(sts & DMA_GSTS_QIES))
                goto end;

        /*
         * Give a chance to HW to complete the pending invalidation requests.
         */
        while ((readl(iommu->reg + DMAR_IQT_REG) !=
                readl(iommu->reg + DMAR_IQH_REG)) &&
                (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
                cpu_relax();

        iommu->gcmd &= ~DMA_GCMD_QIE;
        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
                      !(sts & DMA_GSTS_QIES), sts);
end:
        spin_unlock_irqrestore(&iommu->register_lock, flags);
}
/*
 * Enable queued invalidation.
 */
static void __dmar_enable_qi(struct intel_iommu *iommu)
{
        u32 sts;
        unsigned long flags;
        struct q_inval *qi = iommu->qi;

        qi->free_head = qi->free_tail = 0;
        qi->free_cnt = QI_LENGTH;

        spin_lock_irqsave(&iommu->register_lock, flags);

        /* write zero to the tail reg */
        writel(0, iommu->reg + DMAR_IQT_REG);

        dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));

        iommu->gcmd |= DMA_GCMD_QIE;
        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

        /* Make sure hardware complete it */
        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);

        spin_unlock_irqrestore(&iommu->register_lock, flags);
}
/*
 * Enable Queued Invalidation interface. This is a must to support
 * interrupt-remapping. Also used by DMA-remapping, which replaces
 * register based IOTLB invalidation.
 */
int dmar_enable_qi(struct intel_iommu *iommu)
{
        struct q_inval *qi;
        struct page *desc_page;

        if (!ecap_qis(iommu->ecap))
                return -ENOENT;

        /*
         * queued invalidation is already setup and enabled.
         */
        if (iommu->qi)
                return 0;

        iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
        if (!iommu->qi)
                return -ENOMEM;

        qi = iommu->qi;

        desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
        if (!desc_page) {
                kfree(qi);
                iommu->qi = 0;
                return -ENOMEM;
        }

        qi->desc = page_address(desc_page);

        qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
        if (!qi->desc_status) {
                free_page((unsigned long) qi->desc);
                kfree(qi);
                iommu->qi = 0;
                return -ENOMEM;
        }

        qi->free_head = qi->free_tail = 0;
        qi->free_cnt = QI_LENGTH;

        spin_lock_init(&qi->q_lock);

        __dmar_enable_qi(iommu);

        return 0;
}
/* iommu interrupt handling. Most stuff are MSI-like. */

enum faulttype {
        DMA_REMAP,
        INTR_REMAP,
        UNKNOWN,
};

static const char *dma_remap_fault_reasons[] =
{
        "Software",
        "Present bit in root entry is clear",
        "Present bit in context entry is clear",
        "Invalid context entry",
        "Access beyond MGAW",
        "PTE Write access is not set",
        "PTE Read access is not set",
        "Next page table ptr is invalid",
        "Root table address invalid",
        "Context table ptr is invalid",
        "non-zero reserved fields in RTP",
        "non-zero reserved fields in CTP",
        "non-zero reserved fields in PTE",
};

static const char *intr_remap_fault_reasons[] =
{
        "Detected reserved fields in the decoded interrupt-remapped request",
        "Interrupt index exceeded the interrupt-remapping table size",
        "Present field in the IRTE entry is clear",
        "Error accessing interrupt-remapping table pointed by IRTA_REG",
        "Detected reserved fields in the IRTE entry",
        "Blocked a compatibility format interrupt request",
        "Blocked an interrupt request due to source-id verification failure",
};

#define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)

const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
{
        if (fault_reason >= 0x20 && (fault_reason <= 0x20 +
                                     ARRAY_SIZE(intr_remap_fault_reasons))) {
                *fault_type = INTR_REMAP;
                return intr_remap_fault_reasons[fault_reason - 0x20];
        } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
                *fault_type = DMA_REMAP;
                return dma_remap_fault_reasons[fault_reason];
        } else {
                *fault_type = UNKNOWN;
                return "Unknown";
        }
}
void dmar_msi_unmask(struct irq_data *data)
{
        struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
        unsigned long flag;

        /* unmask it */
        spin_lock_irqsave(&iommu->register_lock, flag);
        writel(0, iommu->reg + DMAR_FECTL_REG);
        /* Read a reg to force flush the post write */
        readl(iommu->reg + DMAR_FECTL_REG);
        spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_mask(struct irq_data *data)
{
        unsigned long flag;
        struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);

        /* mask it */
        spin_lock_irqsave(&iommu->register_lock, flag);
        writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
        /* Read a reg to force flush the post write */
        readl(iommu->reg + DMAR_FECTL_REG);
        spin_unlock_irqrestore(&iommu->register_lock, flag);
}
void dmar_msi_write(int irq, struct msi_msg *msg)
{
        struct intel_iommu *iommu = irq_get_handler_data(irq);
        unsigned long flag;

        spin_lock_irqsave(&iommu->register_lock, flag);
        writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
        writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
        writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
        spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_read(int irq, struct msi_msg *msg)
{
        struct intel_iommu *iommu = irq_get_handler_data(irq);
        unsigned long flag;

        spin_lock_irqsave(&iommu->register_lock, flag);
        msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
        msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
        msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
        spin_unlock_irqrestore(&iommu->register_lock, flag);
}
static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
                u8 fault_reason, u16 source_id, unsigned long long addr)
{
        const char *reason;
        int fault_type;

        reason = dmar_get_fault_reason(fault_reason, &fault_type);

        if (fault_type == INTR_REMAP)
                printk(KERN_ERR "INTR-REMAP: Request device [%02x:%02x.%d] "
                       "fault index %llx\n"
                        "INTR-REMAP:[fault reason %02d] %s\n",
                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
                        PCI_FUNC(source_id & 0xFF), addr >> 48,
                        fault_reason, reason);
        else
                printk(KERN_ERR
                       "DMAR:[%s] Request device [%02x:%02x.%d] "
                       "fault addr %llx\n"
                       "DMAR:[fault reason %02d] %s\n",
                       (type ? "DMA Read" : "DMA Write"),
                       (source_id >> 8), PCI_SLOT(source_id & 0xFF),
                       PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
        return 0;
}
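/*
 * Primary fault interrupt handling: each fault recording register is
 * PRIMARY_FAULT_REG_LEN bytes; dmar_fault() walks them, logs and clears
 * every recorded fault, then clears the remaining status bits.
 */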
#define PRIMARY_FAULT_REG_LEN (16)
irqreturn_t dmar_fault(int irq, void *dev_id)
{
        struct intel_iommu *iommu = dev_id;
        int reg, fault_index;
        u32 fault_status;
        unsigned long flag;

        spin_lock_irqsave(&iommu->register_lock, flag);
        fault_status = readl(iommu->reg + DMAR_FSTS_REG);
        if (fault_status)
                printk(KERN_ERR "DRHD: handling fault status reg %x\n",
                       fault_status);

        /* TBD: ignore advanced fault log currently */
        if (!(fault_status & DMA_FSTS_PPF))
                goto clear_rest;

        fault_index = dma_fsts_fault_record_index(fault_status);
        reg = cap_fault_reg_offset(iommu->cap);
        while (1) {
                u8 fault_reason;
                u16 source_id;
                u64 guest_addr;
                int type;
                u32 data;

                /* highest 32 bits */
                data = readl(iommu->reg + reg +
                                fault_index * PRIMARY_FAULT_REG_LEN + 12);
                if (!(data & DMA_FRCD_F))
                        break;

                fault_reason = dma_frcd_fault_reason(data);
                type = dma_frcd_type(data);

                data = readl(iommu->reg + reg +
                                fault_index * PRIMARY_FAULT_REG_LEN + 8);
                source_id = dma_frcd_source_id(data);

                guest_addr = dmar_readq(iommu->reg + reg +
                                fault_index * PRIMARY_FAULT_REG_LEN);
                guest_addr = dma_frcd_page_addr(guest_addr);
                /* clear the fault */
                writel(DMA_FRCD_F, iommu->reg + reg +
                        fault_index * PRIMARY_FAULT_REG_LEN + 12);

                spin_unlock_irqrestore(&iommu->register_lock, flag);

                dmar_fault_do_one(iommu, type, fault_reason,
                                source_id, guest_addr);

                fault_index++;
                if (fault_index >= cap_num_fault_regs(iommu->cap))
                        fault_index = 0;
                spin_lock_irqsave(&iommu->register_lock, flag);
        }
clear_rest:
        /* clear all the other faults */
        fault_status = readl(iommu->reg + DMAR_FSTS_REG);
        writel(fault_status, iommu->reg + DMAR_FSTS_REG);

        spin_unlock_irqrestore(&iommu->register_lock, flag);
        return IRQ_HANDLED;
}
int dmar_set_interrupt(struct intel_iommu *iommu)
{
        int irq, ret;

        /*
         * Check if the fault interrupt is already initialized.
         */
        if (iommu->irq)
                return 0;

        irq = create_irq();
        if (!irq) {
                printk(KERN_ERR "IOMMU: no free vectors\n");
                return -EINVAL;
        }

        irq_set_handler_data(irq, iommu);
        iommu->irq = irq;

        ret = arch_setup_dmar_msi(irq);
        if (ret) {
                irq_set_handler_data(irq, NULL);
                iommu->irq = 0;
                destroy_irq(irq);
                return ret;
        }

        ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
        if (ret)
                printk(KERN_ERR "IOMMU: can't request irq\n");
        return ret;
}
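/*
 * Set up the fault reporting interrupt on every DRHD unit and clear any
 * faults left over from before the handler was installed.
 */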
int __init enable_drhd_fault_handling(void)
{
        struct dmar_drhd_unit *drhd;

        /*
         * Enable fault control interrupt.
         */
        for_each_drhd_unit(drhd) {
                int ret;
                struct intel_iommu *iommu = drhd->iommu;
                ret = dmar_set_interrupt(iommu);

                if (ret) {
                        printk(KERN_ERR "DRHD %Lx: failed to enable fault, "
                               " interrupt, ret %d\n",
                               (unsigned long long)drhd->reg_base_addr, ret);
                        return -1;
                }

                /*
                 * Clear any previous faults.
                 */
                dmar_fault(iommu->irq, iommu);
        }

        return 0;
}
/*
 * Re-enable Queued Invalidation interface.
 */
int dmar_reenable_qi(struct intel_iommu *iommu)
{
        if (!ecap_qis(iommu->ecap))
                return -ENOENT;

        if (!iommu->qi)
                return -ENOENT;

        /*
         * First disable queued invalidation.
         */
        dmar_disable_qi(iommu);
        /*
         * Then enable queued invalidation again. Since there is no pending
         * invalidation requests now, it's safe to re-enable queued
         * invalidation.
         */
        __dmar_enable_qi(iommu);

        return 0;
}
/*
 * Check interrupt remapping support in DMAR table description.
 */
int __init dmar_ir_support(void)
{
        struct acpi_table_dmar *dmar;
        dmar = (struct acpi_table_dmar *)dmar_tbl;
        if (!dmar)
                return 0;
        return dmar->flags & 0x1;
}
IOMMU_INIT_POST(detect_intel_iommu);