drivers/iommu/dmar.c
1 /*
2 * Copyright (c) 2006, Intel Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
22 * This file implements early detection/parsing of Remapping Devices
23 * reported to the OS by the BIOS via the DMA remapping reporting (DMAR)
24 * ACPI tables.
26 * These routines are used by both DMA-remapping and Interrupt-remapping
29 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* has to precede printk.h */
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/iova.h>
34 #include <linux/intel-iommu.h>
35 #include <linux/timer.h>
36 #include <linux/irq.h>
37 #include <linux/interrupt.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/slab.h>
41 #include <asm/irq_remapping.h>
42 #include <asm/iommu_table.h>
44 #include "irq_remapping.h"
46 /* No locks are needed as DMA remapping hardware unit
47 * list is constructed at boot time and hotplug of
48 * these units is not supported by the architecture.
50 LIST_HEAD(dmar_drhd_units);
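/*
 * Illustrative sketch (not part of the original file): consumers walk this
 * boot-time list with the for_each_drhd_unit() helper from <linux/dmar.h>,
 * for example to find the unit registered at a given base address:
 *
 *	struct dmar_drhd_unit *drhd;
 *
 *	for_each_drhd_unit(drhd) {
 *		if (drhd->reg_base_addr == base)
 *			return drhd;
 *	}
 */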
52 struct acpi_table_header * __initdata dmar_tbl;
53 static acpi_size dmar_tbl_size;
55 static int alloc_iommu(struct dmar_drhd_unit *drhd);
56 static void free_iommu(struct intel_iommu *iommu);
58 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
61 * add INCLUDE_ALL at the tail, so a scan of the list will find it at
62 * the very end.
64 if (drhd->include_all)
65 list_add_tail(&drhd->list, &dmar_drhd_units);
66 else
67 list_add(&drhd->list, &dmar_drhd_units);
70 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
71 struct pci_dev **dev, u16 segment)
73 struct pci_bus *bus;
74 struct pci_dev *pdev = NULL;
75 struct acpi_dmar_pci_path *path;
76 int count;
78 bus = pci_find_bus(segment, scope->bus);
79 path = (struct acpi_dmar_pci_path *)(scope + 1);
80 count = (scope->length - sizeof(struct acpi_dmar_device_scope))
81 / sizeof(struct acpi_dmar_pci_path);
83 while (count) {
84 if (pdev)
85 pci_dev_put(pdev);
87 * Some BIOSes list non-existent devices in the DMAR table; just
88 * ignore them.
90 if (!bus) {
91 pr_warn("Device scope bus [%d] not found\n", scope->bus);
92 break;
94 pdev = pci_get_slot(bus, PCI_DEVFN(path->device, path->function));
95 if (!pdev) {
96 /* warning will be printed below */
97 break;
99 path++;
100 count--;
101 bus = pdev->subordinate;
103 if (!pdev) {
104 pr_warn("Device scope device [%04x:%02x:%02x.%02x] not found\n",
105 segment, scope->bus, path->device, path->function);
106 return 0;
108 if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
109 pdev->subordinate) || (scope->entry_type ==
110 ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
111 pr_warn("Device scope type does not match for %s\n",
112 pci_name(pdev));
113 pci_dev_put(pdev);
114 return -EINVAL;
116 *dev = pdev;
117 return 0;
120 int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
121 struct pci_dev ***devices, u16 segment)
123 struct acpi_dmar_device_scope *scope;
124 void * tmp = start;
125 int index;
126 int ret;
128 *cnt = 0;
129 while (start < end) {
130 scope = start;
131 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
132 scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
133 (*cnt)++;
134 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
135 scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
136 pr_warn("Unsupported device scope\n");
138 start += scope->length;
140 if (*cnt == 0)
141 return 0;
143 *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
144 if (!*devices)
145 return -ENOMEM;
147 start = tmp;
148 index = 0;
149 while (start < end) {
150 scope = start;
151 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
152 scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
153 ret = dmar_parse_one_dev_scope(scope,
154 &(*devices)[index], segment);
155 if (ret) {
156 dmar_free_dev_scope(devices, cnt);
157 return ret;
159 index++;
161 start += scope->length;
164 return 0;
167 void dmar_free_dev_scope(struct pci_dev ***devices, int *cnt)
169 if (*devices && *cnt) {
170 while (--*cnt >= 0)
171 pci_dev_put((*devices)[*cnt]);
172 kfree(*devices);
173 *devices = NULL;
174 *cnt = 0;
179 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
180 * structure which uniquely represents one DMA remapping hardware unit
181 * present in the platform
183 static int __init
184 dmar_parse_one_drhd(struct acpi_dmar_header *header)
186 struct acpi_dmar_hardware_unit *drhd;
187 struct dmar_drhd_unit *dmaru;
188 int ret = 0;
190 drhd = (struct acpi_dmar_hardware_unit *)header;
191 dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
192 if (!dmaru)
193 return -ENOMEM;
195 dmaru->hdr = header;
196 dmaru->reg_base_addr = drhd->address;
197 dmaru->segment = drhd->segment;
198 dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
200 ret = alloc_iommu(dmaru);
201 if (ret) {
202 kfree(dmaru);
203 return ret;
205 dmar_register_drhd_unit(dmaru);
206 return 0;
209 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
211 if (dmaru->devices && dmaru->devices_cnt)
212 dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
213 if (dmaru->iommu)
214 free_iommu(dmaru->iommu);
215 kfree(dmaru);
218 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
220 struct acpi_dmar_hardware_unit *drhd;
222 drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
224 if (dmaru->include_all)
225 return 0;
227 return dmar_parse_dev_scope((void *)(drhd + 1),
228 ((void *)drhd) + drhd->header.length,
229 &dmaru->devices_cnt, &dmaru->devices,
230 drhd->segment);
233 #ifdef CONFIG_ACPI_NUMA
234 static int __init
235 dmar_parse_one_rhsa(struct acpi_dmar_header *header)
237 struct acpi_dmar_rhsa *rhsa;
238 struct dmar_drhd_unit *drhd;
240 rhsa = (struct acpi_dmar_rhsa *)header;
241 for_each_drhd_unit(drhd) {
242 if (drhd->reg_base_addr == rhsa->base_address) {
243 int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
245 if (!node_online(node))
246 node = -1;
247 drhd->iommu->node = node;
248 return 0;
251 WARN_TAINT(
252 1, TAINT_FIRMWARE_WORKAROUND,
253 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
254 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
255 drhd->reg_base_addr,
256 dmi_get_system_info(DMI_BIOS_VENDOR),
257 dmi_get_system_info(DMI_BIOS_VERSION),
258 dmi_get_system_info(DMI_PRODUCT_VERSION));
260 return 0;
262 #endif
264 static void __init
265 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
267 struct acpi_dmar_hardware_unit *drhd;
268 struct acpi_dmar_reserved_memory *rmrr;
269 struct acpi_dmar_atsr *atsr;
270 struct acpi_dmar_rhsa *rhsa;
272 switch (header->type) {
273 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
274 drhd = container_of(header, struct acpi_dmar_hardware_unit,
275 header);
276 pr_info("DRHD base: %#016Lx flags: %#x\n",
277 (unsigned long long)drhd->address, drhd->flags);
278 break;
279 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
280 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
281 header);
282 pr_info("RMRR base: %#016Lx end: %#016Lx\n",
283 (unsigned long long)rmrr->base_address,
284 (unsigned long long)rmrr->end_address);
285 break;
286 case ACPI_DMAR_TYPE_ATSR:
287 atsr = container_of(header, struct acpi_dmar_atsr, header);
288 pr_info("ATSR flags: %#x\n", atsr->flags);
289 break;
290 case ACPI_DMAR_HARDWARE_AFFINITY:
291 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
292 pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
293 (unsigned long long)rhsa->base_address,
294 rhsa->proximity_domain);
295 break;
300 * dmar_table_detect - checks to see if the platform supports DMAR devices
302 static int __init dmar_table_detect(void)
304 acpi_status status = AE_OK;
306 /* if we can find the DMAR table, then there are DMAR devices */
307 status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
308 (struct acpi_table_header **)&dmar_tbl,
309 &dmar_tbl_size);
311 if (ACPI_SUCCESS(status) && !dmar_tbl) {
312 pr_warn("Unable to map DMAR\n");
313 status = AE_NOT_FOUND;
316 return (ACPI_SUCCESS(status) ? 1 : 0);
320 * parse_dmar_table - parses the DMA reporting table
322 static int __init
323 parse_dmar_table(void)
325 struct acpi_table_dmar *dmar;
326 struct acpi_dmar_header *entry_header;
327 int ret = 0;
328 int drhd_count = 0;
331 * Do it again; the earlier dmar_tbl mapping could have been done with a
332 * fixed map.
334 dmar_table_detect();
337 * ACPI tables may not be DMA protected by tboot, so use the DMAR copy
338 * that SINIT saved in SinitMleData in the TXT heap (which is DMA protected).
340 dmar_tbl = tboot_get_dmar_table(dmar_tbl);
342 dmar = (struct acpi_table_dmar *)dmar_tbl;
343 if (!dmar)
344 return -ENODEV;
346 if (dmar->width < PAGE_SHIFT - 1) {
347 pr_warn("Invalid DMAR haw\n");
348 return -EINVAL;
351 pr_info("Host address width %d\n", dmar->width + 1);
353 entry_header = (struct acpi_dmar_header *)(dmar + 1);
354 while (((unsigned long)entry_header) <
355 (((unsigned long)dmar) + dmar_tbl->length)) {
356 /* Avoid looping forever on bad ACPI tables */
357 if (entry_header->length == 0) {
358 pr_warn("Invalid 0-length structure\n");
359 ret = -EINVAL;
360 break;
363 dmar_table_print_dmar_entry(entry_header);
365 switch (entry_header->type) {
366 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
367 drhd_count++;
368 ret = dmar_parse_one_drhd(entry_header);
369 break;
370 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
371 ret = dmar_parse_one_rmrr(entry_header);
372 break;
373 case ACPI_DMAR_TYPE_ATSR:
374 ret = dmar_parse_one_atsr(entry_header);
375 break;
376 case ACPI_DMAR_HARDWARE_AFFINITY:
377 #ifdef CONFIG_ACPI_NUMA
378 ret = dmar_parse_one_rhsa(entry_header);
379 #endif
380 break;
381 default:
382 pr_warn("Unknown DMAR structure type %d\n",
383 entry_header->type);
384 ret = 0; /* for forward compatibility */
385 break;
387 if (ret)
388 break;
390 entry_header = ((void *)entry_header + entry_header->length);
392 if (drhd_count == 0)
393 pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
394 return ret;
397 static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
398 struct pci_dev *dev)
400 int index;
402 while (dev) {
403 for (index = 0; index < cnt; index++)
404 if (dev == devices[index])
405 return 1;
407 /* Check our parent */
408 dev = dev->bus->self;
411 return 0;
414 struct dmar_drhd_unit *
415 dmar_find_matched_drhd_unit(struct pci_dev *dev)
417 struct dmar_drhd_unit *dmaru = NULL;
418 struct acpi_dmar_hardware_unit *drhd;
420 dev = pci_physfn(dev);
422 for_each_drhd_unit(dmaru) {
423 drhd = container_of(dmaru->hdr,
424 struct acpi_dmar_hardware_unit,
425 header);
427 if (dmaru->include_all &&
428 drhd->segment == pci_domain_nr(dev->bus))
429 return dmaru;
431 if (dmar_pci_device_match(dmaru->devices,
432 dmaru->devices_cnt, dev))
433 return dmaru;
436 return NULL;
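/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 * the DMA-remapping driver maps a PCI device to its IOMMU roughly like so:
 *
 *	struct dmar_drhd_unit *drhd = dmar_find_matched_drhd_unit(pdev);
 *	struct intel_iommu *iommu = drhd ? drhd->iommu : NULL;
 */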
439 int __init dmar_dev_scope_init(void)
441 static int dmar_dev_scope_initialized;
442 struct dmar_drhd_unit *drhd;
443 int ret = -ENODEV;
445 if (dmar_dev_scope_initialized)
446 return dmar_dev_scope_initialized;
448 if (list_empty(&dmar_drhd_units))
449 goto fail;
451 list_for_each_entry(drhd, &dmar_drhd_units, list) {
452 ret = dmar_parse_dev(drhd);
453 if (ret)
454 goto fail;
457 ret = dmar_parse_rmrr_atsr_dev();
458 if (ret)
459 goto fail;
461 dmar_dev_scope_initialized = 1;
462 return 0;
464 fail:
465 dmar_dev_scope_initialized = ret;
466 return ret;
470 int __init dmar_table_init(void)
472 static int dmar_table_initialized;
473 int ret;
475 if (dmar_table_initialized == 0) {
476 ret = parse_dmar_table();
477 if (ret < 0) {
478 if (ret != -ENODEV)
479 pr_info("Parsing DMAR table failed\n");
480 } else if (list_empty(&dmar_drhd_units)) {
481 pr_info("No DMAR devices found\n");
482 ret = -ENODEV;
485 if (ret < 0)
486 dmar_table_initialized = ret;
487 else
488 dmar_table_initialized = 1;
491 return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
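/*
 * Illustrative ordering sketch (hypothetical caller, not part of this file):
 * table parsing is expected to run before device-scope resolution, e.g.:
 *
 *	if (dmar_table_init())
 *		return -ENODEV;
 *	if (dmar_dev_scope_init() < 0)
 *		return -ENODEV;
 */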
494 static void warn_invalid_dmar(u64 addr, const char *message)
496 WARN_TAINT_ONCE(
497 1, TAINT_FIRMWARE_WORKAROUND,
498 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
499 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
500 addr, message,
501 dmi_get_system_info(DMI_BIOS_VENDOR),
502 dmi_get_system_info(DMI_BIOS_VERSION),
503 dmi_get_system_info(DMI_PRODUCT_VERSION));
506 static int __init check_zero_address(void)
508 struct acpi_table_dmar *dmar;
509 struct acpi_dmar_header *entry_header;
510 struct acpi_dmar_hardware_unit *drhd;
512 dmar = (struct acpi_table_dmar *)dmar_tbl;
513 entry_header = (struct acpi_dmar_header *)(dmar + 1);
515 while (((unsigned long)entry_header) <
516 (((unsigned long)dmar) + dmar_tbl->length)) {
517 /* Avoid looping forever on bad ACPI tables */
518 if (entry_header->length == 0) {
519 pr_warn("Invalid 0-length structure\n");
520 return 0;
523 if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
524 void __iomem *addr;
525 u64 cap, ecap;
527 drhd = (void *)entry_header;
528 if (!drhd->address) {
529 warn_invalid_dmar(0, "");
530 goto failed;
533 addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
534 if (!addr) {
535 pr_warn("IOMMU: can't validate: %llx\n", drhd->address);
536 goto failed;
538 cap = dmar_readq(addr + DMAR_CAP_REG);
539 ecap = dmar_readq(addr + DMAR_ECAP_REG);
540 early_iounmap(addr, VTD_PAGE_SIZE);
541 if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
542 warn_invalid_dmar(drhd->address,
543 " returns all ones");
544 goto failed;
548 entry_header = ((void *)entry_header + entry_header->length);
550 return 1;
552 failed:
553 return 0;
556 int __init detect_intel_iommu(void)
558 int ret;
560 ret = dmar_table_detect();
561 if (ret)
562 ret = check_zero_address();
564 if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
565 iommu_detected = 1;
566 /* Make sure ACS will be enabled */
567 pci_request_acs();
570 #ifdef CONFIG_X86
571 if (ret)
572 x86_init.iommu.iommu_init = intel_iommu_init;
573 #endif
575 early_acpi_os_unmap_memory((void __iomem *)dmar_tbl, dmar_tbl_size);
576 dmar_tbl = NULL;
578 return ret ? 1 : -ENODEV;
582 static void unmap_iommu(struct intel_iommu *iommu)
584 iounmap(iommu->reg);
585 release_mem_region(iommu->reg_phys, iommu->reg_size);
589 * map_iommu: map the iommu's registers
590 * @iommu: the iommu to map
591 * @phys_addr: the physical address of the base register
593 * Memory map the iommu's registers. Start with a single page, and
594 * possibly expand if that turns out to be insufficient.
596 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
598 int map_size, err = 0;
600 iommu->reg_phys = phys_addr;
601 iommu->reg_size = VTD_PAGE_SIZE;
603 if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
604 pr_err("IOMMU: can't reserve memory\n");
605 err = -EBUSY;
606 goto out;
609 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
610 if (!iommu->reg) {
611 pr_err("IOMMU: can't map the region\n");
612 err = -ENOMEM;
613 goto release;
616 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
617 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
619 if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
620 err = -EINVAL;
621 warn_invalid_dmar(phys_addr, " returns all ones");
622 goto unmap;
625 /* the registers might be more than one page */
626 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
627 cap_max_fault_reg_offset(iommu->cap));
628 map_size = VTD_PAGE_ALIGN(map_size);
629 if (map_size > iommu->reg_size) {
630 iounmap(iommu->reg);
631 release_mem_region(iommu->reg_phys, iommu->reg_size);
632 iommu->reg_size = map_size;
633 if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
634 iommu->name)) {
635 pr_err("IOMMU: can't reserve memory\n");
636 err = -EBUSY;
637 goto out;
639 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
640 if (!iommu->reg) {
641 pr_err("IOMMU: can't map the region\n");
642 err = -ENOMEM;
643 goto release;
646 err = 0;
647 goto out;
649 unmap:
650 iounmap(iommu->reg);
651 release:
652 release_mem_region(iommu->reg_phys, iommu->reg_size);
653 out:
654 return err;
657 static int alloc_iommu(struct dmar_drhd_unit *drhd)
659 struct intel_iommu *iommu;
660 u32 ver, sts;
661 static int iommu_allocated = 0;
662 int agaw = 0;
663 int msagaw = 0;
664 int err;
666 if (!drhd->reg_base_addr) {
667 warn_invalid_dmar(0, "");
668 return -EINVAL;
671 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
672 if (!iommu)
673 return -ENOMEM;
675 iommu->seq_id = iommu_allocated++;
676 sprintf(iommu->name, "dmar%d", iommu->seq_id);
678 err = map_iommu(iommu, drhd->reg_base_addr);
679 if (err) {
680 pr_err("IOMMU: failed to map %s\n", iommu->name);
681 goto error;
684 err = -EINVAL;
685 agaw = iommu_calculate_agaw(iommu);
686 if (agaw < 0) {
687 pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
688 iommu->seq_id);
689 goto err_unmap;
691 msagaw = iommu_calculate_max_sagaw(iommu);
692 if (msagaw < 0) {
693 pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
694 iommu->seq_id);
695 goto err_unmap;
697 iommu->agaw = agaw;
698 iommu->msagaw = msagaw;
700 iommu->node = -1;
702 ver = readl(iommu->reg + DMAR_VER_REG);
703 pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
704 iommu->seq_id,
705 (unsigned long long)drhd->reg_base_addr,
706 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
707 (unsigned long long)iommu->cap,
708 (unsigned long long)iommu->ecap);
710 /* Reflect status in gcmd */
711 sts = readl(iommu->reg + DMAR_GSTS_REG);
712 if (sts & DMA_GSTS_IRES)
713 iommu->gcmd |= DMA_GCMD_IRE;
714 if (sts & DMA_GSTS_TES)
715 iommu->gcmd |= DMA_GCMD_TE;
716 if (sts & DMA_GSTS_QIES)
717 iommu->gcmd |= DMA_GCMD_QIE;
719 raw_spin_lock_init(&iommu->register_lock);
721 drhd->iommu = iommu;
722 return 0;
724 err_unmap:
725 unmap_iommu(iommu);
726 error:
727 kfree(iommu);
728 return err;
731 static void free_iommu(struct intel_iommu *iommu)
733 if (iommu->irq) {
734 free_irq(iommu->irq, iommu);
735 irq_set_handler_data(iommu->irq, NULL);
736 destroy_irq(iommu->irq);
739 if (iommu->qi) {
740 free_page((unsigned long)iommu->qi->desc);
741 kfree(iommu->qi->desc_status);
742 kfree(iommu->qi);
745 if (iommu->reg)
746 unmap_iommu(iommu);
748 kfree(iommu);
752 * Reclaim all the submitted descriptors which have completed their work.
754 static inline void reclaim_free_desc(struct q_inval *qi)
756 while (qi->desc_status[qi->free_tail] == QI_DONE ||
757 qi->desc_status[qi->free_tail] == QI_ABORT) {
758 qi->desc_status[qi->free_tail] = QI_FREE;
759 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
760 qi->free_cnt++;
764 static int qi_check_fault(struct intel_iommu *iommu, int index)
766 u32 fault;
767 int head, tail;
768 struct q_inval *qi = iommu->qi;
769 int wait_index = (index + 1) % QI_LENGTH;
771 if (qi->desc_status[wait_index] == QI_ABORT)
772 return -EAGAIN;
774 fault = readl(iommu->reg + DMAR_FSTS_REG);
777 * If IQE happens, the head points to the descriptor associated
778 * with the error. No new descriptors are fetched until the IQE
779 * is cleared.
781 if (fault & DMA_FSTS_IQE) {
782 head = readl(iommu->reg + DMAR_IQH_REG);
783 if ((head >> DMAR_IQ_SHIFT) == index) {
784 pr_err("VT-d detected invalid descriptor: "
785 "low=%llx, high=%llx\n",
786 (unsigned long long)qi->desc[index].low,
787 (unsigned long long)qi->desc[index].high);
788 memcpy(&qi->desc[index], &qi->desc[wait_index],
789 sizeof(struct qi_desc));
790 __iommu_flush_cache(iommu, &qi->desc[index],
791 sizeof(struct qi_desc));
792 writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
793 return -EINVAL;
798 * If ITE happens, all pending wait_desc commands are aborted.
799 * No new descriptors are fetched until the ITE is cleared.
801 if (fault & DMA_FSTS_ITE) {
802 head = readl(iommu->reg + DMAR_IQH_REG);
803 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
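/*
 * Descriptors are queued in pairs from an even free_head, so the wait
 * descriptors occupy the odd slots; OR-ing head with 1 snaps it to a
 * wait-descriptor slot before the abort loop below walks backwards in
 * steps of two.
 */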
804 head |= 1;
805 tail = readl(iommu->reg + DMAR_IQT_REG);
806 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
808 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
810 do {
811 if (qi->desc_status[head] == QI_IN_USE)
812 qi->desc_status[head] = QI_ABORT;
813 head = (head - 2 + QI_LENGTH) % QI_LENGTH;
814 } while (head != tail);
816 if (qi->desc_status[wait_index] == QI_ABORT)
817 return -EAGAIN;
820 if (fault & DMA_FSTS_ICE)
821 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
823 return 0;
827 * Submit the queued invalidation descriptor to the remapping
828 * hardware unit and wait for its completion.
830 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
832 int rc;
833 struct q_inval *qi = iommu->qi;
834 struct qi_desc *hw, wait_desc;
835 int wait_index, index;
836 unsigned long flags;
838 if (!qi)
839 return 0;
841 hw = qi->desc;
843 restart:
844 rc = 0;
846 raw_spin_lock_irqsave(&qi->q_lock, flags);
847 while (qi->free_cnt < 3) {
848 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
849 cpu_relax();
850 raw_spin_lock_irqsave(&qi->q_lock, flags);
853 index = qi->free_head;
854 wait_index = (index + 1) % QI_LENGTH;
856 qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
858 hw[index] = *desc;
860 wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
861 QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
862 wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
864 hw[wait_index] = wait_desc;
866 __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
867 __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
869 qi->free_head = (qi->free_head + 2) % QI_LENGTH;
870 qi->free_cnt -= 2;
873 * update the HW tail register indicating the presence of
874 * new descriptors.
876 writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
878 while (qi->desc_status[wait_index] != QI_DONE) {
880 * We leave interrupts disabled to prevent the interrupt
881 * context from queueing another cmd while a cmd is already submitted
882 * and waiting for completion on this cpu. This is to avoid
883 * a deadlock where the interrupt context can wait indefinitely
884 * for free slots in the queue.
886 rc = qi_check_fault(iommu, index);
887 if (rc)
888 break;
890 raw_spin_unlock(&qi->q_lock);
891 cpu_relax();
892 raw_spin_lock(&qi->q_lock);
895 qi->desc_status[index] = QI_DONE;
897 reclaim_free_desc(qi);
898 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
900 if (rc == -EAGAIN)
901 goto restart;
903 return rc;
907 * Flush the global interrupt entry cache.
909 void qi_global_iec(struct intel_iommu *iommu)
911 struct qi_desc desc;
913 desc.low = QI_IEC_TYPE;
914 desc.high = 0;
916 /* should never fail */
917 qi_submit_sync(&desc, iommu);
920 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
921 u64 type)
923 struct qi_desc desc;
925 desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
926 | QI_CC_GRAN(type) | QI_CC_TYPE;
927 desc.high = 0;
929 qi_submit_sync(&desc, iommu);
932 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
933 unsigned int size_order, u64 type)
935 u8 dw = 0, dr = 0;
937 struct qi_desc desc;
938 int ih = 0;
940 if (cap_write_drain(iommu->cap))
941 dw = 1;
943 if (cap_read_drain(iommu->cap))
944 dr = 1;
946 desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
947 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
948 desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
949 | QI_IOTLB_AM(size_order);
951 qi_submit_sync(&desc, iommu);
954 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
955 u64 addr, unsigned mask)
957 struct qi_desc desc;
959 if (mask) {
960 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
961 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
962 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
963 } else
964 desc.high = QI_DEV_IOTLB_ADDR(addr);
966 if (qdep >= QI_DEV_IOTLB_MAX_INVS)
967 qdep = 0;
969 desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
970 QI_DIOTLB_TYPE;
972 qi_submit_sync(&desc, iommu);
976 * Disable Queued Invalidation interface.
978 void dmar_disable_qi(struct intel_iommu *iommu)
980 unsigned long flags;
981 u32 sts;
982 cycles_t start_time = get_cycles();
984 if (!ecap_qis(iommu->ecap))
985 return;
987 raw_spin_lock_irqsave(&iommu->register_lock, flags);
989 sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
990 if (!(sts & DMA_GSTS_QIES))
991 goto end;
994 * Give the HW a chance to complete the pending invalidation requests.
996 while ((readl(iommu->reg + DMAR_IQT_REG) !=
997 readl(iommu->reg + DMAR_IQH_REG)) &&
998 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
999 cpu_relax();
1001 iommu->gcmd &= ~DMA_GCMD_QIE;
1002 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1004 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1005 !(sts & DMA_GSTS_QIES), sts);
1006 end:
1007 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1011 * Enable queued invalidation.
1013 static void __dmar_enable_qi(struct intel_iommu *iommu)
1015 u32 sts;
1016 unsigned long flags;
1017 struct q_inval *qi = iommu->qi;
1019 qi->free_head = qi->free_tail = 0;
1020 qi->free_cnt = QI_LENGTH;
1022 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1024 /* write zero to the tail reg */
1025 writel(0, iommu->reg + DMAR_IQT_REG);
1027 dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
1029 iommu->gcmd |= DMA_GCMD_QIE;
1030 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1032 /* Make sure hardware completes it */
1033 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1035 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1039 * Enable Queued Invalidation interface. This is a must to support
1040 * interrupt-remapping. Also used by DMA-remapping, which replaces
1041 * register based IOTLB invalidation.
1043 int dmar_enable_qi(struct intel_iommu *iommu)
1045 struct q_inval *qi;
1046 struct page *desc_page;
1048 if (!ecap_qis(iommu->ecap))
1049 return -ENOENT;
1052 * queued invalidation is already set up and enabled.
1054 if (iommu->qi)
1055 return 0;
1057 iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1058 if (!iommu->qi)
1059 return -ENOMEM;
1061 qi = iommu->qi;
1064 desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
1065 if (!desc_page) {
1066 kfree(qi);
1067 iommu->qi = NULL;
1068 return -ENOMEM;
1071 qi->desc = page_address(desc_page);
1073 qi->desc_status = kzalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1074 if (!qi->desc_status) {
1075 free_page((unsigned long) qi->desc);
1076 kfree(qi);
1077 iommu->qi = NULL;
1078 return -ENOMEM;
1081 qi->free_head = qi->free_tail = 0;
1082 qi->free_cnt = QI_LENGTH;
1084 raw_spin_lock_init(&qi->q_lock);
1086 __dmar_enable_qi(iommu);
1088 return 0;
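/*
 * Illustrative sketch (hypothetical caller, not part of this file): the
 * remapping setup paths enable QI on every unit roughly like this:
 *
 *	for_each_iommu(iommu, drhd) {
 *		if (dmar_enable_qi(iommu))
 *			pr_warn("Failed to enable QI on %s\n", iommu->name);
 *	}
 */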
1091 /* iommu interrupt handling. Most stuff is MSI-like. */
1093 enum faulttype {
1094 DMA_REMAP,
1095 INTR_REMAP,
1096 UNKNOWN,
1099 static const char *dma_remap_fault_reasons[] =
1101 "Software",
1102 "Present bit in root entry is clear",
1103 "Present bit in context entry is clear",
1104 "Invalid context entry",
1105 "Access beyond MGAW",
1106 "PTE Write access is not set",
1107 "PTE Read access is not set",
1108 "Next page table ptr is invalid",
1109 "Root table address invalid",
1110 "Context table ptr is invalid",
1111 "non-zero reserved fields in RTP",
1112 "non-zero reserved fields in CTP",
1113 "non-zero reserved fields in PTE",
1114 "PCE for translation request specifies blocking",
1117 static const char *irq_remap_fault_reasons[] =
1119 "Detected reserved fields in the decoded interrupt-remapped request",
1120 "Interrupt index exceeded the interrupt-remapping table size",
1121 "Present field in the IRTE entry is clear",
1122 "Error accessing interrupt-remapping table pointed by IRTA_REG",
1123 "Detected reserved fields in the IRTE entry",
1124 "Blocked a compatibility format interrupt request",
1125 "Blocked an interrupt request due to source-id verification failure",
1128 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1130 if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1131 ARRAY_SIZE(irq_remap_fault_reasons))) {
1132 *fault_type = INTR_REMAP;
1133 return irq_remap_fault_reasons[fault_reason - 0x20];
1134 } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1135 *fault_type = DMA_REMAP;
1136 return dma_remap_fault_reasons[fault_reason];
1137 } else {
1138 *fault_type = UNKNOWN;
1139 return "Unknown";
1143 void dmar_msi_unmask(struct irq_data *data)
1145 struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1146 unsigned long flag;
1148 /* unmask it */
1149 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1150 writel(0, iommu->reg + DMAR_FECTL_REG);
1151 /* Read a reg to force flush the post write */
1152 readl(iommu->reg + DMAR_FECTL_REG);
1153 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1156 void dmar_msi_mask(struct irq_data *data)
1158 unsigned long flag;
1159 struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1161 /* mask it */
1162 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1163 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1164 /* Read a reg to force flush the post write */
1165 readl(iommu->reg + DMAR_FECTL_REG);
1166 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1169 void dmar_msi_write(int irq, struct msi_msg *msg)
1171 struct intel_iommu *iommu = irq_get_handler_data(irq);
1172 unsigned long flag;
1174 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1175 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1176 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1177 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1178 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1181 void dmar_msi_read(int irq, struct msi_msg *msg)
1183 struct intel_iommu *iommu = irq_get_handler_data(irq);
1184 unsigned long flag;
1186 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1187 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1188 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1189 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1190 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1193 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1194 u8 fault_reason, u16 source_id, unsigned long long addr)
1196 const char *reason;
1197 int fault_type;
1199 reason = dmar_get_fault_reason(fault_reason, &fault_type);
1201 if (fault_type == INTR_REMAP)
1202 pr_err("INTR-REMAP: Request device [%02x:%02x.%d] "
1203 "fault index %llx\n"
1204 "INTR-REMAP:[fault reason %02d] %s\n",
1205 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1206 PCI_FUNC(source_id & 0xFF), addr >> 48,
1207 fault_reason, reason);
1208 else
1209 pr_err("DMAR:[%s] Request device [%02x:%02x.%d] "
1210 "fault addr %llx \n"
1211 "DMAR:[fault reason %02d] %s\n",
1212 (type ? "DMA Read" : "DMA Write"),
1213 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1214 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1215 return 0;
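/*
 * Worked example (explanatory note, not part of the original file):
 * source_id is the requester's PCI bus/devfn, so e.g. source_id 0x01a2
 * decodes as bus 0x01, slot 0x14, function 2 and prints as "01:14.2".
 */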
1218 #define PRIMARY_FAULT_REG_LEN (16)
1219 irqreturn_t dmar_fault(int irq, void *dev_id)
1221 struct intel_iommu *iommu = dev_id;
1222 int reg, fault_index;
1223 u32 fault_status;
1224 unsigned long flag;
1226 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1227 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1228 if (fault_status)
1229 pr_err("DRHD: handling fault status reg %x\n", fault_status);
1231 /* TBD: ignore advanced fault log currently */
1232 if (!(fault_status & DMA_FSTS_PPF))
1233 goto unlock_exit;
1235 fault_index = dma_fsts_fault_record_index(fault_status);
1236 reg = cap_fault_reg_offset(iommu->cap);
1237 while (1) {
1238 u8 fault_reason;
1239 u16 source_id;
1240 u64 guest_addr;
1241 int type;
1242 u32 data;
1244 /* highest 32 bits */
1245 data = readl(iommu->reg + reg +
1246 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1247 if (!(data & DMA_FRCD_F))
1248 break;
1250 fault_reason = dma_frcd_fault_reason(data);
1251 type = dma_frcd_type(data);
1253 data = readl(iommu->reg + reg +
1254 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1255 source_id = dma_frcd_source_id(data);
1257 guest_addr = dmar_readq(iommu->reg + reg +
1258 fault_index * PRIMARY_FAULT_REG_LEN);
1259 guest_addr = dma_frcd_page_addr(guest_addr);
1260 /* clear the fault */
1261 writel(DMA_FRCD_F, iommu->reg + reg +
1262 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1264 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1266 dmar_fault_do_one(iommu, type, fault_reason,
1267 source_id, guest_addr);
1269 fault_index++;
1270 if (fault_index >= cap_num_fault_regs(iommu->cap))
1271 fault_index = 0;
1272 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1275 writel(DMA_FSTS_PFO | DMA_FSTS_PPF, iommu->reg + DMAR_FSTS_REG);
1277 unlock_exit:
1278 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1279 return IRQ_HANDLED;
1282 int dmar_set_interrupt(struct intel_iommu *iommu)
1284 int irq, ret;
1287 * Check if the fault interrupt is already initialized.
1289 if (iommu->irq)
1290 return 0;
1292 irq = create_irq();
1293 if (!irq) {
1294 pr_err("IOMMU: no free vectors\n");
1295 return -EINVAL;
1298 irq_set_handler_data(irq, iommu);
1299 iommu->irq = irq;
1301 ret = arch_setup_dmar_msi(irq);
1302 if (ret) {
1303 irq_set_handler_data(irq, NULL);
1304 iommu->irq = 0;
1305 destroy_irq(irq);
1306 return ret;
1309 ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1310 if (ret)
1311 pr_err("IOMMU: can't request irq\n");
1312 return ret;
1315 int __init enable_drhd_fault_handling(void)
1317 struct dmar_drhd_unit *drhd;
1318 struct intel_iommu *iommu;
1321 * Enable fault control interrupt.
1323 for_each_iommu(iommu, drhd) {
1324 u32 fault_status;
1325 int ret = dmar_set_interrupt(iommu);
1327 if (ret) {
1328 pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
1329 (unsigned long long)drhd->reg_base_addr, ret);
1330 return -1;
1334 * Clear any previous faults.
1336 dmar_fault(iommu->irq, iommu);
1337 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1338 writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1341 return 0;
1345 * Re-enable Queued Invalidation interface.
1347 int dmar_reenable_qi(struct intel_iommu *iommu)
1349 if (!ecap_qis(iommu->ecap))
1350 return -ENOENT;
1352 if (!iommu->qi)
1353 return -ENOENT;
1356 * First disable queued invalidation.
1358 dmar_disable_qi(iommu);
1360 * Then enable queued invalidation again. Since there are no pending
1361 * invalidation requests now, it's safe to re-enable queued
1362 * invalidation.
1364 __dmar_enable_qi(iommu);
1366 return 0;
1370 * Check interrupt remapping support in DMAR table description.
1372 int __init dmar_ir_support(void)
1374 struct acpi_table_dmar *dmar;
1375 dmar = (struct acpi_table_dmar *)dmar_tbl;
1376 if (!dmar)
1377 return 0;
1378 return dmar->flags & 0x1;
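/*
 * Illustrative sketch (hypothetical caller, not part of this file): the
 * interrupt-remapping init path gates itself on this flag, e.g.:
 *
 *	if (!dmar_ir_support())
 *		return -ENODEV;
 */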
1381 static int __init dmar_free_unused_resources(void)
1383 struct dmar_drhd_unit *dmaru, *dmaru_n;
1385 /* DMAR units are in use */
1386 if (irq_remapping_enabled || intel_iommu_enabled)
1387 return 0;
1389 list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
1390 list_del(&dmaru->list);
1391 dmar_free_drhd(dmaru);
1394 return 0;
1397 late_initcall(dmar_free_unused_resources);
1398 IOMMU_INIT_POST(detect_intel_iommu);