/*
 * NFIT - Machine Check Handler
 *
 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */
15 #include <linux/notifier.h>
16 #include <linux/acpi.h>
21 static int nfit_handle_mce(struct notifier_block
*nb
, unsigned long val
,
24 struct mce
*mce
= (struct mce
*)data
;
25 struct acpi_nfit_desc
*acpi_desc
;
26 struct nfit_spa
*nfit_spa
;
28 /* We only care about memory errors */
29 if (!mce_is_memory_error(mce
))
33 * mce->addr contains the physical addr accessed that caused the
34 * machine check. We need to walk through the list of NFITs, and see
35 * if any of them matches that address, and only then start a scrub.
37 mutex_lock(&acpi_desc_lock
);
38 list_for_each_entry(acpi_desc
, &acpi_descs
, list
) {
39 struct device
*dev
= acpi_desc
->dev
;
42 mutex_lock(&acpi_desc
->init_mutex
);
43 list_for_each_entry(nfit_spa
, &acpi_desc
->spas
, list
) {
44 struct acpi_nfit_system_address
*spa
= nfit_spa
->spa
;
46 if (nfit_spa_type(spa
) != NFIT_SPA_PM
)
48 /* find the spa that covers the mce addr */
49 if (spa
->address
> mce
->addr
)
51 if ((spa
->address
+ spa
->length
- 1) < mce
->addr
)
54 dev_dbg(dev
, "addr in SPA %d (0x%llx, 0x%llx)\n",
55 spa
->range_index
, spa
->address
, spa
->length
);
57 * We can break at the first match because we're going
58 * to rescan all the SPA ranges. There shouldn't be any
63 mutex_unlock(&acpi_desc
->init_mutex
);
68 /* If this fails due to an -ENOMEM, there is little we can do */
69 nvdimm_bus_add_badrange(acpi_desc
->nvdimm_bus
,
70 ALIGN(mce
->addr
, L1_CACHE_BYTES
),
72 nvdimm_region_notify(nfit_spa
->nd_region
,
73 NVDIMM_REVALIDATE_POISON
);
75 if (acpi_desc
->scrub_mode
== HW_ERROR_SCRUB_ON
) {
77 * We can ignore an -EBUSY here because if an ARS is
78 * already in progress, just let that be the last
81 acpi_nfit_ars_rescan(acpi_desc
, 0);
86 mutex_unlock(&acpi_desc_lock
);
90 static struct notifier_block nfit_mce_dec
= {
91 .notifier_call
= nfit_handle_mce
,
92 .priority
= MCE_PRIO_NFIT
,
95 void nfit_mce_register(void)
97 mce_register_decode_chain(&nfit_mce_dec
);
100 void nfit_mce_unregister(void)
102 mce_unregister_decode_chain(&nfit_mce_dec
);