1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * FRU (Field-Replaceable Unit) Memory Poison Manager
5 * Copyright (c) 2024, Advanced Micro Devices, Inc.
9 * Naveen Krishna Chatradhi <naveenkrishna.chatradhi@amd.com>
10 * Muralidhara M K <muralidhara.mk@amd.com>
11 * Yazen Ghannam <Yazen.Ghannam@amd.com>
13 * Implementation notes, assumptions, and limitations:
15 * - FRU memory poison section and memory poison descriptor definitions are not yet
16 * included in the UEFI specification. So they are defined here. Afterwards, they
17 * may be moved to linux/cper.h, if appropriate.
19 * - Platforms based on AMD MI300 systems will be the first to use these structures.
20 * There are a number of assumptions made here that will need to be generalized
21 * to support other platforms.
23 * AMD MI300-based platform(s) assumptions:
24 * - Memory errors are reported through x86 MCA.
25 * - The entire DRAM row containing a memory error should be retired.
26 * - There will be (1) FRU memory poison section per CPER.
27 * - The FRU will be the CPU package (processor socket).
28 * - The default number of memory poison descriptor entries should be (8).
29 * - The platform will use ACPI ERST for persistent storage.
30 * - All FRU records should be saved to persistent storage. Module init will
31 * fail if any FRU record is not successfully written.
33 * - Boot time memory retirement may occur later than ideal due to dependencies
34 * on other libraries and drivers. This leaves a gap where bad memory may be
35 * accessed during early boot stages.
37 * - Enough memory should be pre-allocated for each FRU record to be able to hold
38 * the expected number of descriptor entries. This, mostly empty, record is
39 * written to storage during init time. Subsequent writes to the same record
40 * should allow the Platform to update the stored record in-place. Otherwise,
41 * if the record is extended, then the Platform may need to perform costly memory
42 * management operations on the storage. For example, the Platform may spend time
43 * in Firmware copying and invalidating memory on a relatively slow SPI ROM.
46 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
48 #include <linux/cper.h>
49 #include <linux/ras.h>
50 #include <linux/cpu.h>
52 #include <acpi/apei.h>
54 #include <asm/cpu_device_id.h>
57 #include "../debugfs.h"
59 #include "atl/internal.h"
/* Sentinel meaning "no CPU found" when searching for a CPU belonging to a FRU. */
#define INVALID_CPU			UINT_MAX

/* Validation Bits: flag which fields of the FRU Memory Poison section are populated. */
#define FMP_VALID_ARCH_TYPE		BIT_ULL(0)
#define FMP_VALID_ARCH			BIT_ULL(1)
#define FMP_VALID_ID_TYPE		BIT_ULL(2)
#define FMP_VALID_ID			BIT_ULL(3)
#define FMP_VALID_LIST_ENTRIES		BIT_ULL(4)
#define FMP_VALID_LIST			BIT_ULL(5)

/* FRU Architecture Types */
#define FMP_ARCH_TYPE_X86_CPUID_1_EAX	0

/* FRU ID Types */
#define FMP_ID_TYPE_X86_PPIN		0
77 /* FRU Memory Poison Section */
78 struct cper_sec_fru_mem_poison
{
/* FRU Descriptor ID Types */
#define FPD_HW_ID_TYPE_MCA_IPID		0

/* FRU Descriptor Address Types */
#define FPD_ADDR_TYPE_MCA_ADDR		0
94 /* Memory Poison Descriptor */
95 struct cper_fru_poison_desc
{
103 /* Collection of headers and sections for easy pointer use. */
105 struct cper_record_header hdr
;
106 struct cper_section_descriptor sec_desc
;
107 struct cper_sec_fru_mem_poison fmp
;
108 struct cper_fru_poison_desc entries
[];
112 * Pointers to the complete CPER record of each FRU.
114 * Memory allocation will include padded space for descriptor entries.
116 static struct fru_rec
**fru_records
;
118 /* system physical addresses array */
119 static u64
*spa_entries
;
121 static struct dentry
*fmpm_dfs_dir
;
122 static struct dentry
*fmpm_dfs_entries
;
/* Creator ID placed in the CPER record header and used to filter ERST reads. */
#define CPER_CREATOR_FMP						\
	GUID_INIT(0xcd5c2993, 0xf4b2, 0x41b2, 0xb5, 0xd4, 0xf9, 0xc3,	\
		  0xa0, 0x33, 0x08, 0x75)

/* Section type GUID placed in the CPER section descriptor. */
#define CPER_SECTION_TYPE_FMP						\
	GUID_INIT(0x5e4706c1, 0x5356, 0x48c6, 0x93, 0x0b, 0x52, 0xf2,	\
		  0x12, 0x0a, 0x44, 0x58)
133 * DOC: max_nr_entries (byte)
134 * Maximum number of descriptor entries possible for each FRU.
136 * Values between '1' and '255' are valid.
137 * No input or '0' will default to FMPM_DEFAULT_MAX_NR_ENTRIES.
139 static u8 max_nr_entries
;
140 module_param(max_nr_entries
, byte
, 0644);
141 MODULE_PARM_DESC(max_nr_entries
,
142 "Maximum number of memory poison descriptor entries per FRU");
/* Used when the max_nr_entries parameter is left at '0'. */
#define FMPM_DEFAULT_MAX_NR_ENTRIES	8

/* Maximum number of FRUs in the system. */
#define FMPM_MAX_NR_FRU			256
static unsigned int max_nr_fru;

/* Total length of record including headers and list of descriptor entries. */
static size_t max_rec_len;

/* Largest record possible: headers plus 255 entries (max_nr_entries is a u8). */
#define FMPM_MAX_REC_LEN (sizeof(struct fru_rec) + (sizeof(struct cper_fru_poison_desc) * 255))

/* Total number of SPA entries across all FRUs. */
static unsigned int spa_nr_entries;
/*
 * Protect the local records cache in fru_records and prevent concurrent
 * writes to storage. This is only needed after init once notifier block
 * registration is done.
 *
 * The majority of a record is fixed at module init and will not change
 * during run time. The entries within a record will be updated as new
 * errors are reported. The mutex should be held whenever the entries are
 * accessed during run time.
 */
static DEFINE_MUTEX(fmpm_update_mutex);
/*
 * Iterate over every FRU record: 'i' is the index, 'rec' the record pointer.
 *
 * The bound is tested before fru_records[] is read, so the element one past
 * the end of the array is never loaded. After a complete pass 'rec' holds
 * the last record visited (it is untouched if there are no FRUs).
 */
#define for_each_fru(i, rec) \
	for ((i) = 0; (i) < max_nr_fru && ((rec) = fru_records[i], 1); (i)++)
173 static inline u32
get_fmp_len(struct fru_rec
*rec
)
175 return rec
->sec_desc
.section_length
- sizeof(struct cper_section_descriptor
);
178 static struct fru_rec
*get_fru_record(u64 fru_id
)
183 for_each_fru(i
, rec
) {
184 if (rec
->fmp
.fru_id
== fru_id
)
188 pr_debug("Record not found for FRU 0x%016llx\n", fru_id
);
194 * Sum up all bytes within the FRU Memory Poison Section including the Memory
195 * Poison Descriptor entries.
197 * Don't include the old checksum here. It's a u32 value, so summing each of its
198 * bytes will give the wrong total.
200 static u32
do_fmp_checksum(struct cper_sec_fru_mem_poison
*fmp
, u32 len
)
205 /* Skip old checksum. */
206 buf
= (u8
*)fmp
+ sizeof(u32
);
210 checksum
+= (u8
)(*(buf
++));
215 static int update_record_on_storage(struct fru_rec
*rec
)
220 /* Calculate a new checksum. */
221 len
= get_fmp_len(rec
);
223 /* Get the current total. */
224 checksum
= do_fmp_checksum(&rec
->fmp
, len
);
226 /* Use the complement value. */
227 rec
->fmp
.checksum
= -checksum
;
229 pr_debug("Writing to storage\n");
231 ret
= erst_write(&rec
->hdr
);
233 pr_warn("Storage update failed for FRU 0x%016llx\n", rec
->fmp
.fru_id
);
236 pr_warn("Not enough space on storage\n");
242 static bool rec_has_valid_entries(struct fru_rec
*rec
)
244 if (!(rec
->fmp
.validation_bits
& FMP_VALID_LIST_ENTRIES
))
247 if (!(rec
->fmp
.validation_bits
& FMP_VALID_LIST
))
253 static bool fpds_equal(struct cper_fru_poison_desc
*old
, struct cper_fru_poison_desc
*new)
256 * Ignore timestamp field.
257 * The same physical error may be reported multiple times due to stuck bits, etc.
259 * Also, order the checks from most->least likely to fail to shortcut the code.
261 if (old
->addr
!= new->addr
)
264 if (old
->hw_id
!= new->hw_id
)
267 if (old
->addr_type
!= new->addr_type
)
270 if (old
->hw_id_type
!= new->hw_id_type
)
276 static bool rec_has_fpd(struct fru_rec
*rec
, struct cper_fru_poison_desc
*fpd
)
280 for (i
= 0; i
< rec
->fmp
.nr_entries
; i
++) {
281 struct cper_fru_poison_desc
*fpd_i
= &rec
->entries
[i
];
283 if (fpds_equal(fpd_i
, fpd
)) {
284 pr_debug("Found duplicate record\n");
292 static void save_spa(struct fru_rec
*rec
, unsigned int entry
,
293 u64 addr
, u64 id
, unsigned int cpu
)
295 unsigned int i
, fru_idx
, spa_entry
;
296 struct atl_err a_err
;
299 if (entry
>= max_nr_entries
) {
300 pr_warn_once("FRU descriptor entry %d out-of-bounds (max: %d)\n",
301 entry
, max_nr_entries
);
305 /* spa_nr_entries is always multiple of max_nr_entries */
306 for (i
= 0; i
< spa_nr_entries
; i
+= max_nr_entries
) {
307 fru_idx
= i
/ max_nr_entries
;
308 if (fru_records
[fru_idx
] == rec
)
312 if (i
>= spa_nr_entries
) {
313 pr_warn_once("FRU record %d not found\n", i
);
317 spa_entry
= i
+ entry
;
318 if (spa_entry
>= spa_nr_entries
) {
319 pr_warn_once("spa_entries[] index out-of-bounds\n");
323 memset(&a_err
, 0, sizeof(struct atl_err
));
329 spa
= amd_convert_umc_mca_addr_to_sys_addr(&a_err
);
330 if (IS_ERR_VALUE(spa
)) {
331 pr_debug("Failed to get system address\n");
335 spa_entries
[spa_entry
] = spa
;
336 pr_debug("fru_idx: %u, entry: %u, spa_entry: %u, spa: 0x%016llx\n",
337 fru_idx
, entry
, spa_entry
, spa_entries
[spa_entry
]);
340 static void update_fru_record(struct fru_rec
*rec
, struct mce
*m
)
342 struct cper_sec_fru_mem_poison
*fmp
= &rec
->fmp
;
343 struct cper_fru_poison_desc fpd
, *fpd_dest
;
346 mutex_lock(&fmpm_update_mutex
);
348 memset(&fpd
, 0, sizeof(struct cper_fru_poison_desc
));
350 fpd
.timestamp
= m
->time
;
351 fpd
.hw_id_type
= FPD_HW_ID_TYPE_MCA_IPID
;
353 fpd
.addr_type
= FPD_ADDR_TYPE_MCA_ADDR
;
356 /* This is the first entry, so just save it. */
357 if (!rec_has_valid_entries(rec
))
360 /* Ignore already recorded errors. */
361 if (rec_has_fpd(rec
, &fpd
))
364 if (rec
->fmp
.nr_entries
>= max_nr_entries
) {
365 pr_warn("Exceeded number of entries for FRU 0x%016llx\n", rec
->fmp
.fru_id
);
369 entry
= fmp
->nr_entries
;
372 save_spa(rec
, entry
, m
->addr
, m
->ipid
, m
->extcpu
);
373 fpd_dest
= &rec
->entries
[entry
];
374 memcpy(fpd_dest
, &fpd
, sizeof(struct cper_fru_poison_desc
));
376 fmp
->nr_entries
= entry
+ 1;
377 fmp
->validation_bits
|= FMP_VALID_LIST_ENTRIES
;
378 fmp
->validation_bits
|= FMP_VALID_LIST
;
380 pr_debug("Updated FRU 0x%016llx entry #%u\n", fmp
->fru_id
, entry
);
382 update_record_on_storage(rec
);
385 mutex_unlock(&fmpm_update_mutex
);
388 static void retire_dram_row(u64 addr
, u64 id
, u32 cpu
)
390 struct atl_err a_err
;
392 memset(&a_err
, 0, sizeof(struct atl_err
));
398 amd_retire_dram_row(&a_err
);
401 static int fru_handle_mem_poison(struct notifier_block
*nb
, unsigned long val
, void *data
)
403 struct mce
*m
= (struct mce
*)data
;
406 if (!mce_is_memory_error(m
))
409 retire_dram_row(m
->addr
, m
->ipid
, m
->extcpu
);
412 * An invalid FRU ID should not happen on real errors. But it
413 * could happen from software error injection, etc.
415 rec
= get_fru_record(m
->ppin
);
419 update_fru_record(rec
, m
);
424 static struct notifier_block fru_mem_poison_nb
= {
425 .notifier_call
= fru_handle_mem_poison
,
426 .priority
= MCE_PRIO_LOWEST
,
429 static void retire_mem_fmp(struct fru_rec
*rec
)
431 struct cper_sec_fru_mem_poison
*fmp
= &rec
->fmp
;
434 for (i
= 0; i
< fmp
->nr_entries
; i
++) {
435 struct cper_fru_poison_desc
*fpd
= &rec
->entries
[i
];
436 unsigned int err_cpu
= INVALID_CPU
;
438 if (fpd
->hw_id_type
!= FPD_HW_ID_TYPE_MCA_IPID
)
441 if (fpd
->addr_type
!= FPD_ADDR_TYPE_MCA_ADDR
)
445 for_each_online_cpu(cpu
) {
446 if (topology_ppin(cpu
) == fmp
->fru_id
) {
453 if (err_cpu
== INVALID_CPU
)
456 retire_dram_row(fpd
->addr
, fpd
->hw_id
, err_cpu
);
457 save_spa(rec
, i
, fpd
->addr
, fpd
->hw_id
, err_cpu
);
461 static void retire_mem_records(void)
466 for_each_fru(i
, rec
) {
467 if (!rec_has_valid_entries(rec
))
474 /* Set the CPER Record Header and CPER Section Descriptor fields. */
475 static void set_rec_fields(struct fru_rec
*rec
)
477 struct cper_section_descriptor
*sec_desc
= &rec
->sec_desc
;
478 struct cper_record_header
*hdr
= &rec
->hdr
;
481 * This is a saved record created with fewer max_nr_entries.
482 * Update the record lengths and keep everything else as-is.
484 if (hdr
->record_length
&& hdr
->record_length
< max_rec_len
) {
485 pr_debug("Growing record 0x%016llx from %u to %zu bytes\n",
486 hdr
->record_id
, hdr
->record_length
, max_rec_len
);
490 memcpy(hdr
->signature
, CPER_SIG_RECORD
, CPER_SIG_SIZE
);
491 hdr
->revision
= CPER_RECORD_REV
;
492 hdr
->signature_end
= CPER_SIG_END
;
495 * Currently, it is assumed that there is one FRU Memory Poison
496 * section per CPER. But this may change for other implementations.
498 hdr
->section_count
= 1;
500 /* The logged errors are recoverable. Otherwise, they'd never make it here. */
501 hdr
->error_severity
= CPER_SEV_RECOVERABLE
;
503 hdr
->validation_bits
= 0;
504 hdr
->creator_id
= CPER_CREATOR_FMP
;
505 hdr
->notification_type
= CPER_NOTIFY_MCE
;
506 hdr
->record_id
= cper_next_record_id();
507 hdr
->flags
= CPER_HW_ERROR_FLAGS_PREVERR
;
509 sec_desc
->section_offset
= sizeof(struct cper_record_header
);
510 sec_desc
->revision
= CPER_SEC_REV
;
511 sec_desc
->validation_bits
= 0;
512 sec_desc
->flags
= CPER_SEC_PRIMARY
;
513 sec_desc
->section_type
= CPER_SECTION_TYPE_FMP
;
514 sec_desc
->section_severity
= CPER_SEV_RECOVERABLE
;
517 hdr
->record_length
= max_rec_len
;
518 sec_desc
->section_length
= max_rec_len
- sizeof(struct cper_record_header
);
521 static int save_new_records(void)
523 DECLARE_BITMAP(new_records
, FMPM_MAX_NR_FRU
);
528 for_each_fru(i
, rec
) {
529 /* No need to update saved records that match the current record size. */
530 if (rec
->hdr
.record_length
== max_rec_len
)
533 if (!rec
->hdr
.record_length
)
534 set_bit(i
, new_records
);
538 ret
= update_record_on_storage(rec
);
546 for_each_fru(i
, rec
) {
547 if (!test_bit(i
, new_records
))
550 erst_clear(rec
->hdr
.record_id
);
556 /* Check that the record matches expected types for the current system.*/
557 static bool fmp_is_usable(struct fru_rec
*rec
)
559 struct cper_sec_fru_mem_poison
*fmp
= &rec
->fmp
;
562 pr_debug("Validation bits: 0x%016llx\n", fmp
->validation_bits
);
564 if (!(fmp
->validation_bits
& FMP_VALID_ARCH_TYPE
)) {
565 pr_debug("Arch type unknown\n");
569 if (fmp
->fru_arch_type
!= FMP_ARCH_TYPE_X86_CPUID_1_EAX
) {
570 pr_debug("Arch type not 'x86 Family/Model/Stepping'\n");
574 if (!(fmp
->validation_bits
& FMP_VALID_ARCH
)) {
575 pr_debug("Arch value unknown\n");
579 cpuid
= cpuid_eax(1);
580 if (fmp
->fru_arch
!= cpuid
) {
581 pr_debug("Arch value mismatch: record = 0x%016llx, system = 0x%016llx\n",
582 fmp
->fru_arch
, cpuid
);
586 if (!(fmp
->validation_bits
& FMP_VALID_ID_TYPE
)) {
587 pr_debug("FRU ID type unknown\n");
591 if (fmp
->fru_id_type
!= FMP_ID_TYPE_X86_PPIN
) {
592 pr_debug("FRU ID type is not 'x86 PPIN'\n");
596 if (!(fmp
->validation_bits
& FMP_VALID_ID
)) {
597 pr_debug("FRU ID value unknown\n");
604 static bool fmp_is_valid(struct fru_rec
*rec
)
606 struct cper_sec_fru_mem_poison
*fmp
= &rec
->fmp
;
609 len
= get_fmp_len(rec
);
610 if (len
< sizeof(struct cper_sec_fru_mem_poison
)) {
611 pr_debug("fmp length is too small\n");
615 /* Checksum must sum to zero for the entire section. */
616 checksum
= do_fmp_checksum(fmp
, len
) + fmp
->checksum
;
618 pr_debug("fmp checksum failed: sum = 0x%x\n", checksum
);
619 print_hex_dump_debug("fmp record: ", DUMP_PREFIX_NONE
, 16, 1, fmp
, len
, false);
623 if (!fmp_is_usable(rec
))
629 static struct fru_rec
*get_valid_record(struct fru_rec
*old
)
633 if (!fmp_is_valid(old
)) {
634 pr_debug("Ignoring invalid record\n");
638 new = get_fru_record(old
->fmp
.fru_id
);
640 pr_debug("Ignoring record for absent FRU\n");
646 * Fetch saved records from persistent storage.
648 * For each found record:
649 * - If it was not created by this module, then ignore it.
650 * - If it is valid, then copy its data to the local cache.
651 * - If it is not valid, then erase it.
653 static int get_saved_records(void)
655 struct fru_rec
*old
, *new;
660 old
= kmalloc(FMPM_MAX_REC_LEN
, GFP_KERNEL
);
666 ret
= erst_get_record_id_begin(&pos
);
670 while (!erst_get_record_id_next(&pos
, &record_id
)) {
671 if (record_id
== APEI_ERST_INVALID_RECORD_ID
)
674 * Make sure to clear temporary buffer between reads to avoid
675 * leftover data from records of various sizes.
677 memset(old
, 0, FMPM_MAX_REC_LEN
);
679 len
= erst_read_record(record_id
, &old
->hdr
, FMPM_MAX_REC_LEN
,
680 sizeof(struct fru_rec
), &CPER_CREATOR_FMP
);
684 new = get_valid_record(old
);
686 erst_clear(record_id
);
690 if (len
> max_rec_len
) {
691 unsigned int saved_nr_entries
;
693 saved_nr_entries
= len
- sizeof(struct fru_rec
);
694 saved_nr_entries
/= sizeof(struct cper_fru_poison_desc
);
696 pr_warn("Saved record found with %u entries.\n", saved_nr_entries
);
697 pr_warn("Please increase max_nr_entries to %u.\n", saved_nr_entries
);
703 /* Restore the record */
704 memcpy(new, old
, len
);
708 erst_get_record_id_end();
714 static void set_fmp_fields(struct fru_rec
*rec
, unsigned int cpu
)
716 struct cper_sec_fru_mem_poison
*fmp
= &rec
->fmp
;
718 fmp
->fru_arch_type
= FMP_ARCH_TYPE_X86_CPUID_1_EAX
;
719 fmp
->validation_bits
|= FMP_VALID_ARCH_TYPE
;
721 /* Assume all CPUs in the system have the same value for now. */
722 fmp
->fru_arch
= cpuid_eax(1);
723 fmp
->validation_bits
|= FMP_VALID_ARCH
;
725 fmp
->fru_id_type
= FMP_ID_TYPE_X86_PPIN
;
726 fmp
->validation_bits
|= FMP_VALID_ID_TYPE
;
728 fmp
->fru_id
= topology_ppin(cpu
);
729 fmp
->validation_bits
|= FMP_VALID_ID
;
732 static int init_fmps(void)
738 for_each_fru(i
, rec
) {
739 unsigned int fru_cpu
= INVALID_CPU
;
742 for_each_online_cpu(cpu
) {
743 if (topology_physical_package_id(cpu
) == i
) {
750 if (fru_cpu
== INVALID_CPU
) {
751 pr_debug("Failed to find matching CPU for FRU #%u\n", i
);
756 set_fmp_fields(rec
, fru_cpu
);
762 static int get_system_info(void)
764 /* Only load on MI300A systems for now. */
765 if (!(boot_cpu_data
.x86_model
>= 0x90 &&
766 boot_cpu_data
.x86_model
<= 0x9f))
769 if (!cpu_feature_enabled(X86_FEATURE_AMD_PPIN
)) {
770 pr_debug("PPIN feature not available\n");
774 /* Use CPU socket as FRU for MI300 systems. */
775 max_nr_fru
= topology_max_packages();
779 if (max_nr_fru
> FMPM_MAX_NR_FRU
) {
780 pr_warn("Too many FRUs to manage: found: %u, max: %u\n",
781 max_nr_fru
, FMPM_MAX_NR_FRU
);
786 max_nr_entries
= FMPM_DEFAULT_MAX_NR_ENTRIES
;
788 spa_nr_entries
= max_nr_fru
* max_nr_entries
;
790 max_rec_len
= sizeof(struct fru_rec
);
791 max_rec_len
+= sizeof(struct cper_fru_poison_desc
) * max_nr_entries
;
793 pr_info("max FRUs: %u, max entries: %u, max record length: %lu\n",
794 max_nr_fru
, max_nr_entries
, max_rec_len
);
799 static void free_records(void)
811 static int allocate_records(void)
815 fru_records
= kcalloc(max_nr_fru
, sizeof(struct fru_rec
*), GFP_KERNEL
);
821 for (i
= 0; i
< max_nr_fru
; i
++) {
822 fru_records
[i
] = kzalloc(max_rec_len
, GFP_KERNEL
);
823 if (!fru_records
[i
]) {
829 spa_entries
= kcalloc(spa_nr_entries
, sizeof(u64
), GFP_KERNEL
);
835 for (i
= 0; i
< spa_nr_entries
; i
++)
836 spa_entries
[i
] = INVALID_SPA
;
842 kfree(fru_records
[i
]);
849 static void *fmpm_start(struct seq_file
*f
, loff_t
*pos
)
851 if (*pos
>= (spa_nr_entries
+ 1))
856 static void *fmpm_next(struct seq_file
*f
, void *data
, loff_t
*pos
)
858 if (++(*pos
) >= (spa_nr_entries
+ 1))
/* seq_file stop: nothing to release. */
static void fmpm_stop(struct seq_file *f, void *data)
{
}
/* Column widths for the debugfs "entries" table. */
#define SHORT_WIDTH	8
/* "0x" followed by 16 hex digits, as printed with "0x%016llx". */
#define U64_WIDTH	18
#define TIMESTAMP_WIDTH	19
#define LONG_WIDTH	24
/* Padding that widens a printed u64 / timestamp to a full LONG_WIDTH column. */
#define U64_PAD		(LONG_WIDTH - U64_WIDTH)
#define TS_PAD		(LONG_WIDTH - TIMESTAMP_WIDTH)
873 static int fmpm_show(struct seq_file
*f
, void *data
)
875 unsigned int fru_idx
, entry
, spa_entry
, line
;
876 struct cper_fru_poison_desc
*fpd
;
879 line
= *(loff_t
*)data
;
881 seq_printf(f
, "%-*s", SHORT_WIDTH
, "fru_idx");
882 seq_printf(f
, "%-*s", LONG_WIDTH
, "fru_id");
883 seq_printf(f
, "%-*s", SHORT_WIDTH
, "entry");
884 seq_printf(f
, "%-*s", LONG_WIDTH
, "timestamp");
885 seq_printf(f
, "%-*s", LONG_WIDTH
, "hw_id");
886 seq_printf(f
, "%-*s", LONG_WIDTH
, "addr");
887 seq_printf(f
, "%-*s", LONG_WIDTH
, "spa");
891 spa_entry
= line
- 1;
892 fru_idx
= spa_entry
/ max_nr_entries
;
893 entry
= spa_entry
% max_nr_entries
;
895 rec
= fru_records
[fru_idx
];
899 seq_printf(f
, "%-*u", SHORT_WIDTH
, fru_idx
);
900 seq_printf(f
, "0x%016llx%-*s", rec
->fmp
.fru_id
, U64_PAD
, "");
901 seq_printf(f
, "%-*u", SHORT_WIDTH
, entry
);
903 mutex_lock(&fmpm_update_mutex
);
905 if (entry
>= rec
->fmp
.nr_entries
) {
906 seq_printf(f
, "%-*s", LONG_WIDTH
, "*");
907 seq_printf(f
, "%-*s", LONG_WIDTH
, "*");
908 seq_printf(f
, "%-*s", LONG_WIDTH
, "*");
909 seq_printf(f
, "%-*s", LONG_WIDTH
, "*");
913 fpd
= &rec
->entries
[entry
];
915 seq_printf(f
, "%ptT%-*s", &fpd
->timestamp
, TS_PAD
, "");
916 seq_printf(f
, "0x%016llx%-*s", fpd
->hw_id
, U64_PAD
, "");
917 seq_printf(f
, "0x%016llx%-*s", fpd
->addr
, U64_PAD
, "");
919 if (spa_entries
[spa_entry
] == INVALID_SPA
)
920 seq_printf(f
, "%-*s", LONG_WIDTH
, "*");
922 seq_printf(f
, "0x%016llx%-*s", spa_entries
[spa_entry
], U64_PAD
, "");
925 mutex_unlock(&fmpm_update_mutex
);
932 static const struct seq_operations fmpm_seq_ops
= {
939 static int fmpm_open(struct inode
*inode
, struct file
*file
)
941 return seq_open(file
, &fmpm_seq_ops
);
944 static const struct file_operations fmpm_fops
= {
946 .release
= seq_release
,
951 static void setup_debugfs(void)
953 struct dentry
*dfs
= ras_get_debugfs_root();
958 fmpm_dfs_dir
= debugfs_create_dir("fmpm", dfs
);
962 fmpm_dfs_entries
= debugfs_create_file("entries", 0400, fmpm_dfs_dir
, NULL
, &fmpm_fops
);
963 if (!fmpm_dfs_entries
)
964 debugfs_remove(fmpm_dfs_dir
);
967 static const struct x86_cpu_id fmpm_cpuids
[] = {
968 X86_MATCH_VENDOR_FAM(AMD
, 0x19, NULL
),
971 MODULE_DEVICE_TABLE(x86cpu
, fmpm_cpuids
);
973 static int __init
fru_mem_poison_init(void)
977 if (!x86_match_cpu(fmpm_cpuids
)) {
983 pr_debug("ERST not available\n");
988 ret
= get_system_info();
992 ret
= allocate_records();
1000 ret
= get_saved_records();
1004 ret
= save_new_records();
1010 retire_mem_records();
1012 mce_register_decode_chain(&fru_mem_poison_nb
);
1014 pr_info("FRU Memory Poison Manager initialized\n");
1023 static void __exit
fru_mem_poison_exit(void)
1025 mce_unregister_decode_chain(&fru_mem_poison_nb
);
1026 debugfs_remove(fmpm_dfs_dir
);
1030 module_init(fru_mem_poison_init
);
1031 module_exit(fru_mem_poison_exit
);
1033 MODULE_LICENSE("GPL");
1034 MODULE_DESCRIPTION("FRU Memory Poison Manager");