/*
 * /dev/mcelog driver
 *
 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
 * Rest from unknown author(s).
 * 2004 Andi Kleen. Rewrote most of it.
 * Copyright 2008 Intel Corporation
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/poll.h>

#include "mce-internal.h"
static BLOCKING_NOTIFIER_HEAD(mce_injector_chain);

static DEFINE_MUTEX(mce_chrdev_read_mutex);

/* User mode helper program triggered by machine check event */
static char mce_helper[128];
static char *mce_helper_argv[2] = { mce_helper, NULL };
/*
 * Lockless MCE logging infrastructure.
 * This avoids deadlocks on printk locks without having to break locks. Also
 * separates MCEs from kernel messages to avoid bogus bug reports.
 */
static struct mce_log_buffer mcelog = {
	.signature	= MCE_LOG_SIGNATURE,
	.len		= MCE_LOG_LEN,
	.recordlen	= sizeof(struct mce),
};

static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
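
/*
 * For orientation, a sketch of the log buffer this file fills in. The
 * authoritative definition lives in mce-internal.h; treat this copy as
 * approximate:
 *
 *	struct mce_log_buffer {
 *		char		signature[12];	// MCE_LOG_SIGNATURE
 *		unsigned	len;		// = MCE_LOG_LEN
 *		unsigned	next;		// first unused entry slot
 *		unsigned	flags;		// MCE_OVERFLOW etc.
 *		unsigned	recordlen;	// sizeof(struct mce)
 *		struct mce	entry[MCE_LOG_LEN];
 *	};
 */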
static int dev_mce_log(struct notifier_block *nb, unsigned long val,
			void *data)
{
	struct mce *mce = (struct mce *)data;
	unsigned int entry;

	mutex_lock(&mce_chrdev_read_mutex);

	entry = mcelog.next;

	/*
	 * When the buffer fills up discard new entries. Assume that the
	 * earlier errors are the more interesting ones:
	 */
	if (entry >= MCE_LOG_LEN) {
		set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
		goto unlock;
	}

	mcelog.next = entry + 1;

	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
	mcelog.entry[entry].finished = 1;

	/* wake processes polling /dev/mcelog */
	wake_up_interruptible(&mce_chrdev_wait);

unlock:
	mutex_unlock(&mce_chrdev_read_mutex);

	return NOTIFY_OK;
}
static struct notifier_block dev_mcelog_nb = {
	.notifier_call	= dev_mce_log,
	.priority	= MCE_PRIO_MCELOG,
};
static void mce_do_trigger(struct work_struct *work)
{
	call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
}

static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
void mce_work_trigger(void)
{
	if (mce_helper[0])
		schedule_work(&mce_trigger_work);
}
static ssize_t
show_trigger(struct device *s, struct device_attribute *attr, char *buf)
{
	strcpy(buf, mce_helper);
	strcat(buf, "\n");
	return strlen(mce_helper) + 1;
}
static ssize_t set_trigger(struct device *s, struct device_attribute *attr,
			   const char *buf, size_t siz)
{
	char *p;

	strncpy(mce_helper, buf, sizeof(mce_helper));
	mce_helper[sizeof(mce_helper)-1] = 0;
	p = strchr(mce_helper, '\n');

	if (p)
		*p = 0;

	return strlen(mce_helper) + !!p;
}

DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger);
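
/*
 * Usage sketch, not part of this driver. The exact sysfs path of the
 * trigger attribute depends on how the core MCE code registers its
 * devices; on a typical tree:
 *
 *	echo /usr/sbin/mce-handler > \
 *		/sys/devices/system/machinecheck/machinecheck0/trigger
 *
 * Once set, mce_work_trigger() runs the named program via the usermode
 * helper each time an MCE is logged.
 */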
/*
 * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log.
 */

static DEFINE_SPINLOCK(mce_chrdev_state_lock);
static int mce_chrdev_open_count;	/* #times opened */
static int mce_chrdev_open_exclu;	/* already open exclusive? */
static int mce_chrdev_open(struct inode *inode, struct file *file)
{
	spin_lock(&mce_chrdev_state_lock);

	if (mce_chrdev_open_exclu ||
	    (mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
		spin_unlock(&mce_chrdev_state_lock);

		return -EBUSY;
	}

	if (file->f_flags & O_EXCL)
		mce_chrdev_open_exclu = 1;
	mce_chrdev_open_count++;

	spin_unlock(&mce_chrdev_state_lock);

	return nonseekable_open(inode, file);
}
static int mce_chrdev_release(struct inode *inode, struct file *file)
{
	spin_lock(&mce_chrdev_state_lock);

	mce_chrdev_open_count--;
	mce_chrdev_open_exclu = 0;

	spin_unlock(&mce_chrdev_state_lock);

	return 0;
}
static int mce_apei_read_done;
/* Collect MCE record of previous boot in persistent storage via APEI ERST. */
static int __mce_read_apei(char __user **ubuf, size_t usize)
{
	int rc;
	u64 record_id;
	struct mce m;

	if (usize < sizeof(struct mce))
		return -EINVAL;

	rc = apei_read_mce(&m, &record_id);
	/* Error or no more MCE record */
	if (rc <= 0) {
		mce_apei_read_done = 1;
		/*
		 * When ERST is disabled, mce_chrdev_read() should return
		 * "no record" instead of "no device."
		 */
		if (rc == -ENODEV)
			return 0;
		return rc;
	}
	rc = -EFAULT;
	if (copy_to_user(*ubuf, &m, sizeof(struct mce)))
		return rc;
	/*
	 * In fact, we should have cleared the record after it has
	 * been flushed to disk or sent over the network by
	 * /sbin/mcelog, but we have no interface to support that now,
	 * so just clear it to avoid duplication.
	 */
	rc = apei_clear_mce(record_id);
	if (rc) {
		mce_apei_read_done = 1;
		return rc;
	}

	*ubuf += sizeof(struct mce);

	return 0;
}
static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
				size_t usize, loff_t *off)
{
	char __user *buf = ubuf;
	unsigned next;
	int i, err;

	mutex_lock(&mce_chrdev_read_mutex);

	if (!mce_apei_read_done) {
		err = __mce_read_apei(&buf, usize);
		if (err || buf != ubuf)
			goto out;
	}

	/* Only supports full reads right now */
	err = -EINVAL;
	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
		goto out;

	next = mcelog.next;
	err = 0;

	for (i = 0; i < next; i++) {
		struct mce *m = &mcelog.entry[i];

		err |= copy_to_user(buf, m, sizeof(*m));
		buf += sizeof(*m);
	}

	memset(mcelog.entry, 0, next * sizeof(struct mce));
	mcelog.next = 0;

	if (err)
		err = -EFAULT;

out:
	mutex_unlock(&mce_chrdev_read_mutex);

	return err ? err : buf - ubuf;
}
static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait)
{
	poll_wait(file, &mce_chrdev_wait, wait);
	if (READ_ONCE(mcelog.next))
		return EPOLLIN | EPOLLRDNORM;
	if (!mce_apei_read_done && apei_check_mce())
		return EPOLLIN | EPOLLRDNORM;
	return 0;
}
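
/*
 * Usage sketch, not part of this driver: a logging daemon typically
 * blocks in poll(2) and drains the log when the device becomes readable
 * (drain_mcelog() is a placeholder; see the read sketch below):
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *
 *	while (poll(&pfd, 1, -1) > 0) {
 *		if (pfd.revents & POLLIN)
 *			drain_mcelog(fd);
 *	}
 */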
static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
				unsigned long arg)
{
	int __user *p = (int __user *)arg;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (cmd) {
	case MCE_GET_RECORD_LEN:
		return put_user(sizeof(struct mce), p);
	case MCE_GET_LOG_LEN:
		return put_user(MCE_LOG_LEN, p);
	case MCE_GETCLEAR_FLAGS: {
		unsigned flags;

		do {
			flags = mcelog.flags;
		} while (cmpxchg(&mcelog.flags, flags, 0) != flags);

		return put_user(flags, p);
	}
	default:
		return -ENOTTY;
	}
}
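
/*
 * Usage sketch, not part of this driver: mce_chrdev_read() above only
 * supports full reads, so userspace should size its buffer from the
 * ioctls rather than hard-coding the log length:
 *
 *	int reclen, loglen;
 *
 *	if (ioctl(fd, MCE_GET_RECORD_LEN, &reclen) ||
 *	    ioctl(fd, MCE_GET_LOG_LEN, &loglen))
 *		err(1, "ioctl");
 *	buf = malloc((size_t)reclen * loglen);
 *	n = read(fd, buf, (size_t)reclen * loglen);
 *	// n is a byte count; each reclen-sized chunk is one struct mce
 */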
void mce_register_injector_chain(struct notifier_block *nb)
{
	blocking_notifier_chain_register(&mce_injector_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_register_injector_chain);

void mce_unregister_injector_chain(struct notifier_block *nb)
{
	blocking_notifier_chain_unregister(&mce_injector_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_injector_chain);
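
/*
 * Consumer sketch with hypothetical names (the in-tree user of this
 * chain is the mce-inject module):
 *
 *	static int my_inject(struct notifier_block *nb, unsigned long val,
 *			     void *data)
 *	{
 *		struct mce *m = data;
 *
 *		// arrange for *m to be raised as a fake machine check
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_nb = { .notifier_call = my_inject };
 *
 *	mce_register_injector_chain(&my_nb);	// on module init
 *	mce_unregister_injector_chain(&my_nb);	// on module exit
 */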
static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
				size_t usize, loff_t *off)
{
	struct mce m;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	/*
	 * There are some cases where real MSR reads could slip
	 * through.
	 */
	if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
		return -EIO;

	if ((unsigned long)usize > sizeof(struct mce))
		usize = sizeof(struct mce);
	if (copy_from_user(&m, ubuf, usize))
		return -EFAULT;

	if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
		return -EINVAL;

	/*
	 * Need to give user space some time to set everything up,
	 * so do it a jiffie or two later everywhere.
	 */
	schedule_timeout(2);

	blocking_notifier_call_chain(&mce_injector_chain, 0, &m);

	return usize;
}
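
/*
 * Usage sketch, not part of this driver: with an injector registered on
 * the chain above, a privileged tool injects one record per write(2)
 * (field values here are placeholders):
 *
 *	struct mce m = {};
 *
 *	m.status = status;		// error signature to inject
 *	m.extcpu = 0;			// target CPU, must be online
 *	write(fd, &m, sizeof(m));	// returns sizeof(m) on success
 */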
static const struct file_operations mce_chrdev_ops = {
	.open		= mce_chrdev_open,
	.release	= mce_chrdev_release,
	.read		= mce_chrdev_read,
	.write		= mce_chrdev_write,
	.poll		= mce_chrdev_poll,
	.unlocked_ioctl	= mce_chrdev_ioctl,
	.llseek		= no_llseek,
};

static struct miscdevice mce_chrdev_device = {
	MISC_MCELOG_MINOR,
	"mcelog",
	&mce_chrdev_ops,
};
static __init int dev_mcelog_init_device(void)
{
	int err;

	/* register character device /dev/mcelog */
	err = misc_register(&mce_chrdev_device);
	if (err) {
		if (err == -EBUSY)
			/* Xen dom0 might have registered the device already. */
			pr_info("Unable to init device /dev/mcelog, already registered\n");
		else
			pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);

		return err;
	}

	mce_register_decode_chain(&dev_mcelog_nb);
	return 0;
}
device_initcall_sync(dev_mcelog_init_device);