1 /******************************************************************************
3 * Driver for receiving and transferring machine check error information
5 * Copyright (c) 2012 Intel Corporation
6 * Author: Liu, Jinsong <jinsong.liu@intel.com>
7 * Author: Jiang, Yunhong <yunhong.jiang@intel.com>
8 * Author: Ke, Liping <liping.ke@intel.com>
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation; or, when distributed
13 * separately from the Linux kernel or incorporated into other
14 * software packages, subject to the following license:
16 * Permission is hereby granted, free of charge, to any person obtaining a copy
17 * of this source file (the "Software"), to deal in the Software without
18 * restriction, including without limitation the rights to use, copy, modify,
19 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
20 * and to permit persons to whom the Software is furnished to do so, subject to
21 * the following conditions:
23 * The above copyright notice and this permission notice shall be included in
24 * all copies or substantial portions of the Software.
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
35 #define pr_fmt(fmt) "xen_mcelog: " fmt
37 #include <linux/init.h>
38 #include <linux/types.h>
39 #include <linux/kernel.h>
40 #include <linux/slab.h>
42 #include <linux/device.h>
43 #include <linux/miscdevice.h>
44 #include <linux/uaccess.h>
45 #include <linux/capability.h>
46 #include <linux/poll.h>
47 #include <linux/sched.h>
49 #include <xen/interface/xen.h>
50 #include <xen/events.h>
51 #include <xen/interface/vcpu.h>
53 #include <asm/xen/hypercall.h>
54 #include <asm/xen/hypervisor.h>
/* Buffer handed to the XEN_MC_fetch hypercall and then parsed by convert_log(). */
56 static struct mc_info g_mi
;
/*
 * Array of ncpus entries, allocated in bind_virq_for_mce() and passed to
 * the XEN_MC_physcpuinfo hypercall; convert_log() searches it by APIC id.
 */
57 static struct mcinfo_logical_cpu
*g_physinfo
;
/* CPU count reported by the XEN_MC_physcpuinfo hypercall. */
58 static uint32_t ncpus
;
/* Taken by the read handler and by xen_mce_work_fn(). */
60 static DEFINE_MUTEX(mcelog_lock
);
/* The log itself: xen_mce_log() adds records, the read handler copies them out. */
62 static struct xen_mce_log xen_mcelog
= {
63 .signature
= XEN_MCE_LOG_SIGNATURE
,
64 .len
= XEN_MCE_LOG_LEN
,
65 .recordlen
= sizeof(struct xen_mce
),
/* Held by the open and release handlers around the two counters below. */
68 static DEFINE_SPINLOCK(xen_mce_chrdev_state_lock
);
69 static int xen_mce_chrdev_open_count
; /* #times opened */
70 static int xen_mce_chrdev_open_exclu
; /* already open exclusive? */
/* poll() callers are queued here; xen_mce_work_fn() wakes them. */
72 static DECLARE_WAIT_QUEUE_HEAD(xen_mce_chrdev_wait
);
/*
 * open() handler for the mcelog character device.
 *
 * The open-state bookkeeping is done under xen_mce_chrdev_state_lock.
 * On the normal path the result is whatever nonseekable_open() returns.
 */
74 static int xen_mce_chrdev_open(struct inode
*inode
, struct file
*file
)
76 spin_lock(&xen_mce_chrdev_state_lock
);
/*
 * Conflict: the device is already held exclusively, or it is already
 * open and this caller asks for O_EXCL.  The lock is dropped on this path.
 */
78 if (xen_mce_chrdev_open_exclu
||
79 (xen_mce_chrdev_open_count
&& (file
->f_flags
& O_EXCL
))) {
80 spin_unlock(&xen_mce_chrdev_state_lock
);
/* Record an exclusive claim, if any, and count this opener. */
85 if (file
->f_flags
& O_EXCL
)
86 xen_mce_chrdev_open_exclu
= 1;
87 xen_mce_chrdev_open_count
++;
89 spin_unlock(&xen_mce_chrdev_state_lock
);
91 return nonseekable_open(inode
, file
);
/*
 * release() handler for the mcelog character device.
 *
 * Under xen_mce_chrdev_state_lock, drops one open reference and clears
 * the exclusive-open flag.
 */
94 static int xen_mce_chrdev_release(struct inode
*inode
, struct file
*file
)
96 spin_lock(&xen_mce_chrdev_state_lock
);
98 xen_mce_chrdev_open_count
--;
/* Cleared on every release, whether or not this opener used O_EXCL. */
99 xen_mce_chrdev_open_exclu
= 0;
101 spin_unlock(&xen_mce_chrdev_state_lock
);
/*
 * read() handler: hands the logged records to user space and wipes them.
 *
 * @ubuf:  user buffer receiving the records
 * @usize: size of @ubuf in bytes
 * @off:   file offset; only offset 0 passes the check below
 *
 * Runs under mcelog_lock.  Returns err when it is non-zero, otherwise
 * the distance between buf and ubuf.
 */
106 static ssize_t
xen_mce_chrdev_read(struct file
*filp
, char __user
*ubuf
,
107 size_t usize
, loff_t
*off
)
109 char __user
*buf
= ubuf
;
113 mutex_lock(&mcelog_lock
);
/* Snapshot of xen_mcelog.next, used as the number of records to copy. */
115 num
= xen_mcelog
.next
;
117 /* Only supports full reads right now */
/* The check is: offset 0 and room for all XEN_MCE_LOG_LEN records. */
119 if (*off
!= 0 || usize
< XEN_MCE_LOG_LEN
*sizeof(struct xen_mce
))
/* Copy entries [0, num); copy_to_user() results are OR-ed into err. */
123 for (i
= 0; i
< num
; i
++) {
124 struct xen_mce
*m
= &xen_mcelog
.entry
[i
];
126 err
|= copy_to_user(buf
, m
, sizeof(*m
));
/* Zero the first num entries of the log. */
130 memset(xen_mcelog
.entry
, 0, num
* sizeof(struct xen_mce
));
137 mutex_unlock(&mcelog_lock
);
139 return err
? err
: buf
- ubuf
;
/*
 * poll() handler: registers the caller on xen_mce_chrdev_wait (the queue
 * woken by xen_mce_work_fn()) and reports readability as
 * EPOLLIN | EPOLLRDNORM.
 */
142 static __poll_t
xen_mce_chrdev_poll(struct file
*file
, poll_table
*wait
)
144 poll_wait(file
, &xen_mce_chrdev_wait
, wait
);
147 return EPOLLIN
| EPOLLRDNORM
;
/*
 * ioctl() handler for the mcelog character device.
 *
 * The argument is used as a user-space pointer to int (p); every visible
 * command answers by storing one value through it with put_user().
 * The caller's CAP_SYS_ADMIN capability is checked first.
 */
152 static long xen_mce_chrdev_ioctl(struct file
*f
, unsigned int cmd
,
155 int __user
*p
= (int __user
*)arg
;
157 if (!capable(CAP_SYS_ADMIN
))
/* Size in bytes of one log record. */
161 case MCE_GET_RECORD_LEN
:
162 return put_user(sizeof(struct xen_mce
), p
);
/* The XEN_MCE_LOG_LEN constant (also stored in xen_mcelog.len). */
163 case MCE_GET_LOG_LEN
:
164 return put_user(XEN_MCE_LOG_LEN
, p
);
165 case MCE_GETCLEAR_FLAGS
: {
/*
 * Read-and-clear of xen_mcelog.flags: retry until cmpxchg() swaps in 0
 * over the exact value that was read, then return that value.
 */
169 flags
= xen_mcelog
.flags
;
170 } while (cmpxchg(&xen_mcelog
.flags
, flags
, 0) != flags
);
172 return put_user(flags
, p
);
/* File operations of the mcelog character device, wired to the handlers above. */
179 static const struct file_operations xen_mce_chrdev_ops
= {
180 .open
= xen_mce_chrdev_open
,
181 .release
= xen_mce_chrdev_release
,
182 .read
= xen_mce_chrdev_read
,
183 .poll
= xen_mce_chrdev_poll
,
184 .unlocked_ioctl
= xen_mce_chrdev_ioctl
,
/*
 * Misc device passed to misc_register() / misc_deregister() in
 * xen_late_init_mcelog().
 */
187 static struct miscdevice xen_mce_chrdev_device
= {
194 * Caller should hold the mcelog_lock
196 static void xen_mce_log(struct xen_mce
*mce
)
/* Index of the next free slot in xen_mcelog.entry[]. */
200 entry
= xen_mcelog
.next
;
203 * When the buffer fills up discard new entries.
204 * Assume that the earlier errors are the more
207 if (entry
>= XEN_MCE_LOG_LEN
) {
/* Log is full: flag the overflow in xen_mcelog.flags. */
208 set_bit(XEN_MCE_OVERFLOW
,
209 (unsigned long *)&xen_mcelog
.flags
);
/* Store a copy of *mce in slot 'entry'. */
213 memcpy(xen_mcelog
.entry
+ entry
, mce
, sizeof(struct xen_mce
));
/*
 * Translate one hypervisor mc_info record (@mi) into struct xen_mce form.
 *
 * A local struct xen_mce (m) is filled from three sources:
 *  - the MC_TYPE_GLOBAL section: mcgstatus and apicid;
 *  - the g_physinfo[] entry whose mc_apicid matches: socketid, cpu/extcpu,
 *    cpuvendor and values taken from its mc_msrvalues[] list;
 *  - each MC_TYPE_BANK section: misc, status, addr, tsc and bank.
 */
218 static int convert_log(struct mc_info
*mi
)
220 struct mcinfo_common
*mic
;
221 struct mcinfo_global
*mc_global
;
222 struct mcinfo_bank
*mc_bank
;
/* Locate the global section; a warning is logged when there is none. */
227 x86_mcinfo_lookup(&mic
, mi
, MC_TYPE_GLOBAL
);
228 if (unlikely(!mic
)) {
229 pr_warn("Failed to find global error info\n");
233 memset(&m
, 0, sizeof(struct xen_mce
));
235 mc_global
= (struct mcinfo_global
*)mic
;
236 m
.mcgstatus
= mc_global
->mc_gstatus
;
237 m
.apicid
= mc_global
->mc_apicid
;
/* Search g_physinfo[] for the CPU carrying this APIC id. */
239 for (i
= 0; i
< ncpus
; i
++)
240 if (g_physinfo
[i
].mc_apicid
== m
.apicid
)
/* i == ncpus is the "no CPU matched" case. */
242 if (unlikely(i
== ncpus
)) {
243 pr_warn("Failed to match cpu with apicid %d\n", m
.apicid
);
247 m
.socketid
= g_physinfo
[i
].mc_chipid
;
248 m
.cpu
= m
.extcpu
= g_physinfo
[i
].mc_cpunr
;
249 m
.cpuvendor
= (__u8
)g_physinfo
[i
].mc_vendor
;
/* Walk the MSR values recorded for that CPU, dispatching on the MSR number. */
250 for (j
= 0; j
< g_physinfo
[i
].mc_nmsrvals
; ++j
)
251 switch (g_physinfo
[i
].mc_msrvalues
[j
].reg
) {
252 case MSR_IA32_MCG_CAP
:
253 m
.mcgcap
= g_physinfo
[i
].mc_msrvalues
[j
].value
;
258 m
.ppin
= g_physinfo
[i
].mc_msrvalues
[j
].value
;
/* Locate the first bank section; a warning is logged when there is none. */
263 x86_mcinfo_lookup(&mic
, mi
, MC_TYPE_BANK
);
264 if (unlikely(!mic
)) {
265 pr_warn("Fail to find bank error info\n");
/*
 * Sanity test on the current section: no section, a zero size, or a
 * type outside GLOBAL / BANK / EXTENDED / RECOVERY.
 */
270 if ((!mic
) || (mic
->size
== 0) ||
271 (mic
->type
!= MC_TYPE_GLOBAL
&&
272 mic
->type
!= MC_TYPE_BANK
&&
273 mic
->type
!= MC_TYPE_EXTENDED
&&
274 mic
->type
!= MC_TYPE_RECOVERY
))
/* Only bank sections contribute per-bank fields to m. */
277 if (mic
->type
== MC_TYPE_BANK
) {
278 mc_bank
= (struct mcinfo_bank
*)mic
;
279 m
.misc
= mc_bank
->mc_misc
;
280 m
.status
= mc_bank
->mc_status
;
281 m
.addr
= mc_bank
->mc_addr
;
282 m
.tsc
= mc_bank
->mc_tsc
;
283 m
.bank
= mc_bank
->mc_bank
;
/* Advance to the following section of the record. */
288 mic
= x86_mcinfo_next(mic
);
/*
 * Process the hypervisor error queue selected by @flags; the callers pass
 * XEN_MC_URGENT or XEN_MC_NONURGENT.
 *
 * Visible sequence: fetch a record into g_mi with XEN_MC_fetch, convert it
 * with convert_log(), then acknowledge it by repeating the hypercall with
 * XEN_MC_ACK added to the flags.
 */
294 static int mc_queue_handle(uint32_t flags
)
299 mc_op
.cmd
= XEN_MC_fetch
;
/* The hypervisor writes the fetched record into g_mi. */
300 set_xen_guest_handle(mc_op
.u
.mc_fetch
.data
, &g_mi
);
302 mc_op
.u
.mc_fetch
.flags
= flags
;
303 ret
= HYPERVISOR_mca(&mc_op
);
305 pr_err("Failed to fetch %surgent error log\n",
306 flags
== XEN_MC_URGENT
? "" : "non");
/* Flags reported back by the hypercall: no data, or the fetch failed. */
310 if (mc_op
.u
.mc_fetch
.flags
& XEN_MC_NODATA
||
311 mc_op
.u
.mc_fetch
.flags
& XEN_MC_FETCHFAILED
)
314 ret
= convert_log(&g_mi
);
316 pr_warn("Failed to convert this error log, continue acking it anyway\n");
/* Acknowledge the record, as the warning above says, even after a failed conversion. */
318 mc_op
.u
.mc_fetch
.flags
= flags
| XEN_MC_ACK
;
319 ret
= HYPERVISOR_mca(&mc_op
);
321 pr_err("Failed to ack previous error log\n");
330 /* virq handler for machine check error info */
/*
 * Work function behind xen_mce_work.
 *
 * Under mcelog_lock, handles the urgent queue and then the nonurgent
 * queue through mc_queue_handle(), then wakes the tasks waiting on
 * xen_mce_chrdev_wait.
 */
331 static void xen_mce_work_fn(struct work_struct
*work
)
335 mutex_lock(&mcelog_lock
);
338 err
= mc_queue_handle(XEN_MC_URGENT
);
340 pr_err("Failed to handle urgent mc_info queue, continue handling nonurgent mc_info queue anyway\n");
342 /* nonurgent mc_info */
343 err
= mc_queue_handle(XEN_MC_NONURGENT
);
345 pr_err("Failed to handle nonurgent mc_info queue\n");
347 /* wake processes polling /dev/mcelog */
348 wake_up_interruptible(&xen_mce_chrdev_wait
);
350 mutex_unlock(&mcelog_lock
);
/* Work item that runs xen_mce_work_fn(). */
352 static DECLARE_WORK(xen_mce_work
, xen_mce_work_fn
);
/*
 * Interrupt handler bound to VIRQ_MCA by bind_virq_for_mce().  It only
 * queues xen_mce_work; the queues are processed in xen_mce_work_fn().
 */
354 static irqreturn_t
xen_mce_interrupt(int irq
, void *dev_id
)
356 schedule_work(&xen_mce_work
);
/*
 * Collect the physical CPU information and bind the MCA virq.
 *
 * The XEN_MC_physcpuinfo hypercall is issued twice: once to read back the
 * CPU count, and again after g_physinfo has been allocated with that many
 * entries.  VIRQ_MCA is then bound to xen_mce_interrupt().
 */
360 static int bind_virq_for_mce(void)
365 memset(&mc_op
, 0, sizeof(struct xen_mc
));
367 /* Fetch physical CPU Numbers */
368 mc_op
.cmd
= XEN_MC_physcpuinfo
;
/* g_physinfo has not been assigned yet; this call is used for ncpus only. */
369 set_xen_guest_handle(mc_op
.u
.mc_physcpuinfo
.info
, g_physinfo
);
370 ret
= HYPERVISOR_mca(&mc_op
);
372 pr_err("Failed to get CPU numbers\n");
376 /* Fetch each CPU Physical Info for later reference*/
377 ncpus
= mc_op
.u
.mc_physcpuinfo
.ncpus
;
378 g_physinfo
= kcalloc(ncpus
, sizeof(struct mcinfo_logical_cpu
),
/* Second call, now with the allocated array as the destination handle. */
382 set_xen_guest_handle(mc_op
.u
.mc_physcpuinfo
.info
, g_physinfo
);
383 ret
= HYPERVISOR_mca(&mc_op
);
385 pr_err("Failed to get CPU info\n");
/* From here on VIRQ_MCA is delivered to xen_mce_interrupt(). */
390 ret
= bind_virq_to_irqhandler(VIRQ_MCA
, 0,
391 xen_mce_interrupt
, 0, "mce", NULL
);
393 pr_err("Failed to bind virq\n");
/*
 * Init routine, run as a device_initcall.
 *
 * After the xen_initial_domain() check it registers the misc device and
 * then calls bind_virq_for_mce().  A misc_deregister() call for the same
 * device is also present, for undoing the registration.
 */
401 static int __init
xen_late_init_mcelog(void)
405 /* Only DOM0 is responsible for MCE logging */
406 if (!xen_initial_domain())
409 /* register character device /dev/mcelog for xen mcelog */
410 ret
= misc_register(&xen_mce_chrdev_device
);
414 ret
= bind_virq_for_mce();
418 pr_info("/dev/mcelog registered by Xen\n");
423 misc_deregister(&xen_mce_chrdev_device
);
426 device_initcall(xen_late_init_mcelog
);