/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 */

/*
 * Native MCA polling.  We establish an omnipresent cyclic to fire on all
 * online cpus to check their MCA state and log any valid errors for
 * diagnosis.
 */

#include <sys/types.h>
#include <sys/atomic.h>
#include <sys/cyclic.h>
#include <sys/x86_archext.h>
#include <sys/mca_x86.h>

#include "gcpu.h"

hrtime_t gcpu_mca_poll_interval = NANOSEC * 10ULL;	/* tuneable */
static cyclic_id_t gcpu_mca_poll_cycid;
static volatile uint_t gcpu_mca_poll_inits;
extern int gcpu_poll_trace_always;
extern uint_t gcpu_poll_trace_nent;
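
/*
 * As a debugging aid, the ten-second default above can typically be
 * overridden in a running kernel with mdb, e.g.
 *
 *	echo 'gcpu_mca_poll_interval/Z 0t1000000000' | mdb -kw
 *
 * to drop the poll interval to one second.  The exact invocation above is
 * illustrative and depends on the debugging environment; note also that the
 * cyclic interval is captured in the online handler below, so a cyclic that
 * has already been registered keeps its original period.
 */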

/*
 * Return nonzero if the given handle should poll the MCH.  We stick with
 * the same handle as before unless the timestamp has not been updated
 * for a while.  There is no need to keep a hold on the mch_poll_owner
 * handle.
 */

static kmutex_t mch_poll_lock;
static hrtime_t mch_poll_timestamp;
static cmi_hdl_t mch_poll_owner;

static int
mch_pollowner(cmi_hdl_t hdl)
{
	hrtime_t now = gethrtime_waitfree();
	int dopoll = 0;

	mutex_enter(&mch_poll_lock);
	if (now - mch_poll_timestamp > 2 * gcpu_mca_poll_interval ||
	    mch_poll_timestamp == 0) {
		mch_poll_owner = hdl;
		dopoll = 1;
	} else if (mch_poll_owner == hdl) {
		dopoll = 1;
	}

	if (dopoll)
		mch_poll_timestamp = now;

	mutex_exit(&mch_poll_lock);

	return (dopoll);
}

static void
gcpu_ntv_mca_poll(cmi_hdl_t hdl, int what)
{
	gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
	gcpu_mca_t *mca = &gcpu->gcpu_mca;
	gcpu_mce_status_t mce;
	uint64_t bankmask, ctl2;
	int willpanic;
	int i;

	ASSERT(MUTEX_HELD(&gcpu->gcpu_shared->gcpus_poll_lock));
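
	/*
	 * Per the Intel SDM the CMCI enable is bit 30 of IA32_MCi_CTL2;
	 * MSR_MC_CTL2_EN is taken here to name that bit.  Setting it asks
	 * the processor to raise a corrected machine-check interrupt when
	 * a bank's corrected-error count reaches its threshold, rather
	 * than leaving corrected errors to be found only by this poller.
	 */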

	/* Enable CMCI on the first poll if it is supported */
	if (cmi_enable_cmci && (!mca->gcpu_mca_first_poll_cmci_enabled)) {
		for (i = 0; i < mca->gcpu_mca_nbanks; i++) {
			if (mca->gcpu_bank_cmci[i].cmci_cap) {
				(void) cmi_hdl_rdmsr(hdl, IA32_MSR_MC_CTL2(i),
				    &ctl2);
				ctl2 |= MSR_MC_CTL2_EN;
				(void) cmi_hdl_wrmsr(hdl, IA32_MSR_MC_CTL2(i),
				    ctl2);
				mca->gcpu_bank_cmci[i].cmci_enabled = 1;
			}
		}
		mca->gcpu_mca_first_poll_cmci_enabled = 1;
	}

	if (mca->gcpu_mca_flags & GCPU_MCA_F_UNFAULTING) {
		mca->gcpu_mca_flags &= ~GCPU_MCA_F_UNFAULTING;
		gcpu_poll_trace(&gcpu->gcpu_mca.gcpu_polltrace,
		    GCPU_MPT_WHAT_UNFAULTING, 0);

		/*
		 * On the first cyclic poll after unfaulting a CPU we
		 * clear the status registers; see gcpu_faulted_exit
		 * for details.  We don't do this if the poll was
		 * initiated manually (presumably from some injection
		 * tool).
		 */
		if (what == GCPU_MPT_WHAT_CYC_ERR) {
			for (i = 0; i < mca->gcpu_mca_nbanks; i++) {
				(void) cmi_hdl_wrmsr(hdl,
				    IA32_MSR_MC(i, STATUS), 0ULL);
			}
			return;
		}
	}
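
	/*
	 * gcpu_mca_logout() is defined elsewhere in the gcpu module;
	 * broadly, it is expected to read each selected bank's
	 * IA32_MCi_STATUS (plus ADDR/MISC where valid), report what it
	 * finds, and fill in the gcpu_mce_status_t summary used below.
	 * See its definition for the authoritative details.
	 */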

	/*
	 * Logout errors of the MCA banks of this cpu.
	 */
	if (what == GCPU_MPT_WHAT_CMCI_ERR) {
		/*
		 * For CMCI, all banks should be scanned for logout.
		 */
		bankmask = -1ULL;
	} else {
		bankmask = cms_poll_ownermask(hdl, gcpu_mca_poll_interval);
	}
	gcpu_mca_logout(hdl, NULL, bankmask, &mce, B_TRUE, what);

	if (mce.mce_nerr != 0)
		gcpu_poll_trace(&gcpu->gcpu_mca.gcpu_polltrace, what,
		    mce.mce_nerr);

	mca->gcpu_mca_lastpoll = gethrtime_waitfree();

	willpanic = mce.mce_disp & CMI_ERRDISP_FORCEFATAL && cmi_panic_on_ue();

	if (what != GCPU_MPT_WHAT_CMCI_ERR) {
		/*
		 * Call to the memory-controller driver which may report some
		 * errors not visible under the MCA (for off-chip NB).
		 * Since there is typically a single MCH we arrange that
		 * just one cpu perform this task at each cyclic fire.
		 */
		if (mch_pollowner(hdl))
			cmi_mc_logout(hdl, 0, willpanic);
	}

	/*
	 * In the common case any polled error is considered non-fatal,
	 * even if it indicates PCC or UC etc.  The only condition on which
	 * we will panic for a polled error is if model-specific support
	 * forces the error to be terminal regardless of how it is
	 * encountered.
	 */
	if (willpanic) {
		cmn_err(CE_WARN, "MCA Poll: %u errors, disp=0x%llx, "
		    "%u PCC (%u ok), "
		    "%u UC (%u ok, %u poisoned), "
		    "%u forcefatal, %u ignored",
		    mce.mce_nerr, (u_longlong_t)mce.mce_disp,
		    mce.mce_npcc, mce.mce_npcc_ok,
		    mce.mce_nuc, mce.mce_nuc_ok, mce.mce_nuc_poisoned,
		    mce.mce_forcefatal, mce.mce_ignored);

		fm_panic("Unrecoverable Machine-Check Exception (Polled)");
	}
}

/*
 * See gcpu_mca_trap for an explanation of why preemption is disabled here.
 * Note that we disable preemption and then contend for an adaptive mutex -
 * we could block during the mutex operation, but once we return with the
 * mutex held we must perform no operation that can block and we cannot
 * be preempted so we will stay on cpu for the duration.  The disabling
 * of preemption also means we cannot migrate cpus once we have returned
 * with the mutex held - cyclic invocations can't migrate, anyway, but
 * others could if they have failed to bind before this point.
 */
static void
gcpu_ntv_mca_poll_wrapper(cmi_hdl_t hdl, int what)
{
	gcpu_data_t *gcpu;

	if (hdl == NULL || (gcpu = cmi_hdl_getcmidata(hdl)) == NULL ||
	    gcpu->gcpu_mca.gcpu_mca_lgsz == 0)
		return;

	kpreempt_disable();
	mutex_enter(&gcpu->gcpu_shared->gcpus_poll_lock);
	gcpu_ntv_mca_poll(hdl, what);
	mutex_exit(&gcpu->gcpu_shared->gcpus_poll_lock);
	kpreempt_enable();
}
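
/*
 * Cyclic entry point.  The online handler below may install a NULL
 * handle as the cyclic argument (see its comment), which is why the
 * wrapper above checks for hdl == NULL before doing any work.
 */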
static void
gcpu_ntv_mca_poll_cyclic(void *arg)
{
	gcpu_ntv_mca_poll_wrapper((cmi_hdl_t)arg, GCPU_MPT_WHAT_CYC_ERR);
}

static void
gcpu_ntv_mca_poll_online(void *arg, cpu_t *cp, cyc_handler_t *cyh,
    cyc_time_t *cyt)
{
	cmi_hdl_t hdl;

	/*
	 * Lookup and hold a handle for this cpu (any hold released in
	 * our offline function).  If we chose not to initialize a handle
	 * for this cpu back at cmi_init time then this lookup will return
	 * NULL, so the cyh_func we appoint must be prepared for that.
	 */
	hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
	    cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));

	cyt->cyt_when = 0;
	cyt->cyt_interval = gcpu_mca_poll_interval;
	cyh->cyh_func = gcpu_ntv_mca_poll_cyclic;
	cyh->cyh_arg = (void *)hdl;
	cyh->cyh_level = CY_LOW_LEVEL;
}

static void
gcpu_ntv_mca_poll_offline(void *arg, cpu_t *cpu, void *cyh_arg)
{
	cmi_hdl_t hdl = (cmi_hdl_t)cyh_arg;

	if (hdl != NULL)
		cmi_hdl_rele(hdl);
}
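
/*
 * cyclic_add_omni() arranges for the online handler above to run for
 * every cpu that is currently online and for any cpu that comes online
 * later, and for the offline handler to run as cpus go offline; the
 * caller is expected to hold cpu_lock across the registration, which is
 * why it is taken below.
 */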
static void
gcpu_ntv_mca_poll_start(void)
{
	cyc_omni_handler_t cyo;

	if (gcpu_mca_poll_interval == 0 || gcpu_mca_poll_inits == 0)
		return;

	cyo.cyo_online = gcpu_ntv_mca_poll_online;
	cyo.cyo_offline = gcpu_ntv_mca_poll_offline;
	cyo.cyo_arg = NULL;

	mutex_enter(&cpu_lock);
	gcpu_mca_poll_cycid = cyclic_add_omni(&cyo);
	mutex_exit(&cpu_lock);
}

/*
 * gcpu_mca_poll_init is called from gcpu_mca_init for each cpu handle
 * that we initialize for.  It should prepare for polling by allocating
 * control structures and the like, but must not kick polling off yet.
 */

void
gcpu_mca_poll_init(cmi_hdl_t hdl)
{
	gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
	gcpu_poll_trace_ctl_t *ptc = &gcpu->gcpu_mca.gcpu_polltrace;

	ASSERT(cmi_hdl_class(hdl) == CMI_HDL_NATIVE);

	gcpu_poll_trace_init(ptc);

	atomic_inc_uint(&gcpu_mca_poll_inits);
}

/* deconfigure gcpu_mca_poll_init() */
void
gcpu_mca_poll_fini(cmi_hdl_t hdl)
{
	gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
	gcpu_poll_trace_ctl_t *ptc = &gcpu->gcpu_mca.gcpu_polltrace;

	ASSERT(cmi_hdl_class(hdl) == CMI_HDL_NATIVE);

	if (gcpu_poll_trace_always && (ptc->mptc_tbufs != NULL)) {
		kmem_free(ptc->mptc_tbufs, sizeof (gcpu_poll_trace_t) *
		    gcpu_poll_trace_nent);
	}

	atomic_dec_uint(&gcpu_mca_poll_inits);
}

void
gcpu_mca_poll_start(cmi_hdl_t hdl)
{
	ASSERT(cmi_hdl_class(hdl) == CMI_HDL_NATIVE);
	gcpu_ntv_mca_poll_start();
}

void
gcpu_hdl_poke(cmi_hdl_t hdl)
{
	ASSERT(cmi_hdl_class(hdl) == CMI_HDL_NATIVE);
	gcpu_ntv_mca_poll_wrapper(hdl, GCPU_MPT_WHAT_POKE_ERR);
}
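
/*
 * CMCI entry point.  Passing GCPU_MPT_WHAT_CMCI_ERR causes the poll above
 * to scan every bank (see the bankmask selection in gcpu_ntv_mca_poll) and
 * to skip the MCH logout, leaving that to the non-CMCI poll paths.
 */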
void
gcpu_cmci_trap(cmi_hdl_t hdl)
{
	gcpu_ntv_mca_poll_wrapper(hdl, GCPU_MPT_WHAT_CMCI_ERR);
}