4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * Page retirement can be an extended process due to the fact that a retirement
29 * may not be possible when the original request is made. The kernel will
30 * repeatedly attempt to retire a given page, but will not let us know when the
31 * page has been retired. We therefore have to poll to see if the retirement
32 * has been completed. This poll is implemented with a bounded exponential
33 * backoff to reduce the burden which we impose upon the system.
35 * To reduce the burden on fmd in the face of retirement storms, we schedule
36 * all retries as a group. In the simplest case, we attempt to retire a single
37 * page. When forced to retry, we initially schedule a retry at a configurable
38 * interval t. If the retry fails, we schedule another at 2 * t, and so on,
39 * until t reaches the maximum interval (also configurable). Future retries
40 * for that page will occur with t equal to the maximum interval value. We
41 * will never give up on a retirement.
43 * With multiple retirements, the situation gets slightly more complicated. As
44 * indicated above, we schedule retries as a group. We don't want to deny new
45 * pages their short retry intervals, so we'll (re)set the retry interval to the
46 * value appropriate for the newest page.
#include <errno.h>
#include <string.h>

#include <fm/fmd_api.h>
#include <fm/libtopo.h>
#include <fm/fmd_fmri.h>
#include <fm/fmd_agent.h>
#include <sys/fm/protocol.h>
62 cma_page_free(fmd_hdl_t
*hdl
, cma_page_t
*page
)
64 nvlist_free(page
->pg_asru
);
65 nvlist_free(page
->pg_rsrc
);
66 fmd_hdl_free(hdl
, page
, sizeof (cma_page_t
));
70 * Retire the specified ASRU, referring to a memory page by PA or by DIMM
71 * offset (i.e. the encoded coordinates internal bank, row, and column).
72 * In the initial FMA implementation, fault.memory.page exported an ASRU
73 * with an explicit physical address, which is valid at the initial time of
74 * diagnosis but may not be later following DR, DIMM removal, or interleave
75 * changes. On SPARC, this issue was solved by exporting the DIMM offset
76 * and pushing the entire FMRI to the platform memory controller through
77 * /dev/fm so it can derive the current PA from the DIMM and offset.
78 * On x86, we also encode DIMM and offset in hc-specific, which is then used
79 * by the x64 memory controller driver.
80 * At some point these three approaches need to be rationalized: all platforms
81 * should use the same scheme, either with decoding in the kernel or decoding
82 * in userland (i.e. with a libtopo method to compute and update the PA).
86 cma_page_retire(fmd_hdl_t
*hdl
, nvlist_t
*nvl
, nvlist_t
*asru
,
87 const char *uuid
, boolean_t repair
)
91 const char *action
= repair
? "unretire" : "retire";
93 nvlist_t
*rsrc
= NULL
, *asrucp
= NULL
, *hcsp
;
95 (void) nvlist_lookup_nvlist(nvl
, FM_FAULT_RESOURCE
, &rsrc
);
97 if (nvlist_dup(asru
, &asrucp
, 0) != 0) {
98 fmd_hdl_debug(hdl
, "page retire nvlist dup failed\n");
99 return (CMA_RA_FAILURE
);
102 /* It should already be expanded, but we'll do it again anyway */
103 if (fmd_nvl_fmri_expand(hdl
, asrucp
) < 0) {
104 fmd_hdl_debug(hdl
, "failed to expand page asru\n");
105 cma_stats
.bad_flts
.fmds_value
.ui64
++;
107 return (CMA_RA_FAILURE
);
110 if (!repair
&& !fmd_nvl_fmri_present(hdl
, asrucp
)) {
111 fmd_hdl_debug(hdl
, "page retire overtaken by events\n");
112 cma_stats
.page_nonent
.fmds_value
.ui64
++;
114 return (CMA_RA_SUCCESS
);
117 /* Figure out physaddr from resource or asru */
119 nvlist_lookup_nvlist(rsrc
, FM_FMRI_HC_SPECIFIC
, &hcsp
) != 0 ||
120 (nvlist_lookup_uint64(hcsp
, "asru-" FM_FMRI_HC_SPECIFIC_PHYSADDR
,
121 &pageaddr
) != 0 && nvlist_lookup_uint64(hcsp
,
122 FM_FMRI_HC_SPECIFIC_PHYSADDR
, &pageaddr
) != 0)) {
123 if (nvlist_lookup_uint64(asrucp
, FM_FMRI_MEM_PHYSADDR
,
125 fmd_hdl_debug(hdl
, "mem fault missing 'physaddr'\n");
126 cma_stats
.bad_flts
.fmds_value
.ui64
++;
128 return (CMA_RA_FAILURE
);
133 if (!cma
.cma_page_dounretire
) {
134 fmd_hdl_debug(hdl
, "suppressed unretire of page %llx\n",
135 (u_longlong_t
)pageaddr
);
136 cma_stats
.page_supp
.fmds_value
.ui64
++;
138 return (CMA_RA_SUCCESS
);
140 /* If unretire via topo fails, we fall back to legacy way */
141 if (rsrc
== NULL
|| (rc
= fmd_nvl_fmri_unretire(hdl
, rsrc
)) < 0)
142 rc
= cma_fmri_page_unretire(hdl
, asrucp
);
144 if (!cma
.cma_page_doretire
) {
145 fmd_hdl_debug(hdl
, "suppressed retire of page %llx\n",
146 (u_longlong_t
)pageaddr
);
147 cma_stats
.page_supp
.fmds_value
.ui64
++;
149 return (CMA_RA_FAILURE
);
151 /* If retire via topo fails, we fall back to legacy way */
152 if (rsrc
== NULL
|| (rc
= fmd_nvl_fmri_retire(hdl
, rsrc
)) < 0)
153 rc
= cma_fmri_page_retire(hdl
, asrucp
);
156 if (rc
== FMD_AGENT_RETIRE_DONE
) {
157 fmd_hdl_debug(hdl
, "%sd page 0x%llx\n",
158 action
, (u_longlong_t
)pageaddr
);
160 cma_stats
.page_repairs
.fmds_value
.ui64
++;
162 cma_stats
.page_flts
.fmds_value
.ui64
++;
164 return (CMA_RA_SUCCESS
);
165 } else if (repair
|| rc
!= FMD_AGENT_RETIRE_ASYNC
) {
166 fmd_hdl_debug(hdl
, "%s of page 0x%llx failed, will not "
167 "retry: %s\n", action
, (u_longlong_t
)pageaddr
,
170 cma_stats
.page_fails
.fmds_value
.ui64
++;
172 return (CMA_RA_FAILURE
);
176 * The page didn't immediately retire. We'll need to periodically
177 * check to see if it has been retired.
179 fmd_hdl_debug(hdl
, "page didn't retire - sleeping\n");
181 page
= fmd_hdl_zalloc(hdl
, sizeof (cma_page_t
), FMD_SLEEP
);
182 page
->pg_addr
= pageaddr
;
184 (void) nvlist_dup(rsrc
, &page
->pg_rsrc
, 0);
185 page
->pg_asru
= asrucp
;
187 page
->pg_uuid
= fmd_hdl_strdup(hdl
, uuid
, FMD_SLEEP
);
189 page
->pg_next
= cma
.cma_pages
;
190 cma
.cma_pages
= page
;
192 if (cma
.cma_page_timerid
!= 0)
193 fmd_timer_remove(hdl
, cma
.cma_page_timerid
);
195 cma
.cma_page_curdelay
= cma
.cma_page_mindelay
;
197 cma
.cma_page_timerid
=
198 fmd_timer_install(hdl
, NULL
, NULL
, cma
.cma_page_curdelay
);
200 /* Don't free asrucp here. This FMRI will be needed for retry. */
201 return (CMA_RA_FAILURE
);
205 page_retry(fmd_hdl_t
*hdl
, cma_page_t
*page
)
209 if (page
->pg_asru
!= NULL
&&
210 !fmd_nvl_fmri_present(hdl
, page
->pg_asru
)) {
211 fmd_hdl_debug(hdl
, "page retire overtaken by events");
212 cma_stats
.page_nonent
.fmds_value
.ui64
++;
214 if (page
->pg_uuid
!= NULL
)
215 fmd_case_uuclose(hdl
, page
->pg_uuid
);
216 return (1); /* no longer a page to retire */
219 if (page
->pg_rsrc
== NULL
||
220 (rc
= fmd_nvl_fmri_service_state(hdl
, page
->pg_rsrc
)) < 0)
221 rc
= cma_fmri_page_service_state(hdl
, page
->pg_asru
);
223 if (rc
== FMD_SERVICE_STATE_UNUSABLE
) {
224 fmd_hdl_debug(hdl
, "retired page 0x%llx on retry %u\n",
225 page
->pg_addr
, page
->pg_nretries
);
226 cma_stats
.page_flts
.fmds_value
.ui64
++;
228 if (page
->pg_uuid
!= NULL
)
229 fmd_case_uuclose(hdl
, page
->pg_uuid
);
230 return (1); /* page retired */
233 if (rc
== FMD_SERVICE_STATE_ISOLATE_PENDING
) {
234 fmd_hdl_debug(hdl
, "scheduling another retry for 0x%llx\n",
236 return (0); /* schedule another retry */
238 fmd_hdl_debug(hdl
, "failed to retry page 0x%llx "
239 "retirement: %s\n", page
->pg_addr
,
242 cma_stats
.page_fails
.fmds_value
.ui64
++;
243 return (1); /* give up */
248 cma_page_retry(fmd_hdl_t
*hdl
)
252 cma
.cma_page_timerid
= 0;
254 fmd_hdl_debug(hdl
, "page_retry: timer fired\n");
256 pagep
= &cma
.cma_pages
;
257 while (*pagep
!= NULL
) {
258 cma_page_t
*page
= *pagep
;
260 if (page_retry(hdl
, page
)) {
262 * Successful retry or we're giving up - remove from
265 *pagep
= page
->pg_next
;
267 if (page
->pg_uuid
!= NULL
)
268 fmd_hdl_strfree(hdl
, page
->pg_uuid
);
270 cma_page_free(hdl
, page
);
273 pagep
= &page
->pg_next
;
277 if (cma
.cma_pages
== NULL
)
278 return; /* no more retirements */
281 * We still have retirements that haven't completed. Back the delay
282 * off, and schedule a retry.
284 cma
.cma_page_curdelay
= MIN(cma
.cma_page_curdelay
* 2,
285 cma
.cma_page_maxdelay
);
287 fmd_hdl_debug(hdl
, "scheduled page retirement retry for %llu secs\n",
288 (u_longlong_t
)(cma
.cma_page_curdelay
/ NANOSEC
));
290 cma
.cma_page_timerid
=
291 fmd_timer_install(hdl
, NULL
, NULL
, cma
.cma_page_curdelay
);
295 cma_page_fini(fmd_hdl_t
*hdl
)
299 while ((page
= cma
.cma_pages
) != NULL
) {
300 cma
.cma_pages
= page
->pg_next
;
301 if (page
->pg_uuid
!= NULL
)
302 fmd_hdl_strfree(hdl
, page
->pg_uuid
);
303 cma_page_free(hdl
, page
);