4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * i86pc Memory Scrubbing
30 * On detection of a correctable memory ECC error, the i86pc hardware
31 * returns the corrected data to the requester and may re-write it
32 * to memory (DRAM or NVRAM). Machines which do not re-write this to
33 * memory should add an NMI handler to correct and rewrite.
35 * Scrubbing thus reduces the likelihood that multiple transient errors
36 * will occur in the same memory word, making uncorrectable errors due
37 * to transients less likely.
39 * Thus is born the desire that every memory location be periodically
42 * This file implements a memory scrubbing thread. This scrubber
43 * guarantees that all of physical memory is accessed periodically
44 * (memscrub_period_sec -- 12 hours).
46 * It attempts to do this as unobtrusively as possible. The thread
47 * schedules itself to wake up at an interval such that if it reads
48 * memscrub_span_pages (4MB) on each wakeup, it will read all of physical
49 * memory in memscrub_period_sec (12 hours).
51 * The scrubber uses the REP LODS so it reads 4MB in 0.15 secs (on P5-200).
52 * When it completes a span, if all the CPUs are idle, it reads another span.
53 * Typically it soaks up idle time this way to reach its deadline early
54 * -- and sleeps until the next period begins.
56 * Maximal Cost Estimate: 8GB @ xxMB/s = xxx seconds spent in 640 wakeups
57 * that run for 0.15 seconds at intervals of 67 seconds.
59 * In practice, the scrubber finds enough idle time to finish in a few
60 * minutes, and sleeps until its 12 hour deadline.
62 * The scrubber maintains a private copy of the phys_install memory list
63 * to keep track of what memory should be scrubbed.
65 * The following parameters can be set via /etc/system
67 * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (4MB)
68 * memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
69 * memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (0)
70 * memscrub_delay_start_sec = (10 seconds)
71 * disable_memscrub = (0)
73 * the scrubber will exit (or never be started) if it finds the variable
74 * "disable_memscrub" set.
76 * MEMSCRUB_DFL_SPAN_PAGES is based on the guess that 0.15 sec
77 * is a "good" amount of minimum time for the thread to run at a time.
79 * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
80 * twice the frequency the hardware folk estimated would be necessary.
82 * MEMSCRUB_DFL_THREAD_PRI (0) is based on the assumption that nearly
83 * any other use of the system should be higher priority than scrubbing.
86 #include <sys/types.h>
87 #include <sys/systm.h> /* timeout, types, t_lock */
88 #include <sys/cmn_err.h>
89 #include <sys/sysmacros.h> /* MIN */
90 #include <sys/memlist.h> /* memlist */
91 #include <sys/kmem.h> /* KMEM_NOSLEEP */
92 #include <sys/cpuvar.h> /* ncpus_online */
93 #include <sys/debug.h> /* ASSERTs */
96 #include <vm/seg_kmem.h>
97 #include <vm/seg_kpm.h>
98 #include <vm/hat_i86.h>
99 #include <sys/callb.h> /* CPR callback */
101 static caddr_t memscrub_window
;
102 static hat_mempte_t memscrub_pte
;
108 * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC
110 #define MEMSCRUB_DFL_PERIOD_SEC (12 * 60 * 60) /* 12 hours */
113 * start only if at least MEMSCRUB_MIN_PAGES in system
115 #define MEMSCRUB_MIN_PAGES ((32 * 1024 * 1024) / PAGESIZE)
118 * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration
120 #define MEMSCRUB_DFL_SPAN_PAGES ((4 * 1024 * 1024) / PAGESIZE)
123 * almost anything is higher priority than scrubbing
125 #define MEMSCRUB_DFL_THREAD_PRI 0
128 * we can patch these defaults in /etc/system if necessary
130 uint_t disable_memscrub
= 0;
131 static uint_t disable_memscrub_quietly
= 0;
132 pgcnt_t memscrub_min_pages
= MEMSCRUB_MIN_PAGES
;
133 pgcnt_t memscrub_span_pages
= MEMSCRUB_DFL_SPAN_PAGES
;
134 time_t memscrub_period_sec
= MEMSCRUB_DFL_PERIOD_SEC
;
135 uint_t memscrub_thread_pri
= MEMSCRUB_DFL_THREAD_PRI
;
136 time_t memscrub_delay_start_sec
= 10;
141 static void memscrubber(void);
142 static int system_is_idle(void);
143 static int memscrub_add_span(uint64_t, uint64_t);
148 static struct memlist
*memscrub_memlist
;
149 static uint_t memscrub_phys_pages
;
151 static kcondvar_t memscrub_cv
;
152 static kmutex_t memscrub_lock
;
155 * memscrub_lock protects memscrub_memlist
157 uint_t memscrub_scans_done
;
159 uint_t memscrub_done_early
;
160 uint_t memscrub_early_sec
;
162 uint_t memscrub_done_late
;
163 time_t memscrub_late_sec
;
166 * create memscrub_memlist from phys_install list
167 * initialize locks, set memscrub_phys_pages.
174 if (physmem
< memscrub_min_pages
)
178 memscrub_window
= vmem_alloc(heap_arena
, PAGESIZE
, VM_SLEEP
);
179 memscrub_pte
= hat_mempte_setup(memscrub_window
);
183 * copy phys_install to memscrub_memlist
185 for (src
= phys_install
; src
; src
= src
->ml_next
) {
186 if (memscrub_add_span(src
->ml_address
, src
->ml_size
)) {
188 "Software memory scrubber failed to initialize\n");
193 mutex_init(&memscrub_lock
, NULL
, MUTEX_DRIVER
, NULL
);
194 cv_init(&memscrub_cv
, NULL
, CV_DRIVER
, NULL
);
197 * create memscrubber thread
199 (void) thread_create(NULL
, 0, (void (*)())memscrubber
, NULL
, 0, &p0
,
200 TS_RUN
, memscrub_thread_pri
);
204 * Function to cause the software memscrubber to exit quietly if the
205 * platform support has located a hardware scrubber and enabled it.
208 memscrub_disable(void)
210 disable_memscrub_quietly
= 1;
213 #ifdef MEMSCRUB_DEBUG
215 memscrub_printmemlist(char *title
, struct memlist
*listp
)
217 struct memlist
*list
;
219 cmn_err(CE_CONT
, "%s:\n", title
);
221 for (list
= listp
; list
; list
= list
->next
) {
222 cmn_err(CE_CONT
, "addr = 0x%llx, size = 0x%llx\n",
223 list
->address
, list
->size
);
226 #endif /* MEMSCRUB_DEBUG */
230 memscrub_wakeup(void *c
)
233 * grab mutex to guarantee that our wakeup call
234 * arrives after we go to sleep -- so we can't sleep forever.
236 mutex_enter(&memscrub_lock
);
237 cv_signal(&memscrub_cv
);
238 mutex_exit(&memscrub_lock
);
242 * this calculation doesn't account for the time that the actual scan
243 * consumes -- so we'd fall slightly behind schedule with this
244 * interval_sec. but the idle loop optimization below usually makes us
245 * come in way ahead of schedule.
248 compute_interval_sec()
250 if (memscrub_phys_pages
<= memscrub_span_pages
)
251 return (memscrub_period_sec
);
253 return (memscrub_period_sec
/
254 (memscrub_phys_pages
/memscrub_span_pages
));
261 uint64_t mlp_last_addr
;
262 uint64_t mlp_next_addr
;
264 time_t interval_sec
= 0;
267 extern void scan_memory(caddr_t
, size_t);
271 * notify CPR of our existence
273 CALLB_CPR_INIT(&cprinfo
, &memscrub_lock
, callb_generic_cpr
, "memscrub");
275 if (memscrub_memlist
== NULL
) {
276 cmn_err(CE_WARN
, "memscrub_memlist not initialized.");
280 mlp
= memscrub_memlist
;
281 mlp_next_addr
= mlp
->ml_address
;
282 mlp_last_addr
= mlp
->ml_address
+ mlp
->ml_size
;
284 deadline
= gethrestime_sec() + memscrub_delay_start_sec
;
287 if (disable_memscrub
|| disable_memscrub_quietly
)
290 mutex_enter(&memscrub_lock
);
293 * did we just reach the end of memory?
296 time_t now
= gethrestime_sec();
298 if (now
>= deadline
) {
299 memscrub_done_late
++;
300 memscrub_late_sec
+= (now
- deadline
);
302 * past deadline, start right away
306 deadline
= now
+ memscrub_period_sec
;
309 * we finished ahead of schedule.
310 * wait till previous dealine before re-start.
312 interval_sec
= deadline
- now
;
313 memscrub_done_early
++;
314 memscrub_early_sec
+= interval_sec
;
315 deadline
+= memscrub_period_sec
;
318 interval_sec
= compute_interval_sec();
322 * it is safe from our standpoint for CPR to
325 CALLB_CPR_SAFE_BEGIN(&cprinfo
);
330 (void) timeout(memscrub_wakeup
, NULL
, interval_sec
* hz
);
335 cv_wait(&memscrub_cv
, &memscrub_lock
);
337 /* we need to goto work */
338 CALLB_CPR_SAFE_END(&cprinfo
, &memscrub_lock
);
340 mutex_exit(&memscrub_lock
);
343 pgcnt_t pages
= memscrub_span_pages
;
344 uint64_t address
= mlp_next_addr
;
346 if (disable_memscrub
|| disable_memscrub_quietly
)
349 mutex_enter(&memscrub_lock
);
352 * Make sure we don't try to scan beyond the end of
353 * the current memlist. If we would, then resize
354 * our scan target for this iteration, and prepare
355 * to read the next memlist entry on the next
359 if (address
+ mmu_ptob(pages
) >= mlp_last_addr
) {
360 pages
= mmu_btop(mlp_last_addr
- address
);
364 mlp
= memscrub_memlist
;
366 mlp_next_addr
= mlp
->ml_address
;
367 mlp_last_addr
= mlp
->ml_address
+ mlp
->ml_size
;
369 mlp_next_addr
+= mmu_ptob(pages
);
372 mutex_exit(&memscrub_lock
);
375 pfn_t pfn
= btop(address
);
378 * Without segkpm, the memscrubber cannot
379 * be allowed to migrate across CPUs, as
380 * the CPU-specific mapping of
381 * memscrub_window would be incorrect.
382 * With segkpm, switching CPUs is legal, but
383 * inefficient. We don't use
384 * kpreempt_disable as it might hold a
385 * higher priority thread (eg, RT) too long
388 thread_affinity_set(curthread
, CPU_CURRENT
);
390 memscrub_window
= hat_kpm_pfn2va(pfn
);
392 hat_mempte_remap(pfn
, memscrub_window
,
394 PROT_READ
, HAT_LOAD_NOCONSIST
);
396 scan_memory(memscrub_window
, PAGESIZE
);
398 thread_affinity_clear(curthread
);
399 address
+= MMU_PAGESIZE
;
402 memscrub_scans_done
++;
403 } while (!reached_end
&& system_is_idle());
408 if (!disable_memscrub_quietly
)
409 cmn_err(CE_NOTE
, "Software memory scrubber exiting.");
411 * We are about to bail, but don't have the memscrub_lock,
412 * and it is needed for CALLB_CPR_EXIT.
414 mutex_enter(&memscrub_lock
);
415 CALLB_CPR_EXIT(&cprinfo
);
417 cv_destroy(&memscrub_cv
);
424 * return 1 if we're MP and all the other CPUs are idle
432 if (1 == ncpus_online
)
435 for (cpu_id
= 0; cpu_id
< NCPU
; ++cpu_id
) {
441 if (cpu
[cpu_id
]->cpu_thread
!= cpu
[cpu_id
]->cpu_idle_thread
) {
442 if (CPU
->cpu_id
== cpu_id
&&
443 CPU
->cpu_disp
->disp_nrunnable
== 0)
455 * add a span to the memscrub list
458 memscrub_add_span(uint64_t start
, uint64_t bytes
)
461 struct memlist
*prev
, *next
;
462 uint64_t end
= start
+ bytes
- 1;
465 mutex_enter(&memscrub_lock
);
467 #ifdef MEMSCRUB_DEBUG
468 memscrub_printmemlist("memscrub_memlist before", memscrub_memlist
);
469 cmn_err(CE_CONT
, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages
);
470 cmn_err(CE_CONT
, "memscrub_add_span: address: 0x%llx"
471 " size: 0x%llx\n", start
, bytes
);
472 #endif /* MEMSCRUB_DEBUG */
475 * Scan through the list to find the proper place to install it.
478 next
= memscrub_memlist
;
480 uint64_t ns
= next
->ml_address
;
481 uint64_t ne
= next
->ml_address
+ next
->ml_size
- 1;
484 * If this span overlaps with an existing span, then
485 * something has gone horribly wrong with the phys_install
486 * list. In fact, I'm surprised we made it this far.
488 if ((start
>= ns
&& start
<= ne
) || (end
>= ns
&& end
<= ne
) ||
489 (start
< ns
&& end
> ne
))
490 panic("memscrub found overlapping memory ranges "
491 "(0x%p-0x%p) and (0x%p-0x%p)",
492 (void *)(uintptr_t)start
, (void *)(uintptr_t)end
,
493 (void *)(uintptr_t)ns
, (void *)(uintptr_t)ne
);
496 * New span can be appended to an existing one.
498 if (start
== ne
+ 1) {
499 next
->ml_size
+= bytes
;
504 * New span can be prepended to an existing one.
507 next
->ml_size
+= bytes
;
508 next
->ml_address
= start
;
513 * If the next span has a higher start address than the new
514 * one, then we have found the right spot for our
521 next
= next
->ml_next
;
525 * allocate a new struct memlist
527 dst
= kmem_alloc(sizeof (struct memlist
), KM_NOSLEEP
);
532 dst
->ml_address
= start
;
533 dst
->ml_size
= bytes
;
540 memscrub_memlist
= dst
;
548 memscrub_phys_pages
+= mmu_btop(bytes
);
550 #ifdef MEMSCRUB_DEBUG
551 memscrub_printmemlist("memscrub_memlist after", memscrub_memlist
);
552 cmn_err(CE_CONT
, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages
);
553 #endif /* MEMSCRUB_DEBUG */
555 mutex_exit(&memscrub_lock
);