 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
#include <sys/t_lock.h>
#include <sys/cmn_err.h>
#include <sys/instance.h>
#include <sys/hwconf.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ndi_impldefs.h>
#include <sys/modctl.h>
#include <sys/promif.h>
#include <sys/cpuvar.h>
#include <sys/pathname.h>
#include <sys/devcache.h>
#include <sys/devcache_impl.h>
#include <sys/sysmacros.h>
#include <sys/varargs.h>
#include <sys/callb.h>
/*
 * This facility provides interfaces to clients to register,
 * read and update cache data in persisted backing store files,
 * usually in /etc/devices.  The data persisted through this
 * mechanism should be stateless data, functioning in the sense
 * of a cache.  Writes are performed by a background daemon
 * thread, permitting a client to schedule an update without
 * blocking, then continue updating the data state in
 * parallel.  The data is only locked by the daemon thread
 * to pack the data in preparation for the write.
 *
 * Data persisted through this mechanism should be capable
 * of being regenerated through normal system operation,
 * for example attaching all disk devices would cause all
 * devids to be registered for those devices.  By caching
 * a devid-device tuple, the system can operate in a
 * more optimal way, directly attaching the device mapped
 * to a devid, rather than burdensomely driving attach of
 * the entire device tree to discover a single device.
 *
 * Note that a client should only need to include
 * <sys/devcache.h> for the supported interfaces.
 *
 * The data per client is entirely within the control of
 * the client.  When reading, data unpacked from the backing
 * store should be inserted in the list.  The pointer to
 * the list can be retrieved via nvf_list().  When writing,
 * the data on the list is to be packed and returned to the
 * nvpdaemon as an nvlist.
 *
 * Obvious restrictions are imposed by the limits of the
 * nvlist format.  The data cannot be read or written
 * piecemeal, and large amounts of data aren't recommended.
 * However, nvlists do allow that data be named and typed
 * and can be size-of-int invariant, and the cached data
 * can be versioned conveniently.
 *
 * The registration involves two steps: a handle is
 * allocated by calling the registration function.
 * This sets up the data referenced by the handle and
 * initializes the lock.  Following registration, the
 * client must initialize the data list.  The list
 * interfaces require that the list element with offset
 * to the node link be provided.  The format of the
 * list element is under the control of the client.
 *
 * Locking: the address of the data list r/w lock provided
 * can be accessed with nvf_lock().  The lock must be held
 * as reader when traversing the list or checking state,
 * such as nvf_is_dirty().  The lock must be held as
 * writer when updating the list or marking it dirty.
 * The lock must not be held when waking the daemon.
 *
 * The data r/w lock is held as writer when the pack,
 * unpack and free list handlers are called.  The
 * lock should not be dropped and must be still held
 * upon return.  The client should also hold the lock
 * as reader when checking if the list is dirty, and
 * as writer when marking the list dirty or initiating
 * an update.
 *
 * The asynchronous nature of updates allows for the
 * possibility that the data may continue to be updated
 * once the daemon has been notified that an update is
 * desired.  The data only needs to be locked against
 * updates when packing the data into the form to be
 * written.  When the write of the packed data has
 * completed, the daemon will automatically reschedule
 * an update if the data was marked dirty after the
 * point at which it was packed.  Before beginning an
 * update, the daemon attempts to lock the data as
 * writer; if the writer lock is already held, it
 * backs off and retries later.  The model is to give
 * priority to the kernel processes generating the
 * data; the nature of the data is that it does not
 * change often and can be re-generated when needed,
 * so updates should not happen often and can be
 * delayed until the data stops changing.
 * The client may update the list or mark it dirty
 * any time it is able to acquire the lock as
 * writer first.
 *
 * A failed write will be retried after some delay,
 * in the hope that the cause of the error will be
 * transient, for example a filesystem with no space
 * available.  An update on a read-only filesystem
 * is failed silently and not retried; this would be
 * the case when booted off install media.
 *
 * There is no unregister mechanism as of yet, as it
 * hasn't been needed so far.
 */
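
/*
 * As an illustrative sketch only, using a hypothetical client "foo"
 * (its ops vector, element type and handle are invented for the
 * example; the nvf_ops_t layout lives in <sys/devcache_impl.h>), the
 * registration and update flow described above might look like:
 *
 *	foo_hdl = nvf_register_file(&foo_cache_ops);
 *	list_create(nvf_list(foo_hdl), sizeof (foo_ent_t),
 *	    offsetof(foo_ent_t, foo_link));
 *
 *	rw_enter(nvf_lock(foo_hdl), RW_WRITER);
 *	list_insert_tail(nvf_list(foo_hdl), fep);
 *	nvf_mark_dirty(foo_hdl);
 *	rw_exit(nvf_lock(foo_hdl));
 *	nvf_wake_daemon();	<- lock dropped before waking the daemon
 */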
/*
 * Global list of files registered and updated by the nvpflush
 * daemon, protected by the nvf_cache_mutex.  While an
 * update is taking place, a file is temporarily moved to
 * the dirty list to avoid locking the primary list for
 * the duration of the update.
 */
list_t		nvf_cache_files;
list_t		nvf_dirty_files;
kmutex_t	nvf_cache_mutex;
/*
 * Allow some delay from an update of the data before flushing
 * to permit simultaneous updates of multiple changes.
 * Changes in the data are expected to be bursty, i.e.
 * reconfig or hot-plug of a new adapter.
 *
 * kfio_report_error (default 0)
 *	Set to 1 to enable some error messages related to low-level
 *	kernel file i/o operations.
 *
 * nvpflush_delay (default 10)
 *	The number of seconds after data is marked dirty before the
 *	flush daemon is triggered to flush the data.  A longer period
 *	of time permits more data updates per write.  Note that
 *	every update resets the timer so no repository write will
 *	occur while data is being updated continuously.
 *
 * nvpdaemon_idle_time (default 60)
 *	The number of seconds the daemon will sleep idle before exiting.
 */
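
/*
 * All three are plain integer tunables; for example, a larger flush
 * delay (an illustrative value, not a recommendation) could be set
 * at boot via /etc/system:
 *
 *	set nvpflush_delay = 30
 */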
#define	NVPFLUSH_DELAY		10
#define	NVPDAEMON_IDLE_TIME	60

#define	TICKS_PER_SECOND	(drv_usectohz(1000000))
int kfio_report_error = 0;		/* kernel file i/o operations */
int kfio_disable_read = 0;		/* disable all reads */
int kfio_disable_write = 0;		/* disable all writes */

int nvpflush_delay = NVPFLUSH_DELAY;
int nvpdaemon_idle_time = NVPDAEMON_IDLE_TIME;
static timeout_id_t	nvpflush_id = 0;
static int		nvpflush_timer_busy = 0;
static int		nvpflush_daemon_active = 0;
static kthread_t	*nvpflush_thr_id = 0;

static int		do_nvpflush = 0;
static int		nvpbusy = 0;
static kmutex_t		nvpflush_lock;
static kcondvar_t	nvpflush_cv;
static kthread_id_t	nvpflush_thread;
static clock_t		nvpticks;
static void nvpflush_daemon(void);

int nvpdaemon_debug = 0;

extern int modrootloaded;
extern void mdi_read_devices_files(void);
extern void mdi_clean_vhcache(void);
extern int sys_shutdown;
/*
 * Initialize the overall cache file management
 */
void
i_ddi_devices_init(void)
{
	list_create(&nvf_cache_files, sizeof (nvfd_t),
	    offsetof(nvfd_t, nvf_link));
	list_create(&nvf_dirty_files, sizeof (nvfd_t),
	    offsetof(nvfd_t, nvf_link));
	mutex_init(&nvf_cache_mutex, NULL, MUTEX_DEFAULT, NULL);
}
/*
 * The files read here should be restricted to those
 * that may be required to mount root.
 */
void
i_ddi_read_devices_files(void)
{
	/*
	 * The retire store should be the first file read as it may need to
	 * offline devices.  kfio_disable_read is not used for retire.  For
	 * the rationale see the tunable ddi_retire_store_bypass and
	 * comments in: kernel/os/retire_store.c
	 */
	retire_store_read();

	if (!kfio_disable_read) {
		mdi_read_devices_files();
		devid_cache_read();
	}
}
void
i_ddi_start_flush_daemon(void)
{
	nvfd_t	*nvfdp;

	ASSERT(i_ddi_io_initialized());

	mutex_init(&nvpflush_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&nvpflush_cv, NULL, CV_DRIVER, NULL);

	mutex_enter(&nvf_cache_mutex);
	for (nvfdp = list_head(&nvf_cache_files); nvfdp;
	    nvfdp = list_next(&nvf_cache_files, nvfdp)) {
		if (NVF_IS_DIRTY(nvfdp)) {
			nvf_wake_daemon();
			break;
		}
	}
	mutex_exit(&nvf_cache_mutex);
}
void
i_ddi_clean_devices_files(void)
{
	devid_cache_cleanup();
	mdi_clean_vhcache();
}
/*
 * Register a cache file to be managed and updated by the nvpflush daemon.
 * All operations are performed through the returned handle.
 * There is no unregister mechanism for now.
 */
nvf_handle_t
nvf_register_file(nvf_ops_t *ops)
{
	nvfd_t	*nvfdp;

	nvfdp = kmem_zalloc(sizeof (*nvfdp), KM_SLEEP);

	nvfdp->nvf_ops = ops;
	nvfdp->nvf_flags = 0;
	rw_init(&nvfdp->nvf_lock, NULL, RW_DRIVER, NULL);

	mutex_enter(&nvf_cache_mutex);
	list_insert_tail(&nvf_cache_files, nvfdp);
	mutex_exit(&nvf_cache_mutex);

	return ((nvf_handle_t)nvfdp);
}
/*PRINTFLIKE1*/
void
nvf_error(const char *fmt, ...)
{
	va_list ap;

	if (kfio_report_error) {
		va_start(ap, fmt);
		vcmn_err(CE_NOTE, fmt, ap);
		va_end(ap);
	}
}
/*
 * Some operations clients may use to manage the data
 * to be persisted in a cache file.
 */
char *
nvf_cache_name(nvf_handle_t handle)
{
	return (((nvfd_t *)handle)->nvf_cache_path);
}
krwlock_t *
nvf_lock(nvf_handle_t handle)
{
	return (&(((nvfd_t *)handle)->nvf_lock));
}
list_t *
nvf_list(nvf_handle_t handle)
{
	return (&(((nvfd_t *)handle)->nvf_data_list));
}
void
nvf_mark_dirty(nvf_handle_t handle)
{
	ASSERT(RW_WRITE_HELD(&(((nvfd_t *)handle)->nvf_lock)));
	NVF_MARK_DIRTY((nvfd_t *)handle);
}
int
nvf_is_dirty(nvf_handle_t handle)
{
	ASSERT(RW_LOCK_HELD(&(((nvfd_t *)handle)->nvf_lock)));
	return (NVF_IS_DIRTY((nvfd_t *)handle));
}
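
/*
 * Illustrative only: per the locking rules above, a client polling for
 * pending changes holds the list lock at least as reader ("hdl" being
 * a hypothetical handle returned by nvf_register_file()):
 *
 *	rw_enter(nvf_lock(hdl), RW_READER);
 *	dirty = nvf_is_dirty(hdl);
 *	rw_exit(nvf_lock(hdl));
 */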
static uint16_t
nvp_cksum(uchar_t *buf, int64_t buflen)
{
	uint16_t cksum = 0;
	uint16_t *p = (uint16_t *)buf;
	int64_t n;

	if ((buflen & 0x01) != 0) {
		buflen--;
		cksum = buf[buflen];
	}
	n = buflen / 2;
	while (n-- > 0)
		cksum ^= *p++;
	return (cksum);
}
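
/*
 * On-disk layout sketch, inferred from the header fields used by the
 * read and write paths below (the authoritative definition of
 * nvpf_hdr_t is in <sys/devcache_impl.h>):
 *
 *	+------------------+ offset 0
 *	| nvpf_hdr_t       |  nvpf_magic, nvpf_version, nvpf_size,
 *	|                  |  nvpf_chksum (data), nvpf_hdr_chksum
 *	+------------------+ sizeof (nvpf_hdr_t)
 *	| packed nvlist    |  NV_ENCODE_NATIVE, nvpf_size bytes
 *	+------------------+
 */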
static int
fread_nvlist(char *filename, nvlist_t **ret_nvlist)
{
	struct _buf	*file;
	nvpf_hdr_t	hdr;
	char		*buf;
	nvlist_t	*nvl;
	int		rval;
	uint_t		offset;
	int		n;
	char		c;
	uint16_t	cksum, hdrsum;

	*ret_nvlist = NULL;

	file = kobj_open_file(filename);
	if (file == (struct _buf *)-1) {
		KFDEBUG((CE_CONT, "cannot open file: %s\n", filename));
		return (ENOENT);
	}

	offset = 0;
	n = kobj_read_file(file, (char *)&hdr, sizeof (hdr), offset);
	if (n != sizeof (hdr)) {
		kobj_close_file(file);
		if (n < 0) {
			nvf_error("error reading header: %s\n", filename);
			return (EIO);
		} else if (n == 0) {
			KFDEBUG((CE_CONT, "file empty: %s\n", filename));
		} else {
			nvf_error("header size incorrect: %s\n", filename);
		}
		return (EINVAL);
	}
	offset += n;

	KFDEBUG2((CE_CONT, "nvpf_magic: 0x%x\n", hdr.nvpf_magic));
	KFDEBUG2((CE_CONT, "nvpf_version: %d\n", hdr.nvpf_version));
	KFDEBUG2((CE_CONT, "nvpf_size: %lld\n",
	    (longlong_t)hdr.nvpf_size));
	KFDEBUG2((CE_CONT, "nvpf_hdr_chksum: 0x%x\n",
	    hdr.nvpf_hdr_chksum));
	KFDEBUG2((CE_CONT, "nvpf_chksum: 0x%x\n", hdr.nvpf_chksum));

	cksum = hdr.nvpf_hdr_chksum;
	hdr.nvpf_hdr_chksum = 0;
	hdrsum = nvp_cksum((uchar_t *)&hdr, sizeof (hdr));

	if (hdr.nvpf_magic != NVPF_HDR_MAGIC ||
	    hdr.nvpf_version != NVPF_HDR_VERSION || hdrsum != cksum) {
		kobj_close_file(file);
		if (hdrsum != cksum) {
			nvf_error("%s: checksum error "
			    "(actual 0x%x, expected 0x%x)\n",
			    filename, hdrsum, cksum);
		}
		nvf_error("%s: header information incorrect", filename);
		return (EINVAL);
	}

	ASSERT(hdr.nvpf_size >= 0);

	buf = kmem_alloc(hdr.nvpf_size, KM_SLEEP);
	n = kobj_read_file(file, buf, hdr.nvpf_size, offset);
	if (n != hdr.nvpf_size) {
		kmem_free(buf, hdr.nvpf_size);
		kobj_close_file(file);
		if (n < 0) {
			nvf_error("%s: read error %d", filename, n);
			return (EIO);
		}
		nvf_error("%s: incomplete read %d/%lld",
		    filename, n, (longlong_t)hdr.nvpf_size);
		return (EINVAL);
	}
	offset += n;

	rval = kobj_read_file(file, &c, 1, offset);
	kobj_close_file(file);
	if (rval > 0) {
		nvf_error("%s is larger than %lld\n",
		    filename, (longlong_t)hdr.nvpf_size);
		kmem_free(buf, hdr.nvpf_size);
		return (EINVAL);
	}

	cksum = nvp_cksum((uchar_t *)buf, hdr.nvpf_size);
	if (hdr.nvpf_chksum != cksum) {
		nvf_error("%s: checksum error (actual 0x%x, expected 0x%x)\n",
		    filename, hdr.nvpf_chksum, cksum);
		kmem_free(buf, hdr.nvpf_size);
		return (EINVAL);
	}

	rval = nvlist_unpack(buf, hdr.nvpf_size, &nvl, 0);
	if (rval != 0) {
		nvf_error("%s: error %d unpacking nvlist\n",
		    filename, rval);
		kmem_free(buf, hdr.nvpf_size);
		return (EINVAL);
	}

	kmem_free(buf, hdr.nvpf_size);

	*ret_nvlist = nvl;
	return (0);
}
static int
kfcreate(char *filename, kfile_t **kfilep)
{
	kfile_t	*fp;
	int	rval;

	ASSERT(modrootloaded);

	fp = kmem_alloc(sizeof (kfile_t), KM_SLEEP);

	fp->kf_vnflags = FCREAT | FWRITE | FTRUNC;
	fp->kf_fname = filename;
	fp->kf_fpos = 0;
	fp->kf_state = 0;

	KFDEBUG((CE_CONT, "create: %s flags 0x%x\n",
	    filename, fp->kf_vnflags));
	rval = vn_open(filename, UIO_SYSSPACE, fp->kf_vnflags,
	    0444, &fp->kf_vp, CRCREAT, 0);
	if (rval != 0) {
		kmem_free(fp, sizeof (kfile_t));
		KFDEBUG((CE_CONT, "%s: create error %d\n",
		    filename, rval));
		return (rval);
	}

	*kfilep = fp;
	return (0);
}
static int
kfremove(char *filename)
{
	int rval;

	KFDEBUG((CE_CONT, "remove: %s\n", filename));
	rval = vn_remove(filename, UIO_SYSSPACE, RMFILE);
	if (rval != 0) {
		KFDEBUG((CE_CONT, "%s: remove error %d\n",
		    filename, rval));
	}
	return (rval);
}
static int
kfread(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n)
{
	ssize_t		resid;
	ssize_t		n;
	int		err;

	ASSERT(modrootloaded);

	if (fp->kf_state != 0)
		return (fp->kf_state);

	err = vn_rdwr(UIO_READ, fp->kf_vp, buf, bufsiz, fp->kf_fpos,
	    UIO_SYSSPACE, 0, (rlim64_t)0, kcred, &resid);
	if (err != 0) {
		KFDEBUG((CE_CONT, "%s: read error %d\n",
		    fp->kf_fname, err));
		fp->kf_state = err;
		return (err);
	}

	ASSERT(resid >= 0 && resid <= bufsiz);
	n = bufsiz - resid;

	KFDEBUG1((CE_CONT, "%s: read %ld bytes ok %ld bufsiz, %ld resid\n",
	    fp->kf_fname, n, bufsiz, resid));

	fp->kf_fpos += n;
	*ret_n = n;
	return (0);
}
static int
kfwrite(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n)
{
	rlim64_t	rlimit;
	ssize_t		resid;
	ssize_t		len;
	ssize_t		n = 0;
	int		err;

	ASSERT(modrootloaded);

	if (fp->kf_state != 0)
		return (fp->kf_state);

	len = bufsiz;
	rlimit = bufsiz + 1;
	for (;;) {
		err = vn_rdwr(UIO_WRITE, fp->kf_vp, buf, len, fp->kf_fpos,
		    UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid);
		if (err) {
			KFDEBUG((CE_CONT, "%s: write error %d\n",
			    fp->kf_fname, err));
			fp->kf_state = err;
			return (err);
		}

		KFDEBUG1((CE_CONT, "%s: write %ld bytes ok %ld resid\n",
		    fp->kf_fname, len-resid, resid));

		ASSERT(resid >= 0 && resid <= len);

		n += (len - resid);
		if (resid == 0)
			break;

		if (resid == len) {
			KFDEBUG((CE_CONT, "%s: filesystem full?\n",
			    fp->kf_fname));
			fp->kf_state = ENOSPC;
			return (ENOSPC);
		}

		buf += len - resid;
		fp->kf_fpos += len - resid;
		len = resid;
	}

	ASSERT(n == bufsiz);
	KFDEBUG1((CE_CONT, "%s: wrote %ld bytes ok\n", fp->kf_fname, n));

	*ret_n = n;
	return (0);
}
static int
kfclose(kfile_t *fp)
{
	int	rval;

	KFDEBUG((CE_CONT, "close: %s\n", fp->kf_fname));

	if ((fp->kf_vnflags & FWRITE) && fp->kf_state == 0) {
		rval = fop_fsync(fp->kf_vp, FSYNC, kcred, NULL);
		if (rval != 0) {
			nvf_error("%s: sync error %d\n",
			    fp->kf_fname, rval);
		} else {
			KFDEBUG((CE_CONT, "%s: sync ok\n", fp->kf_fname));
		}
	}

	rval = fop_close(fp->kf_vp, fp->kf_vnflags, 1,
	    (offset_t)0, kcred, NULL);
	if (rval != 0) {
		if (fp->kf_state == 0) {
			nvf_error("%s: close error %d\n",
			    fp->kf_fname, rval);
		}
	} else {
		if (fp->kf_state == 0)
			KFDEBUG((CE_CONT, "%s: close ok\n", fp->kf_fname));
	}

	kmem_free(fp, sizeof (kfile_t));
	return (rval);
}
static int
kfrename(char *oldname, char *newname)
{
	int rval;

	ASSERT(modrootloaded);

	KFDEBUG((CE_CONT, "renaming %s to %s\n", oldname, newname));

	if ((rval = vn_rename(oldname, newname, UIO_SYSSPACE)) != 0) {
		KFDEBUG((CE_CONT, "rename %s to %s: %d\n",
		    oldname, newname, rval));
	}

	return (rval);
}
static int
fwrite_nvlist(char *filename, nvlist_t *nvl)
{
	char	*buf;
	char	*nvbuf;
	kfile_t	*fp;
	char	*newname;
	int	len, err, err1;
	size_t	buflen;
	ssize_t	n;

	ASSERT(modrootloaded);

	nvbuf = NULL;
	err = nvlist_pack(nvl, &nvbuf, &buflen, NV_ENCODE_NATIVE, 0);
	if (err != 0) {
		nvf_error("%s: error %d packing nvlist\n",
		    filename, err);
		return (err);
	}

	buf = kmem_alloc(sizeof (nvpf_hdr_t) + buflen, KM_SLEEP);
	bzero(buf, sizeof (nvpf_hdr_t));

	((nvpf_hdr_t *)buf)->nvpf_magic = NVPF_HDR_MAGIC;
	((nvpf_hdr_t *)buf)->nvpf_version = NVPF_HDR_VERSION;
	((nvpf_hdr_t *)buf)->nvpf_size = buflen;
	((nvpf_hdr_t *)buf)->nvpf_chksum = nvp_cksum((uchar_t *)nvbuf, buflen);
	((nvpf_hdr_t *)buf)->nvpf_hdr_chksum =
	    nvp_cksum((uchar_t *)buf, sizeof (nvpf_hdr_t));

	bcopy(nvbuf, buf + sizeof (nvpf_hdr_t), buflen);
	kmem_free(nvbuf, buflen);
	buflen += sizeof (nvpf_hdr_t);

	len = strlen(filename) + MAX_SUFFIX_LEN + 2;
	newname = kmem_alloc(len, KM_SLEEP);

	(void) sprintf(newname, "%s.%s", filename, NEW_FILENAME_SUFFIX);

	/*
	 * To make it unlikely we suffer data loss, write
	 * data to the new temporary file.  Once successful
	 * complete the transaction by renaming the new file
	 * to replace the previous.
	 */

	if ((err = kfcreate(newname, &fp)) == 0) {
		err = kfwrite(fp, buf, buflen, &n);
		if (err) {
			nvf_error("%s: write error - %d\n",
			    newname, err);
		} else {
			if (n != buflen) {
				nvf_error(
				    "%s: partial write %ld of %ld bytes\n",
				    newname, n, buflen);
				nvf_error("%s: filesystem may be full?\n",
				    newname);
				err = EIO;
			}
		}
		if ((err1 = kfclose(fp)) != 0) {
			nvf_error("%s: close error\n", newname);
			if (err == 0)
				err = err1;
		}
		if (err != 0) {
			if (kfremove(newname) != 0) {
				nvf_error("%s: remove failed\n",
				    newname);
			}
		}
	} else {
		nvf_error("%s: create failed - %d\n", filename, err);
	}

	if (err == 0) {
		if ((err = kfrename(newname, filename)) != 0) {
			nvf_error("%s: rename from %s failed\n",
			    filename, newname);
		}
	}

	kmem_free(newname, len);
	kmem_free(buf, buflen);

	return (err);
}
static int
e_fwrite_nvlist(nvfd_t *nvfd, nvlist_t *nvl)
{
	int err;

	if ((err = fwrite_nvlist(nvfd->nvf_cache_path, nvl)) == 0)
		return (DDI_SUCCESS);
	else {
		if (err == EROFS)
			NVF_MARK_READONLY(nvfd);
		return (DDI_FAILURE);
	}
}
static void
nvp_list_free(nvfd_t *nvf)
{
	ASSERT(RW_WRITE_HELD(&nvf->nvf_lock));
	(nvf->nvf_list_free)((nvf_handle_t)nvf);
	ASSERT(RW_WRITE_HELD(&nvf->nvf_lock));
}
/*
 * Read a file in the nvlist format
 *	EIO - i/o error during read
 *	ENOENT - file not found
 *	EINVAL - file contents corrupted
 */
static int
fread_nvp_list(nvfd_t *nvfd)
{
	nvlist_t	*nvl;
	nvpair_t	*nvp;
	char		*name;
	nvlist_t	*sublist;
	int		rval;
	int		rv = 0;

	ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock)));

	rval = fread_nvlist(nvfd->nvf_cache_path, &nvl);
	if (rval != 0)
		return (rval);
	ASSERT(nvl != NULL);

	nvp = NULL;
	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
		name = nvpair_name(nvp);
		ASSERT(strlen(name) > 0);

		switch (nvpair_type(nvp)) {
		case DATA_TYPE_NVLIST:
			rval = nvpair_value_nvlist(nvp, &sublist);
			if (rval != 0) {
				nvf_error(
				    "nvpair_value_nvlist error %s %d\n",
				    name, rval);
				goto error;
			}

			/*
			 * unpack nvlist for this device and
			 * add elements to data list.
			 */
			ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock)));
			rv = (nvfd->nvf_unpack_nvlist)
			    ((nvf_handle_t)nvfd, sublist, name);
			ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock)));
			if (rv != 0) {
				nvf_error(
				    "%s: %s invalid list element\n",
				    nvfd->nvf_cache_path, name);
				rval = EINVAL;
				goto error;
			}
			break;

		default:
			nvf_error("%s: %s unsupported data type %d\n",
			    nvfd->nvf_cache_path, name, nvpair_type(nvp));
			rval = EINVAL;
			goto error;
		}
	}

	nvlist_free(nvl);

	return (0);

error:
	nvlist_free(nvl);
	nvp_list_free(nvfd);
	return (EINVAL);
}
int
nvf_read_file(nvf_handle_t nvf_handle)
{
	nvfd_t	*nvfd = (nvfd_t *)nvf_handle;
	int	rval;

	ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock));

	if (kfio_disable_read)
		return (0);

	KFDEBUG((CE_CONT, "reading %s\n", nvfd->nvf_cache_path));

	rval = fread_nvp_list(nvfd);
	if (rval) {
		switch (rval) {
		case EIO:
			nvfd->nvf_flags |= NVF_F_REBUILD_MSG;
			cmn_err(CE_WARN, "%s: I/O error",
			    nvfd->nvf_cache_path);
			break;
		case ENOENT:
			nvfd->nvf_flags |= NVF_F_CREATE_MSG;
			nvf_error("%s: not found\n",
			    nvfd->nvf_cache_path);
			break;
		case EINVAL:
		default:
			nvfd->nvf_flags |= NVF_F_REBUILD_MSG;
			cmn_err(CE_WARN, "%s: data file corrupted",
			    nvfd->nvf_cache_path);
			break;
		}
	}
	return (rval);
}
static void
nvf_write_is_complete(nvfd_t *fd)
{
	if (fd->nvf_write_complete) {
		(fd->nvf_write_complete)((nvf_handle_t)fd);
	}
}
/*ARGSUSED*/
static void
nvpflush_timeout(void *arg)
{
	clock_t nticks;

	mutex_enter(&nvpflush_lock);
	nticks = nvpticks - ddi_get_lbolt();
	if (nticks > 4) {
		nvpflush_timer_busy = 1;
		mutex_exit(&nvpflush_lock);
		nvpflush_id = timeout(nvpflush_timeout, NULL, nticks);
	} else {
		do_nvpflush = 1;
		NVPDAEMON_DEBUG((CE_CONT, "signal nvpdaemon\n"));
		cv_signal(&nvpflush_cv);
		nvpflush_id = 0;
		nvpflush_timer_busy = 0;
		mutex_exit(&nvpflush_lock);
	}
}
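
/*
 * Worked example of the retriggering above, assuming the default
 * nvpflush_delay of 10 seconds: each nvf_wake_daemon() call pushes
 * nvpticks 10 seconds past the current lbolt.  When the timeout
 * fires, nticks is still large if the data was touched in the
 * interim, so the timer re-arms for the remainder rather than
 * signaling the daemon; only once updates stop (nticks within 4
 * ticks) is the flush actually signaled.
 */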
/*
 * After marking a list as dirty, wake the nvpflush daemon
 * to perform the update.
 */
void
nvf_wake_daemon(void)
{
	clock_t nticks;

	/*
	 * If the system isn't up yet or is shutting down,
	 * don't even think about starting a flush.
	 */
	if (!i_ddi_io_initialized() || sys_shutdown)
		return;

	mutex_enter(&nvpflush_lock);

	if (nvpflush_daemon_active == 0) {
		nvpflush_daemon_active = 1;
		mutex_exit(&nvpflush_lock);
		NVPDAEMON_DEBUG((CE_CONT, "starting nvpdaemon thread\n"));
		nvpflush_thr_id = thread_create(NULL, 0,
		    (void (*)())nvpflush_daemon,
		    NULL, 0, &p0, TS_RUN, minclsyspri);
		mutex_enter(&nvpflush_lock);
	}

	nticks = nvpflush_delay * TICKS_PER_SECOND;
	nvpticks = ddi_get_lbolt() + nticks;
	if (nvpflush_timer_busy == 0) {
		nvpflush_timer_busy = 1;
		mutex_exit(&nvpflush_lock);
		nvpflush_id = timeout(nvpflush_timeout, NULL, nticks + 4);
	} else
		mutex_exit(&nvpflush_lock);
}
static int
nvpflush_one(nvfd_t *nvfd)
{
	int rval = DDI_SUCCESS;
	nvlist_t *nvl;

	rw_enter(&nvfd->nvf_lock, RW_READER);

	ASSERT((nvfd->nvf_flags & NVF_F_FLUSHING) == 0);

	if (!NVF_IS_DIRTY(nvfd) ||
	    NVF_IS_READONLY(nvfd) || kfio_disable_write || sys_shutdown) {
		NVF_CLEAR_DIRTY(nvfd);
		rw_exit(&nvfd->nvf_lock);
		return (DDI_SUCCESS);
	}

	if (rw_tryupgrade(&nvfd->nvf_lock) == 0) {
		nvf_error("nvpflush: "
		    "%s rw upgrade failed\n", nvfd->nvf_cache_path);
		rw_exit(&nvfd->nvf_lock);
		return (DDI_FAILURE);
	}

	if (((nvfd->nvf_pack_list)
	    ((nvf_handle_t)nvfd, &nvl)) != DDI_SUCCESS) {
		nvf_error("nvpflush: "
		    "%s nvlist construction failed\n", nvfd->nvf_cache_path);
		ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock));
		rw_exit(&nvfd->nvf_lock);
		return (DDI_FAILURE);
	}
	ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock));

	NVF_CLEAR_DIRTY(nvfd);
	nvfd->nvf_flags |= NVF_F_FLUSHING;
	rw_exit(&nvfd->nvf_lock);

	rval = e_fwrite_nvlist(nvfd, nvl);
	nvlist_free(nvl);

	rw_enter(&nvfd->nvf_lock, RW_WRITER);
	nvfd->nvf_flags &= ~NVF_F_FLUSHING;
	if (rval == DDI_FAILURE) {
		if (NVF_IS_READONLY(nvfd)) {
			rval = DDI_SUCCESS;
			nvfd->nvf_flags &= ~(NVF_F_ERROR | NVF_F_DIRTY);
		} else if ((nvfd->nvf_flags & NVF_F_ERROR) == 0) {
			cmn_err(CE_CONT,
			    "%s: update failed\n", nvfd->nvf_cache_path);
			nvfd->nvf_flags |= NVF_F_ERROR | NVF_F_DIRTY;
		}
	} else {
		if (nvfd->nvf_flags & NVF_F_CREATE_MSG) {
			cmn_err(CE_CONT,
			    "!Creating %s\n", nvfd->nvf_cache_path);
			nvfd->nvf_flags &= ~NVF_F_CREATE_MSG;
		}
		if (nvfd->nvf_flags & NVF_F_REBUILD_MSG) {
			cmn_err(CE_CONT,
			    "!Rebuilding %s\n", nvfd->nvf_cache_path);
			nvfd->nvf_flags &= ~NVF_F_REBUILD_MSG;
		}
		if (nvfd->nvf_flags & NVF_F_ERROR) {
			cmn_err(CE_CONT,
			    "%s: update now ok\n", nvfd->nvf_cache_path);
			nvfd->nvf_flags &= ~NVF_F_ERROR;
		}
		/*
		 * The file may need to be flushed again if the cached
		 * data was touched while writing the earlier contents.
		 */
		if (NVF_IS_DIRTY(nvfd))
			rval = DDI_FAILURE;
	}

	rw_exit(&nvfd->nvf_lock);
	return (rval);
}
static void
nvpflush_daemon(void)
{
	callb_cpr_t cprinfo;
	nvfd_t	*nvfdp, *nextfdp;
	clock_t clk;
	int rval;
	int want_wakeup;
	int is_now_clean;

	ASSERT(modrootloaded);

	nvpflush_thread = curthread;
	NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: init\n"));

	CALLB_CPR_INIT(&cprinfo, &nvpflush_lock, callb_generic_cpr, "nvp");
	mutex_enter(&nvpflush_lock);
	for (;;) {
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		while (do_nvpflush == 0) {
			clk = cv_reltimedwait(&nvpflush_cv, &nvpflush_lock,
			    (nvpdaemon_idle_time * TICKS_PER_SECOND),
			    TR_CLOCK_TICK);
			if ((clk == -1 && do_nvpflush == 0 &&
			    nvpflush_timer_busy == 0) || sys_shutdown) {
				/*
				 * Note that CALLB_CPR_EXIT calls mutex_exit()
				 * on the lock passed in to CALLB_CPR_INIT,
				 * so the lock must be held when invoking it.
				 */
				CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock);
				NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: exit\n"));
				ASSERT(mutex_owned(&nvpflush_lock));
				nvpflush_thr_id = NULL;
				nvpflush_daemon_active = 0;
				CALLB_CPR_EXIT(&cprinfo);
				thread_exit();
			}
		}
		CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock);

		nvpbusy = 1;
		want_wakeup = 0;
		do_nvpflush = 0;
		mutex_exit(&nvpflush_lock);

		/*
		 * Try flushing what's dirty, reschedule if there's
		 * a failure or data gets marked as dirty again.
		 * First move each file marked dirty to the dirty
		 * list to avoid locking the list across the write.
		 */
		mutex_enter(&nvf_cache_mutex);
		for (nvfdp = list_head(&nvf_cache_files);
		    nvfdp; nvfdp = nextfdp) {
			nextfdp = list_next(&nvf_cache_files, nvfdp);
			rw_enter(&nvfdp->nvf_lock, RW_READER);
			if (NVF_IS_DIRTY(nvfdp)) {
				list_remove(&nvf_cache_files, nvfdp);
				list_insert_tail(&nvf_dirty_files, nvfdp);
				rw_exit(&nvfdp->nvf_lock);
			} else {
				NVPDAEMON_DEBUG((CE_CONT,
				    "nvpdaemon: not dirty %s\n",
				    nvfdp->nvf_cache_path));
				rw_exit(&nvfdp->nvf_lock);
			}
		}
		mutex_exit(&nvf_cache_mutex);

		/*
		 * Now go through the dirty list
		 */
		for (nvfdp = list_head(&nvf_dirty_files);
		    nvfdp; nvfdp = nextfdp) {
			nextfdp = list_next(&nvf_dirty_files, nvfdp);

			is_now_clean = 0;
			rw_enter(&nvfdp->nvf_lock, RW_READER);
			if (NVF_IS_DIRTY(nvfdp)) {
				NVPDAEMON_DEBUG((CE_CONT,
				    "nvpdaemon: flush %s\n",
				    nvfdp->nvf_cache_path));
				rw_exit(&nvfdp->nvf_lock);
				rval = nvpflush_one(nvfdp);
				rw_enter(&nvfdp->nvf_lock, RW_READER);
				if (rval != DDI_SUCCESS ||
				    NVF_IS_DIRTY(nvfdp)) {
					rw_exit(&nvfdp->nvf_lock);
					NVPDAEMON_DEBUG((CE_CONT,
					    "nvpdaemon: %s dirty again\n",
					    nvfdp->nvf_cache_path));
					want_wakeup = 1;
				} else {
					rw_exit(&nvfdp->nvf_lock);
					nvf_write_is_complete(nvfdp);
					is_now_clean = 1;
				}
			} else {
				NVPDAEMON_DEBUG((CE_CONT,
				    "nvpdaemon: not dirty %s\n",
				    nvfdp->nvf_cache_path));
				rw_exit(&nvfdp->nvf_lock);
				is_now_clean = 1;
			}

			if (is_now_clean) {
				mutex_enter(&nvf_cache_mutex);
				list_remove(&nvf_dirty_files, nvfdp);
				list_insert_tail(&nvf_cache_files,
				    nvfdp);
				mutex_exit(&nvf_cache_mutex);
			}
		}

		if (want_wakeup)
			nvf_wake_daemon();

		mutex_enter(&nvpflush_lock);
		nvpbusy = 0;
	}
}