4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 #include <sys/debug.h>
28 #include <sys/types.h>
30 #include <sys/errno.h>
36 #include <sys/cmn_err.h>
37 #include <sys/modctl.h>
39 #include <sys/atomic.h>
40 #include <sys/filio.h>
41 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
42 #include <sys/kstat.h>
45 #include <sys/devops.h>
46 #include <sys/sunddi.h>
47 #include <sys/esunddi.h>
48 #include <sys/priv_names.h>
50 #include <sys/fssnap.h>
51 #include <sys/fssnap_if.h>
54 * This module implements the file system snapshot code, which provides a
55 * point-in-time image of a file system for the purposes of online backup.
56 * There are essentially two parts to this project: the driver half and the
57 * file system half. The driver half is a pseudo device driver called
58 * "fssnap" that represents the snapshot. Each snapshot is assigned a
59 * number that corresponds to the minor number of the device, and a control
60 * device with a high minor number is used to initiate snapshot creation and
61 * deletion. For all practical purposes the driver half acts like a
62 * read-only disk device whose contents are exactly the same as the master
63 * file system at the time the snapshot was created.
65 * The file system half provides interfaces necessary for performing the
66 * file system dependent operations required to create and delete snapshots
67 * and a special driver strategy routine that must always be used by the file
68 * system for snapshots to work correctly.
70 * When a snapshot is to be created, the user utility will send an ioctl to
71 * the control device of the driver half specifying the file system to be
72 * snapshotted, the file descriptor of a backing-store file which is used to
73 * hold old data before it is overwritten, and other snapshot parameters.
74 * This ioctl is passed on to the file system specified in the original
75 * ioctl request. The file system is expected to be able to flush
76 * everything out to make the file system consistent and lock it to ensure
77 * no changes occur while the snapshot is being created. It then calls
78 * fssnap_create() to create state for a new snapshot, from which an opaque
79 * handle is returned with the snapshot locked. Next, the file system must
80 * populate the "candidate bitmap", which tells the snapshot code which
81 * "chunks" should be considered for copy-on-write (a chunk is the unit of
82 * granularity used for copy-on-write, which is independent of the device
83 * and file system block sizes). This is typically done by scanning the
84 * file system allocation bitmaps to determine which chunks contain
85 * allocated blocks in the file system at the time the snapshot was created.
86 * If a chunk has no allocated blocks, it does not need to be copied before
87 * being written to. Once the candidate bitmap is populated with
88 * fssnap_set_candidate(), the file system calls fssnap_create_done() to
89 * complete the snapshot creation and unlock the snapshot. The file system
90 * may now be unlocked and modifications to it resumed.
92 * Once a snapshot is created, the file system must perform all writes
93 * through a special strategy routine, fssnap_strategy(). This strategy
94 * routine determines whether the chunks contained by the write must be
95 * copied before being overwritten by consulting the candidate bitmap
96 * described above, and the "hastrans bitmap" which tells it whether the chunk
97 * has been copied already or not. If the chunk is a candidate but has not
98 * been copied, it reads the old data in and adds it to a queue. The
99 * old data can then be overwritten with the new data. An asynchronous
100 * task queue is dispatched for each old chunk read in which writes the old
101 * data to the backing file specified at snapshot creation time. The
102 * backing file is a sparse file the same size as the file system that
103 * contains the old data at the offset that data originally had in the
104 * file system. If the queue containing in-memory chunks gets too large,
105 * writes to the file system may be throttled by a semaphore until the
106 * task queues have a chance to push some of the chunks to the backing file.
108 * With the candidate bitmap, the hastrans bitmap, the data on the master
109 * file system, and the old data in memory and in the backing file, the
110 * snapshot pseudo-driver can piece together the original file system
111 * information to satisfy read requests. If the requested chunk is not a
112 * candidate, it returns a zeroed buffer. If the chunk is a candidate but
113 * has not been copied it reads it from the master file system. If it is a
114 * candidate and has been copied, it either copies the data from the
115 * in-memory queue or it reads it in from the backing file. The result is
116 * a replication of the original file system that can be backed up, mounted,
117 * or manipulated by other file system utilities that work on a read-only
118 * device.
120 * This module is divided into three roughly logical sections:
122 * - The snapshot driver, which is a character/block driver
123 * representing the snapshot itself. These routines are
124 * prefixed with "snap_".
126 * - The library routines that are defined in fssnap_if.h that
127 * are used by file systems that use this snapshot implementation.
128 * These functions are prefixed with "fssnap_" and are called through
129 * a function vector from the file system.
131 * - The helper routines used by the snapshot driver and the fssnap
132 * library routines for managing the translation table and other
133 * useful functions. These routines are all static and are
134 * prefixed with either "fssnap_" or "transtbl_" if they
135 * are specifically used for translation table activities.
138 static dev_info_t
*fssnap_dip
= NULL
;
139 static struct snapshot_id
*snapshot
= NULL
;
140 static struct snapshot_id snap_ctl
;
141 static int num_snapshots
= 0;
142 static kmutex_t snapshot_mutex
;
143 static char snapname
[] = SNAP_NAME
;
145 /* "tunable" parameters */
146 static int fssnap_taskq_nthreads
= FSSNAP_TASKQ_THREADS
;
147 static uint_t fssnap_max_mem_chunks
= FSSNAP_MAX_MEM_CHUNKS
;
148 static int fssnap_taskq_maxtasks
= FSSNAP_TASKQ_MAXTASKS
;
150 /* static function prototypes */
152 /* snapshot driver */
153 static int snap_getinfo(dev_info_t
*, ddi_info_cmd_t
, void *, void **);
154 static int snap_attach(dev_info_t
*dip
, ddi_attach_cmd_t cmd
);
155 static int snap_detach(dev_info_t
*dip
, ddi_detach_cmd_t cmd
);
156 static int snap_open(dev_t
*devp
, int flag
, int otyp
, cred_t
*cred
);
157 static int snap_close(dev_t dev
, int flag
, int otyp
, cred_t
*cred
);
158 static int snap_strategy(struct buf
*bp
);
159 static int snap_read(dev_t dev
, struct uio
*uiop
, cred_t
*credp
);
160 static int snap_print(dev_t dev
, char *str
);
161 static int snap_ioctl(dev_t dev
, int cmd
, intptr_t arg
, int mode
,
162 cred_t
*credp
, int *rvalp
);
163 static int snap_prop_op(dev_t dev
, dev_info_t
*dip
, ddi_prop_op_t prop_op
,
164 int flags
, char *name
, caddr_t valuep
, int *lengthp
);
165 static int snap_getchunk(struct snapshot_id
*sidp
, chunknumber_t chunk
,
166 int offset
, int len
, char *buffer
);
169 /* fssnap interface implementations (see fssnap_if.h) */
170 static void fssnap_strategy_impl(void *, struct buf
*);
171 static void *fssnap_create_impl(chunknumber_t
, uint_t
, uoff_t
,
172 struct vnode
*, int, struct vnode
**, char *, uoff_t
);
173 static void fssnap_set_candidate_impl(void *, chunknumber_t
);
174 static int fssnap_is_candidate_impl(void *, uoff_t
);
175 static int fssnap_create_done_impl(void *);
176 static int fssnap_delete_impl(void *);
/* fssnap interface support routines */
static int fssnap_translate(struct snapshot_id **, struct buf *);
static void fssnap_write_taskq(void *);
181 static void fssnap_create_kstats(snapshot_id_t
*, int, const char *,
183 static int fssnap_update_kstat_num(kstat_t
*, int);
184 static void fssnap_delete_kstats(struct cow_info
*);
186 /* translation table prototypes */
187 static cow_map_node_t
*transtbl_add(cow_map_t
*, chunknumber_t
, caddr_t
);
188 static cow_map_node_t
*transtbl_get(cow_map_t
*, chunknumber_t
);
189 static void transtbl_delete(cow_map_t
*, cow_map_node_t
*);
190 static void transtbl_free(cow_map_t
*);
192 static kstat_t
*fssnap_highwater_kstat
;
194 /* ************************************************************************ */
196 /* Device and Module Structures */
198 static struct cb_ops snap_cb_ops
= {
203 nodev
, /* no snap_dump */
205 nodev
, /* no snap_write */
207 nodev
, /* no snap_devmap */
208 nodev
, /* no snap_mmap */
209 nodev
, /* no snap_segmap */
212 NULL
, /* streamtab */
213 D_64BIT
| D_NEW
| D_MP
, /* driver compatibility */
215 nodev
, /* async I/O read entry point */
216 nodev
/* async I/O write entry point */
219 static struct dev_ops snap_ops
= {
223 nulldev
, /* snap_identify obsolete */
224 nulldev
, /* no snap_probe */
227 nodev
, /* no snap_reset */
230 nulldev
, /* no snap_power() */
231 ddi_quiesce_not_needed
, /* quiesce */
234 extern struct mod_ops mod_driverops
;
236 static struct modldrv md
= {
237 &mod_driverops
, /* Type of module. This is a driver */
238 "snapshot driver", /* Name of the module */
242 static struct modlinkage ml
= {
255 kstat_named_t
*ksdata
;
257 error
= ddi_soft_state_init(&statep
, sizeof (struct snapshot_id
*), 1);
259 cmn_err(CE_WARN
, "_init: failed to init ddi_soft_state.");
263 error
= mod_install(&ml
);
266 cmn_err(CE_WARN
, "_init: failed to mod_install.");
267 ddi_soft_state_fini(&statep
);
272 * Fill in the snapshot operations vector for file systems
273 * (defined in fssnap_if.c)
276 snapops
.fssnap_create
= fssnap_create_impl
;
277 snapops
.fssnap_set_candidate
= fssnap_set_candidate_impl
;
278 snapops
.fssnap_is_candidate
= fssnap_is_candidate_impl
;
279 snapops
.fssnap_create_done
= fssnap_create_done_impl
;
280 snapops
.fssnap_delete
= fssnap_delete_impl
;
281 snapops
.fssnap_strategy
= fssnap_strategy_impl
;
283 mutex_init(&snapshot_mutex
, NULL
, MUTEX_DEFAULT
, NULL
);
286 * Initialize the fssnap highwater kstat
288 ksp
= kstat_create(snapname
, 0, FSSNAP_KSTAT_HIGHWATER
, "misc",
289 KSTAT_TYPE_NAMED
, 1, 0);
291 ksdata
= (kstat_named_t
*)ksp
->ks_data
;
292 kstat_named_init(ksdata
, FSSNAP_KSTAT_HIGHWATER
,
294 ksdata
->value
.ui32
= 0;
297 cmn_err(CE_WARN
, "_init: failed to create highwater kstat.");
299 fssnap_highwater_kstat
= ksp
;
305 _info(struct modinfo
*modinfop
)
307 return (mod_info(&ml
, modinfop
));
315 error
= mod_remove(&ml
);
318 ddi_soft_state_fini(&statep
);
321 * delete the fssnap highwater kstat
323 kstat_delete(fssnap_highwater_kstat
);
325 mutex_destroy(&snapshot_mutex
);
327 /* Clear out the file system operations vector */
328 snapops
.fssnap_create
= NULL
;
329 snapops
.fssnap_set_candidate
= NULL
;
330 snapops
.fssnap_create_done
= NULL
;
331 snapops
.fssnap_delete
= NULL
;
332 snapops
.fssnap_strategy
= NULL
;
337 /* ************************************************************************ */
340 * Snapshot Driver Routines
342 * This section implements the snapshot character and block drivers. The
343 * device will appear to be a consistent read-only file system to
344 * applications that wish to back it up or mount it. The snapshot driver
345 * communicates with the file system through the translation table, which
346 * tells the snapshot driver where to find the data necessary to piece
347 * together the frozen file system. The data may either be on the master
348 * device (no translation exists), in memory (a translation exists but has
349 * not been flushed to the backing store), or in the backing store file.
350 * The read request may require the snapshot driver to retrieve data from
351 * several different places and piece it together to look like a single
352 * contiguous request.
354 * The device minor number corresponds to the snapshot number in the list of
355 * snapshot identifiers. The soft state for each minor number is simply a
356 * pointer to the snapshot id, which holds all of the snapshot state. One
357 * minor number is designated as the control device. All snapshot create
358 * and delete requests go through the control device to ensure this module
359 * is properly loaded and attached before the file system starts calling
360 * routines defined here.
365 * snap_getinfo() - snapshot driver getinfo(9E) routine
370 snap_getinfo(dev_info_t
*dip
, ddi_info_cmd_t infocmd
, void *arg
, void **result
)
373 case DDI_INFO_DEVT2DEVINFO
:
374 *result
= fssnap_dip
;
375 return (DDI_SUCCESS
);
376 case DDI_INFO_DEVT2INSTANCE
:
377 *result
= 0; /* we only have one instance */
378 return (DDI_SUCCESS
);
380 return (DDI_FAILURE
);
384 * snap_attach() - snapshot driver attach(9E) routine
386 * sets up snapshot control device and control state. The control state
387 * is a pointer to an "anonymous" snapshot_id for tracking opens and closes
390 snap_attach(dev_info_t
*dip
, ddi_attach_cmd_t cmd
)
396 /* create the control device */
397 error
= ddi_create_priv_minor_node(dip
, SNAP_CTL_NODE
, S_IFCHR
,
398 SNAP_CTL_MINOR
, DDI_PSEUDO
, PRIVONLY_DEV
,
399 PRIV_SYS_CONFIG
, PRIV_SYS_CONFIG
, 0666);
400 if (error
== DDI_FAILURE
) {
401 return (DDI_FAILURE
);
404 rw_init(&snap_ctl
.sid_rwlock
, NULL
, RW_DEFAULT
, NULL
);
405 rw_enter(&snap_ctl
.sid_rwlock
, RW_WRITER
);
407 snap_ctl
.sid_snapnumber
= SNAP_CTL_MINOR
;
408 /* the control sid is not linked into the snapshot list */
409 snap_ctl
.sid_next
= NULL
;
410 snap_ctl
.sid_cowinfo
= NULL
;
411 snap_ctl
.sid_flags
= 0;
412 rw_exit(&snap_ctl
.sid_rwlock
);
415 return (DDI_SUCCESS
);
417 return (DDI_SUCCESS
);
420 return (DDI_SUCCESS
);
423 return (DDI_FAILURE
);
428 * snap_detach() - snapshot driver detach(9E) routine
430 * destroys snapshot control device and control state. If any snapshots
431 * are active (i.e. num_snapshots != 0), the device will refuse to detach.
434 snap_detach(dev_info_t
*dip
, ddi_detach_cmd_t cmd
)
436 struct snapshot_id
*sidp
, *sidnextp
;
440 /* do not detach if the device is active */
441 mutex_enter(&snapshot_mutex
);
442 if ((num_snapshots
!= 0) ||
443 ((snap_ctl
.sid_flags
& SID_CHAR_BUSY
) != 0)) {
444 mutex_exit(&snapshot_mutex
);
445 return (DDI_FAILURE
);
448 /* free up the snapshot list */
449 for (sidp
= snapshot
; sidp
!= NULL
; sidp
= sidnextp
) {
450 ASSERT(SID_AVAILABLE(sidp
) &&
451 !RW_LOCK_HELD(&sidp
->sid_rwlock
));
452 sidnextp
= sidp
->sid_next
;
453 rw_destroy(&sidp
->sid_rwlock
);
454 kmem_free(sidp
, sizeof (struct snapshot_id
));
458 /* delete the control device */
459 ddi_remove_minor_node(dip
, SNAP_CTL_NODE
);
462 ASSERT((snap_ctl
.sid_flags
& SID_CHAR_BUSY
) == 0);
463 rw_destroy(&snap_ctl
.sid_rwlock
);
464 mutex_exit(&snapshot_mutex
);
466 return (DDI_SUCCESS
);
469 return (DDI_FAILURE
);
474 * snap_open() - snapshot driver open(9E) routine
476 * marks the snapshot id as busy so it will not be recycled when deleted
477 * until the snapshot is closed.
481 snap_open(dev_t
*devp
, int flag
, int otyp
, cred_t
*cred
)
484 struct snapshot_id
**sidpp
, *sidp
;
486 /* snapshots are read-only */
490 minor
= getminor(*devp
);
492 if (minor
== SNAP_CTL_MINOR
) {
493 /* control device must be opened exclusively */
494 if (((flag
& FEXCL
) != FEXCL
) || (otyp
!= OTYP_CHR
))
497 rw_enter(&snap_ctl
.sid_rwlock
, RW_WRITER
);
498 if ((snap_ctl
.sid_flags
& SID_CHAR_BUSY
) != 0) {
499 rw_exit(&snap_ctl
.sid_rwlock
);
503 snap_ctl
.sid_flags
|= SID_CHAR_BUSY
;
504 rw_exit(&snap_ctl
.sid_rwlock
);
509 sidpp
= ddi_get_soft_state(statep
, minor
);
510 if (sidpp
== NULL
|| *sidpp
== NULL
)
513 rw_enter(&sidp
->sid_rwlock
, RW_WRITER
);
515 if ((flag
& FEXCL
) && SID_BUSY(sidp
)) {
516 rw_exit(&sidp
->sid_rwlock
);
520 ASSERT(sidpp
!= NULL
&& sidp
!= NULL
);
521 /* check to see if this snapshot has been killed on us */
522 if (SID_INACTIVE(sidp
)) {
523 cmn_err(CE_WARN
, "snap_open: snapshot %d does not exist.",
525 rw_exit(&sidp
->sid_rwlock
);
531 sidp
->sid_flags
|= SID_CHAR_BUSY
;
534 sidp
->sid_flags
|= SID_BLOCK_BUSY
;
537 rw_exit(&sidp
->sid_rwlock
);
541 rw_exit(&sidp
->sid_rwlock
);
544 * at this point if a valid snapshot was found then it has
545 * been marked busy and we can use it.
551 * snap_close() - snapshot driver close(9E) routine
553 * unsets the busy bits in the snapshot id. If the snapshot has been
554 * deleted while the snapshot device was open, the close call will clean
555 * up the remaining state information.
559 snap_close(dev_t dev
, int flag
, int otyp
, cred_t
*cred
)
561 struct snapshot_id
**sidpp
, *sidp
;
565 minor
= getminor(dev
);
567 /* if this is the control device, close it and return */
568 if (minor
== SNAP_CTL_MINOR
) {
569 rw_enter(&snap_ctl
.sid_rwlock
, RW_WRITER
);
570 snap_ctl
.sid_flags
&= ~(SID_CHAR_BUSY
);
571 rw_exit(&snap_ctl
.sid_rwlock
);
575 sidpp
= ddi_get_soft_state(statep
, minor
);
576 if (sidpp
== NULL
|| *sidpp
== NULL
) {
577 cmn_err(CE_WARN
, "snap_close: could not find state for "
578 "snapshot %d.", minor
);
582 mutex_enter(&snapshot_mutex
);
583 rw_enter(&sidp
->sid_rwlock
, RW_WRITER
);
585 /* Mark the snapshot as not being busy anymore */
588 sidp
->sid_flags
&= ~(SID_CHAR_BUSY
);
591 sidp
->sid_flags
&= ~(SID_BLOCK_BUSY
);
594 mutex_exit(&snapshot_mutex
);
595 rw_exit(&sidp
->sid_rwlock
);
599 if (SID_AVAILABLE(sidp
)) {
601 * if this is the last close on a snapshot that has been
602 * deleted, then free up the soft state. The snapdelete
603 * ioctl does not free this when the device is in use so
604 * we do it here after the last reference goes away.
607 /* remove the device nodes */
608 ASSERT(fssnap_dip
!= NULL
);
609 (void) snprintf(name
, sizeof (name
), "%d",
610 sidp
->sid_snapnumber
);
611 ddi_remove_minor_node(fssnap_dip
, name
);
612 (void) snprintf(name
, sizeof (name
), "%d,raw",
613 sidp
->sid_snapnumber
);
614 ddi_remove_minor_node(fssnap_dip
, name
);
616 /* delete the state structure */
617 ddi_soft_state_free(statep
, sidp
->sid_snapnumber
);
621 mutex_exit(&snapshot_mutex
);
622 rw_exit(&sidp
->sid_rwlock
);
628 * snap_read() - snapshot driver read(9E) routine
630 * reads data from the snapshot by calling snap_strategy() through physio()
634 snap_read(dev_t dev
, struct uio
*uiop
, cred_t
*credp
)
637 struct snapshot_id
**sidpp
;
639 minor
= getminor(dev
);
640 sidpp
= ddi_get_soft_state(statep
, minor
);
641 if (sidpp
== NULL
|| *sidpp
== NULL
) {
643 "snap_read: could not find state for snapshot %d.", minor
);
646 return (physio(snap_strategy
, NULL
, dev
, B_READ
, minphys
, uiop
));
650 * snap_strategy() - snapshot driver strategy(9E) routine
652 * cycles through each chunk in the requested buffer and calls
653 * snap_getchunk() on each chunk to retrieve it from the appropriate
654 * place. Once all of the parts are put together the requested buffer
655 * is returned. The snapshot driver is read-only, so a write is invalid.
658 snap_strategy(struct buf
*bp
)
660 struct snapshot_id
**sidpp
, *sidp
;
669 /* snapshot device is read-only */
670 if (bp
->b_flags
& B_WRITE
) {
672 bp
->b_resid
= bp
->b_bcount
;
677 minor
= getminor(bp
->b_edev
);
678 sidpp
= ddi_get_soft_state(statep
, minor
);
679 if (sidpp
== NULL
|| *sidpp
== NULL
) {
681 "snap_strategy: could not find state for snapshot %d.",
684 bp
->b_resid
= bp
->b_bcount
;
690 rw_enter(&sidp
->sid_rwlock
, RW_READER
);
692 if (SID_INACTIVE(sidp
)) {
694 bp
->b_resid
= bp
->b_bcount
;
696 rw_exit(&sidp
->sid_rwlock
);
700 if (bp
->b_flags
& (B_PAGEIO
|B_PHYS
))
703 bp
->b_resid
= bp
->b_bcount
;
704 ASSERT(bp
->b_un
.b_addr
);
705 buf
= bp
->b_un
.b_addr
;
707 chunksz
= sidp
->sid_cowinfo
->cow_map
.cmap_chunksz
;
709 /* reqptr is the current DEV_BSIZE offset into the device */
710 /* chunk is the chunk containing reqptr */
711 /* len is the length of the request (in the current chunk) in bytes */
712 /* off is the byte offset into the current chunk */
713 reqptr
= bp
->b_lblkno
;
714 while (bp
->b_resid
> 0) {
715 chunk
= dbtocowchunk(&sidp
->sid_cowinfo
->cow_map
, reqptr
);
716 off
= (reqptr
% (chunksz
>> DEV_BSHIFT
)) << DEV_BSHIFT
;
717 len
= min(chunksz
- off
, bp
->b_resid
);
718 ASSERT((off
+ len
) <= chunksz
);
720 if ((error
= snap_getchunk(sidp
, chunk
, off
, len
, buf
)) != 0) {
722 * EINVAL means the user tried to go out of range.
723 * Anything else is unexpected, so a warning is logged.
726 if (error
!= EINVAL
) {
727 cmn_err(CE_WARN
, "snap_strategy: error "
728 "calling snap_getchunk, chunk = %llu, "
729 "offset = %d, len = %d, resid = %lu, "
731 chunk
, off
, len
, bp
->b_resid
, error
);
735 rw_exit(&sidp
->sid_rwlock
);
739 reqptr
+= (len
>> DEV_BSHIFT
);
743 ASSERT(bp
->b_resid
== 0);
746 rw_exit(&sidp
->sid_rwlock
);
751 * snap_getchunk() - helper function for snap_strategy()
753 * gets the requested data from the appropriate place and fills in the
754 * buffer. chunk is the chunk number of the request, offset is the
755 * offset into that chunk and must be less than the chunk size. len is
756 * the length of the request starting at offset, and must not exceed a
757 * chunk boundary. buffer is the address to copy the data to. len
758 * bytes are copied into the buffer starting at the location specified.
760 * A chunk is located according to the following algorithm:
761 * - If the chunk is not a candidate, a zeroed buffer is returned; if it is a
762 * candidate with no translation, it is read from the master device.
763 * - If the chunk does have a translation, then it is either in memory
764 * or in the backing store:
765 * o If it is in memory the requested data is simply copied out
766 * of the in-memory buffer.
767 * o If it is in the backing store, it is read from there.
769 * This function does the real work of the snapshot driver.
772 snap_getchunk(struct snapshot_id
*sidp
, chunknumber_t chunk
, int offset
,
773 int len
, char *buffer
)
775 cow_map_t
*cmap
= &sidp
->sid_cowinfo
->cow_map
;
783 ASSERT(RW_READ_HELD(&sidp
->sid_rwlock
));
784 ASSERT(offset
+ len
<= cmap
->cmap_chunksz
);
787 * Check if the chunk number is out of range and if so bail out
789 if (chunk
>= (cmap
->cmap_bmsize
* NBBY
)) {
794 * If the chunk is not a candidate for translation, then the chunk
795 * was not allocated when the snapshot was taken. Since it does
796 * not contain data associated with this snapshot, just return a
797 * zero buffer instead.
799 if (isclr(cmap
->cmap_candidate
, chunk
)) {
805 * if the chunk is a candidate for translation but a
806 * translation does not exist, then read through to the
807 * original file system. The rwlock is held until the read
808 * completes if it hasn't been translated to make sure the
809 * file system does not translate the block before we
810 * access it. If it has already been translated we don't
811 * need the lock, because the translation will never go away.
813 rw_enter(&cmap
->cmap_rwlock
, RW_READER
);
814 if (isclr(cmap
->cmap_hastrans
, chunk
)) {
815 snapbuf
= getrbuf(KM_SLEEP
);
817 * Reading into the buffer saves having to do a copy,
818 * but gets tricky if the request size is not a
819 * multiple of DEV_BSIZE. However, we are filling the
820 * buffer left to right, so future reads will write
821 * over any extra data we might have read.
824 partial
= len
% DEV_BSIZE
;
826 snapbuf
->b_bcount
= len
;
827 snapbuf
->b_lblkno
= lbtodb(chunk
* cmap
->cmap_chunksz
+ offset
);
828 snapbuf
->b_un
.b_addr
= buffer
;
830 snapbuf
->b_iodone
= NULL
;
831 snapbuf
->b_proc
= NULL
; /* i.e. the kernel */
832 snapbuf
->b_flags
= B_READ
| B_BUSY
;
833 snapbuf
->b_edev
= sidp
->sid_fvp
->v_vfsp
->vfs_dev
;
837 * Partial block read in progress.
838 * This is bad as modules further down the line
839 * assume buf's are exact multiples of DEV_BSIZE
840 * and we end up with fewer, or zero, bytes read.
841 * To get round this we need to round up to the
842 * nearest full block read and then return only
843 * the requested bytes.
845 newlen
= (len
- partial
) + DEV_BSIZE
;
846 newbuffer
= kmem_alloc(newlen
, KM_SLEEP
);
848 snapbuf
->b_bcount
= newlen
;
849 snapbuf
->b_un
.b_addr
= newbuffer
;
852 (void) bdev_strategy(snapbuf
);
853 (void) biowait(snapbuf
);
855 error
= geterror(snapbuf
);
859 * Partial block read. Now we need to bcopy the
860 * correct number of bytes back into the
861 * supplied buffer, and tidy up our temp
862 * buffer.
864 bcopy(newbuffer
, buffer
, len
);
865 kmem_free(newbuffer
, newlen
);
869 rw_exit(&cmap
->cmap_rwlock
);
875 * finally, if the chunk is a candidate for translation and it
876 * has been translated, then we clone the chunk of the buffer
877 * that was copied aside by the file system.
878 * The cmap_rwlock does not need to be held after we know the
879 * data has already been copied. Once a chunk has been copied
880 * to the backing file, it is stable read only data.
882 cmn
= transtbl_get(cmap
, chunk
);
884 /* check whether the data is in memory or in the backing file */
886 ASSERT(cmn
->cmn_buf
);
887 /* already in memory */
888 bcopy(cmn
->cmn_buf
+ offset
, buffer
, len
);
889 rw_exit(&cmap
->cmap_rwlock
);
894 * can cause deadlock with writer if we don't drop the
895 * cmap_rwlock before trying to get the backing store file
898 rw_exit(&cmap
->cmap_rwlock
);
900 bf_index
= chunk
/ cmap
->cmap_chunksperbf
;
902 /* read buffer from backing file */
903 error
= vn_rdwr(UIO_READ
,
904 (sidp
->sid_cowinfo
->cow_backfile_array
)[bf_index
],
905 buffer
, len
, ((chunk
% cmap
->cmap_chunksperbf
) *
906 cmap
->cmap_chunksz
) + offset
, UIO_SYSSPACE
, 0,
907 RLIM_INFINITY
, kcred
, &resid
);
914 * snap_print() - snapshot driver print(9E) routine
916 * prints the device identification string.
919 snap_print(dev_t dev
, char *str
)
921 struct snapshot_id
**sidpp
;
924 minor
= getminor(dev
);
925 sidpp
= ddi_get_soft_state(statep
, minor
);
926 if (sidpp
== NULL
|| *sidpp
== NULL
) {
928 "snap_print: could not find state for snapshot %d.", minor
);
932 cmn_err(CE_NOTE
, "snap_print: snapshot %d: %s", minor
, str
);
938 * snap_prop_op() - snapshot driver prop_op(9E) routine
940 * get 32-bit and 64-bit values for size (character driver) and nblocks
941 * (block driver).
944 snap_prop_op(dev_t dev
, dev_info_t
*dip
, ddi_prop_op_t prop_op
,
945 int flags
, char *name
, caddr_t valuep
, int *lengthp
)
948 struct snapshot_id
**sidpp
;
953 minor
= getminor(dev
);
956 * If this is the control device just check for .conf properties,
957 * if the wildcard DDI_DEV_T_ANY was passed in via the dev_t
958 * just fall back to the defaults.
960 if ((minor
== SNAP_CTL_MINOR
) || (dev
== DDI_DEV_T_ANY
))
961 return (ddi_prop_op(dev
, dip
, prop_op
, flags
, name
,
964 /* check to see if there is a master device plumbed */
965 sidpp
= ddi_get_soft_state(statep
, minor
);
966 if (sidpp
== NULL
|| *sidpp
== NULL
) {
968 "snap_prop_op: could not find state for "
969 "snapshot %d.", minor
);
970 return (DDI_PROP_NOT_FOUND
);
973 if (((*sidpp
)->sid_fvp
== NULL
) || ((*sidpp
)->sid_fvp
->v_vfsp
== NULL
))
974 return (ddi_prop_op(dev
, dip
, prop_op
, flags
, name
,
977 /* hold master device and pass operation down */
978 mdev
= (*sidpp
)->sid_fvp
->v_vfsp
->vfs_dev
;
979 if (mdip
= e_ddi_hold_devi_by_dev(mdev
, 0)) {
981 /* get size information from the master device. */
982 error
= cdev_prop_op(mdev
, mdip
,
983 prop_op
, flags
, name
, valuep
, lengthp
);
984 ddi_release_devi(mdip
);
985 if (error
== DDI_PROP_SUCCESS
)
989 /* master device did not service the request, try framework */
990 return (ddi_prop_op(dev
, dip
, prop_op
, flags
, name
, valuep
, lengthp
));
995 * snap_ioctl() - snapshot driver ioctl(9E) routine
997 * only applies to the control device. The control device accepts two
998 * ioctl requests: create a snapshot or delete a snapshot. In either
999 * case, the vnode for the requested file system is extracted, and the
1000 * request is passed on to the file system via the same ioctl. The file
1001 * system is responsible for doing the things necessary for creating or
1002 * destroying a snapshot, including any file system specific operations
1003 * that must be performed as well as setting up and deleting the snapshot
1004 * state through the fssnap interfaces.
1007 snap_ioctl(dev_t dev
, int cmd
, intptr_t arg
, int mode
, cred_t
*credp
,
1013 minor
= getminor(dev
);
1015 if (minor
!= SNAP_CTL_MINOR
) {
1020 case _FIOSNAPSHOTCREATE
:
1022 struct fiosnapcreate fc
;
1026 if (ddi_copyin((void *)arg
, &fc
, sizeof (fc
), mode
))
1029 /* get vnode for file system mount point */
1030 if ((fp
= getf(fc
.rootfiledesc
)) == NULL
)
1033 ASSERT(fp
->f_vnode
);
1036 releasef(fc
.rootfiledesc
);
1038 /* pass ioctl request to file system */
1039 error
= fop_ioctl(vp
, cmd
, arg
, 0, credp
, rvalp
, NULL
);
1043 case _FIOSNAPSHOTCREATE_MULTI
:
1045 struct fiosnapcreate_multi fc
;
1049 if (ddi_copyin((void *)arg
, &fc
, sizeof (fc
), mode
))
1052 /* get vnode for file system mount point */
1053 if ((fp
= getf(fc
.rootfiledesc
)) == NULL
)
1056 ASSERT(fp
->f_vnode
);
1059 releasef(fc
.rootfiledesc
);
1061 /* pass ioctl request to file system */
1062 error
= fop_ioctl(vp
, cmd
, arg
, 0, credp
, rvalp
, NULL
);
1066 case _FIOSNAPSHOTDELETE
:
1069 struct fiosnapdelete fc
;
1070 snapshot_id_t
*sidp
= NULL
;
1071 snapshot_id_t
*sidnextp
= NULL
;
1072 struct file
*fp
= NULL
;
1073 struct vnode
*vp
= NULL
;
1074 struct vfs
*vfsp
= NULL
;
1075 const struct vfsops
*vfsops
= &EIO_vfsops
;
1077 if (ddi_copyin((void *)arg
, &fc
, sizeof (fc
), mode
))
1080 /* get vnode for file system mount point */
1081 if ((fp
= getf(fc
.rootfiledesc
)) == NULL
)
1084 ASSERT(fp
->f_vnode
);
1087 releasef(fc
.rootfiledesc
);
1089 * Test for two formats of delete and set correct minor/vp:
1091 * fssnap -d [/dev/fssnap/x]
1094 * fssnap -d [/mntpt]
1095 * Note that minor is verified to be equal to SNAP_CTL_MINOR
1096 * at this point which is an invalid minor number.
1098 ASSERT(fssnap_dip
!= NULL
);
1099 major
= ddi_driver_major(fssnap_dip
);
1100 mutex_enter(&snapshot_mutex
);
1101 for (sidp
= snapshot
; sidp
!= NULL
; sidp
= sidnextp
) {
1102 rw_enter(&sidp
->sid_rwlock
, RW_READER
);
1103 sidnextp
= sidp
->sid_next
;
1104 /* pseudo device: */
1105 if (major
== getmajor(vp
->v_rdev
)) {
1106 minor
= getminor(vp
->v_rdev
);
1107 if (sidp
->sid_snapnumber
== (uint_t
)minor
&&
1112 rw_exit(&sidp
->sid_rwlock
);
1117 if (sidp
->sid_fvp
== vp
) {
1118 minor
= sidp
->sid_snapnumber
;
1119 rw_exit(&sidp
->sid_rwlock
);
1123 rw_exit(&sidp
->sid_rwlock
);
1125 mutex_exit(&snapshot_mutex
);
1126 /* Verify minor got set correctly above */
1127 if (minor
== SNAP_CTL_MINOR
) {
1131 dev
= makedevice(major
, minor
);
1133 * Create dummy vfs entry
1134 * to use as a locking semaphore across the IOCTL
1135 * for mount in progress cases...
1137 vfsp
= vfs_alloc(KM_SLEEP
);
1138 VFS_INIT(vfsp
, vfsops
, NULL
);
1140 vfs_addmip(dev
, vfsp
);
1141 if ((vfs_devmounting(dev
, vfsp
)) ||
1142 (vfs_devismounted(dev
))) {
1149 * Nobody mounted but do not release mount in progress lock
1150 * until IOCTL complete to prohibit a mount sneaking
1151 * in.
1153 error
= fop_ioctl(vp
, cmd
, arg
, 0, credp
, rvalp
, NULL
);
1160 cmn_err(CE_WARN
, "snap_ioctl: Invalid ioctl cmd %d, minor %d.",
/* ************************************************************************ */

/*
 * Translation Table Routines
 *
 * These support routines implement a simple doubly linked list
 * to keep track of chunks that are currently in memory.  The maximum
 * size of the list is determined by the fssnap_max_mem_chunks variable.
 * The cmap_rwlock is used to protect the linkage of the list.
 */
1181 * transtbl_add() - add a node to the translation table
1183 * allocates a new node and points it at the buffer passed in. The node
1184 * is added to the beginning of the doubly linked list and the head of
1185 * the list is moved. The cmap_rwlock must be held as a writer through
1188 static cow_map_node_t
*
1189 transtbl_add(cow_map_t
*cmap
, chunknumber_t chunk
, caddr_t buf
)
1191 cow_map_node_t
*cmnode
;
1193 ASSERT(RW_WRITE_HELD(&cmap
->cmap_rwlock
));
1195 cmnode
= kmem_alloc(sizeof (cow_map_node_t
), KM_SLEEP
);
1198 * insert new translations at the beginning so cmn_table is always
1201 cmnode
->cmn_chunk
= chunk
;
1202 cmnode
->cmn_buf
= buf
;
1203 cmnode
->cmn_prev
= NULL
;
1204 cmnode
->cmn_next
= cmap
->cmap_table
;
1205 if (cmnode
->cmn_next
)
1206 cmnode
->cmn_next
->cmn_prev
= cmnode
;
1207 cmap
->cmap_table
= cmnode
;
1213 * transtbl_get() - look up a node in the translation table
1215 * called by the snapshot driver to find data that has been translated.
1216 * The lookup is done by the chunk number, and the node is returned.
1217 * If the node was not found, NULL is returned.
1219 static cow_map_node_t
*
1220 transtbl_get(cow_map_t
*cmap
, chunknumber_t chunk
)
1222 cow_map_node_t
*cmn
;
1224 ASSERT(RW_READ_HELD(&cmap
->cmap_rwlock
));
1227 /* search the translation table */
1228 for (cmn
= cmap
->cmap_table
; cmn
!= NULL
; cmn
= cmn
->cmn_next
) {
1229 if (cmn
->cmn_chunk
== chunk
)
1238 * transtbl_delete() - delete a node from the translation table
1240 * called when a node's data has been written out to disk. The
1241 * cmap_rwlock must be held as a writer for this operation. If the node
1242 * being deleted is the head of the list, then the head is moved to the
1243 * next node. Both the node's data and the node itself are freed.
1246 transtbl_delete(cow_map_t
*cmap
, cow_map_node_t
*cmn
)
1248 ASSERT(RW_WRITE_HELD(&cmap
->cmap_rwlock
));
1250 ASSERT(cmap
->cmap_table
);
1252 /* if the head of the list is being deleted, then move the head up */
1253 if (cmap
->cmap_table
== cmn
) {
1254 ASSERT(cmn
->cmn_prev
== NULL
);
1255 cmap
->cmap_table
= cmn
->cmn_next
;
1259 /* make previous node's next pointer skip over current node */
1260 if (cmn
->cmn_prev
!= NULL
) {
1261 ASSERT(cmn
->cmn_prev
->cmn_next
== cmn
);
1262 cmn
->cmn_prev
->cmn_next
= cmn
->cmn_next
;
1265 /* make next node's previous pointer skip over current node */
1266 if (cmn
->cmn_next
!= NULL
) {
1267 ASSERT(cmn
->cmn_next
->cmn_prev
== cmn
);
1268 cmn
->cmn_next
->cmn_prev
= cmn
->cmn_prev
;
1271 /* free the data and the node */
1272 ASSERT(cmn
->cmn_buf
);
1273 kmem_free(cmn
->cmn_buf
, cmap
->cmap_chunksz
);
1274 kmem_free(cmn
, sizeof (cow_map_node_t
));
1278 * transtbl_free() - free the entire translation table
1280 * called when the snapshot is deleted. This frees all of the nodes in
1281 * the translation table (but not the bitmaps).
1284 transtbl_free(cow_map_t
*cmap
)
1286 cow_map_node_t
*curnode
;
1287 cow_map_node_t
*tempnode
;
1289 for (curnode
= cmap
->cmap_table
; curnode
!= NULL
; curnode
= tempnode
) {
1290 tempnode
= curnode
->cmn_next
;
1292 kmem_free(curnode
->cmn_buf
, cmap
->cmap_chunksz
);
1293 kmem_free(curnode
, sizeof (cow_map_node_t
));
/* ************************************************************************ */

/*
 * Interface Implementation Routines
 *
 * The following functions implement snapshot interface routines that are
 * called by the file system to create, delete, and use a snapshot.  The
 * interfaces are defined in fssnap_if.c and are filled in by this driver
 * when it is loaded.  This technique allows the file system to depend on
 * the interface module without having to load the full implementation and
 * snapshot device drivers.
 */
1312 * fssnap_strategy_impl() - strategy routine called by the file system
1314 * called by the file system to handle copy-on-write when necessary. All
1315 * reads and writes that the file system performs should go through this
1316 * function. If the file system calls the underlying device's strategy
1317 * routine without going through fssnap_strategy() (eg. by calling
1318 * bdev_strategy()), the snapshot may not be consistent.
1320 * This function starts by doing significant sanity checking to insure
1321 * the snapshot was not deleted out from under it or deleted and then
1322 * recreated. To do this, it checks the actual pointer passed into it
1323 * (ie. the handle held by the file system). NOTE that the parameter is
1324 * a POINTER TO A POINTER to the snapshot id. Once the snapshot id is
1325 * locked, it knows things are ok and that this snapshot is really for
1328 * If the request is a write, fssnap_translate() is called to determine
1329 * whether a copy-on-write is required. If it is a read, the read is
1330 * simply passed on to the underlying device.
/*
 * fssnap_strategy_impl() - buffer I/O entry point used by the file system.
 *
 * snapshot_id is cast to a struct snapshot_id ** (the file system's own
 * handle on its snapshot); bp is the buffer being issued.
 *
 * Visible behaviour, in order:
 *  - a buffer with B_READ set is handed straight to bdev_strategy();
 *  - otherwise sid_rwlock is taken as a reader;
 *  - if SID_DELETE is set, the lock is dropped, fssnap_delete_impl() is
 *    called on the handle and the buffer goes to bdev_strategy();
 *  - if the handle no longer points at this snapshot, or SID_INACTIVE()
 *    is true, the lock is dropped and the buffer is passed through;
 *  - otherwise fssnap_translate() runs; when it reports an error the same
 *    handle/SID_INACTIVE() test is repeated (pass-through on a hit) and
 *    the error is recorded on the buffer with bioerror();
 *  - the reader lock is released last.
 *
 * NOTE(review): this listing has lost lines (the embedded numbering has
 * gaps).  The return type, the declaration of `error', the statements that
 * leave each early-exit branch and whatever completes the buffer after
 * bioerror() are not visible here; confirm them against the original
 * file.
 */
1333 fssnap_strategy_impl(void *snapshot_id
, buf_t
*bp
)
1335 struct snapshot_id
**sidpp
;
1336 struct snapshot_id
*sidp
;
1339 /* read requests are always passed through */
1340 if (bp
->b_flags
& B_READ
) {
1341 (void) bdev_strategy(bp
);
1346 * Because we were not able to take the snapshot read lock BEFORE
1347 * checking for a snapshot back in the file system, things may have
1348 * drastically changed out from under us. For instance, the snapshot
1349 * may have been deleted, deleted and recreated, or worse yet, deleted
1350 * for this file system but now the snapshot number is in use by another
1353 * Having a pointer to the file system's snapshot id pointer allows us
1354 * to sanity check most of this, though it assumes the file system is
1355 * keeping track of a pointer to the snapshot_id somewhere.
1357 sidpp
= (struct snapshot_id
**)snapshot_id
;
1361 * if this file system's snapshot was disabled, just pass the
1365 (void) bdev_strategy(bp
);
1370 * Once we have the reader lock the snapshot will not magically go
1371 * away. But things may have changed on us before this so double check.
/*
 * NOTE(review): sidp is used from here on, but no assignment to it is
 * visible above (embedded lines 1358 and 1364 are absent).  Presumably it
 * is loaded from *sidpp and tested for NULL before this point; confirm.
 */
1373 rw_enter(&sidp
->sid_rwlock
, RW_READER
);
1376 * if an error was founds somewhere the DELETE flag will be
1377 * set to indicate the snapshot should be deleted and no new
1378 * translations should occur.
1380 if (sidp
->sid_flags
& SID_DELETE
) {
1381 rw_exit(&sidp
->sid_rwlock
);
1382 (void) fssnap_delete_impl(sidpp
);
1383 (void) bdev_strategy(bp
);
1388 * If the file system is no longer pointing to the snapshot we were
1389 * called with, then it should not attempt to translate this buffer as
1390 * it may be going to a snapshot for a different file system.
1391 * Even if the file system snapshot pointer is still the same, the
1392 * snapshot may have been disabled before we got the reader lock.
1394 if (sidp
!= *sidpp
|| SID_INACTIVE(sidp
)) {
1395 rw_exit(&sidp
->sid_rwlock
);
1396 (void) bdev_strategy(bp
);
1401 * At this point we're sure the snapshot will not go away while the
1402 * reader lock is held, and we are reasonably certain that we are
1403 * writing to the correct snapshot.
1405 if ((error
= fssnap_translate(sidpp
, bp
)) != 0) {
1407 * fssnap_translate can release the reader lock if it
1408 * has to wait for a semaphore. In this case it is possible
1409 * for the snapshot to be deleted in this time frame. If this
1410 * happens just sent the buf thru to the filesystems device.
1412 if (sidp
!= *sidpp
|| SID_INACTIVE(sidp
)) {
1413 rw_exit(&sidp
->sid_rwlock
);
1414 (void) bdev_strategy(bp
);
1417 bioerror(bp
, error
);
1420 rw_exit(&sidp
->sid_rwlock
);
1424 * fssnap_translate() - helper function for fssnap_strategy()
1426 * performs the actual copy-on-write for write requests, if required.
1427 * This function does the real work of the file system side of things.
1429 * It first checks the candidate bitmap to quickly determine whether any
1430 * action is necessary. If the candidate bitmap indicates the chunk was
1431 * allocated when the snapshot was created, then it checks to see whether
1432 * a translation already exists. If a translation already exists then no
1433 * action is required. If the chunk is a candidate for copy-on-write,
1434 * and a translation does not already exist, then the chunk is read in
1435 * and a node is added to the translation table.
1437 * Once all of the chunks in the request range have been copied (if they
1438 * needed to be), then the original request can be satisfied and the old
1439 * data can be overwritten.
/*
 * fssnap_translate() - copy-aside pass for one write buffer.
 *
 * sidpp is the file system's handle on the snapshot id; wbp is the write
 * about to be issued.  sid_rwlock is asserted to be held as a reader on
 * entry.
 *
 * What the visible code does:
 *  - converts the first and last disk block of wbp into a chunk range
 *    with dbtocowchunk();
 *  - computes throttle_write, which is false when the current thread is a
 *    member of cow_taskq or tsd_get(bypass_snapshot_throttle_key) is
 *    non-zero;
 *  - walks the chunk range, testing the range limit and the candidate
 *    and hastrans bitmaps;
 *  - when throttling, tries cmap_throttle_sem with sema_tryp(); if that
 *    fails it drops sid_rwlock, counts itself in cmap_waiters around a
 *    blocking sema_p(), retakes sid_rwlock and re-validates the snapshot;
 *  - takes cmap_rwlock as a writer, re-tests hastrans, reads the whole
 *    chunk from wbp's device into a freshly allocated cmap_chunksz
 *    buffer, hands that buffer to transtbl_add(), records sidp and
 *    throttle_write on the node, sets the hastrans bit and drops
 *    cmap_rwlock;
 *  - dispatches fssnap_write_taskq() for the node when
 *    cow_backfile_array is non-NULL;
 *  - finally issues wbp itself through bdev_strategy().
 *
 * NOTE(review): the listing has lost lines.  The return type, the
 * declaration of `error', the statements that end each early branch
 * (loop exits and returns) and the final return are not visible.  The
 * sema_v() calls at embedded lines 1570 and 1606 each follow an absent
 * line and are presumably guarded by throttle_write; confirm.
 */
1442 fssnap_translate(struct snapshot_id
**sidpp
, struct buf
*wbp
)
1444 snapshot_id_t
*sidp
= *sidpp
;
1445 struct buf
*oldbp
; /* buffer to store old data in */
1446 struct cow_info
*cowp
= sidp
->sid_cowinfo
;
1447 cow_map_t
*cmap
= &cowp
->cow_map
;
1448 cow_map_node_t
*cmn
;
1449 chunknumber_t cowchunk
, startchunk
, endchunk
;
1451 int throttle_write
= 0;
1453 /* make sure the snapshot is active */
1454 ASSERT(RW_READ_HELD(&sidp
->sid_rwlock
));
1456 startchunk
= dbtocowchunk(cmap
, wbp
->b_lblkno
);
1457 endchunk
= dbtocowchunk(cmap
, wbp
->b_lblkno
+
1458 ((wbp
->b_bcount
-1) >> DEV_BSHIFT
));
1461 * Do not throttle the writes of the fssnap taskq thread and
1462 * the log roll (trans_roll) thread. Furthermore the writes to
1463 * the on-disk log are also not subject to throttling.
1464 * The fssnap_write_taskq thread's write can block on the throttling
1465 * semaphore which leads to self-deadlock as this same thread
1466 * releases the throttling semaphore after completing the IO.
1467 * If the trans_roll thread's write is throttled then we can deadlock
1468 * because the fssnap_taskq_thread which releases the throttling
1469 * semaphore can block waiting for log space which can only be
1470 * released by the trans_roll thread.
1473 throttle_write
= !(taskq_member(cowp
->cow_taskq
, curthread
) ||
1474 tsd_get(bypass_snapshot_throttle_key
));
1477 * Iterate through all chunks covered by this write and perform the
1478 * copy-aside if necessary. Once all chunks have been safely
1479 * stowed away, the new data may be written in a single sweep.
1481 * For each chunk in the range, the following sequence is performed:
1482 * - Is the chunk a candidate for translation?
1483 * o If not, then no translation is necessary, continue
1484 * - If it is a candidate, then does it already have a translation?
1485 * o If so, then no translation is necessary, continue
1486 * - If it is a candidate, but does not yet have a translation,
1487 * then read the old data and schedule an asynchronous taskq
1488 * to write the old data to the backing file.
1490 * Once this has been performed over the entire range of chunks, then
1491 * it is safe to overwrite the data that is there.
1493 * Note that no lock is required to check the candidate bitmap because
1494 * it never changes once the snapshot is created. The reader lock is
1495 * taken to check the hastrans bitmap since it may change. If it
1496 * turns out a copy is required, then the lock is upgraded to a
1497 * writer, and the bitmap is re-checked as it may have changed while
1498 * the lock was released. Finally, the write lock is held while
1499 * reading the old data to make sure it is not translated out from
1502 * This locking mechanism should be sufficient to handle multiple
1503 * threads writing to overlapping chunks simultaneously.
1505 for (cowchunk
= startchunk
; cowchunk
<= endchunk
; cowchunk
++) {
1507 * If the cowchunk is outside of the range of our
1508 * candidate maps, then simply break out of the
1509 * loop and pass the I/O through to bdev_strategy.
1510 * This would occur if the file system has grown
1511 * larger since the snapshot was taken.
1513 if (cowchunk
>= (cmap
->cmap_bmsize
* NBBY
))
1517 * If no disk blocks were allocated in this chunk when the
1518 * snapshot was created then no copy-on-write will be
1519 * required. Since this bitmap is read-only no locks are
1522 if (isclr(cmap
->cmap_candidate
, cowchunk
)) {
1527 * If a translation already exists, the data can be written
1528 * through since the old data has already been saved off.
1530 if (isset(cmap
->cmap_hastrans
, cowchunk
)) {
1536 * Throttle translations if there are too many outstanding
1537 * chunks in memory. The semaphore is sema_v'd by the taskq.
1539 * You can't keep the sid_rwlock if you would go to sleep.
1540 * This will result in deadlock when someone tries to delete
1541 * the snapshot (wants the sid_rwlock as a writer, but can't
1544 if (throttle_write
) {
1545 if (sema_tryp(&cmap
->cmap_throttle_sem
) == 0) {
1546 rw_exit(&sidp
->sid_rwlock
);
1547 atomic_inc_32(&cmap
->cmap_waiters
);
1548 sema_p(&cmap
->cmap_throttle_sem
);
1549 atomic_dec_32(&cmap
->cmap_waiters
);
1550 rw_enter(&sidp
->sid_rwlock
, RW_READER
);
1553 * Now since we released the sid_rwlock the state may
1554 * have transitioned underneath us. so check that again.
1556 if (sidp
!= *sidpp
|| SID_INACTIVE(sidp
)) {
1557 sema_v(&cmap
->cmap_throttle_sem
);
1564 * Acquire the lock as a writer and check to see if a
1565 * translation has been added in the meantime.
1567 rw_enter(&cmap
->cmap_rwlock
, RW_WRITER
);
1568 if (isset(cmap
->cmap_hastrans
, cowchunk
)) {
1570 sema_v(&cmap
->cmap_throttle_sem
);
1571 rw_exit(&cmap
->cmap_rwlock
);
1572 continue; /* go to the next chunk */
1576 * read a full chunk of data from the requested offset rounded
1577 * down to the nearest chunk size.
/*
 * NOTE(review): oldbp comes from getrbuf() and only its data buffer is
 * handed on to transtbl_add().  No release of the buf header itself is
 * visible in this listing; embedded lines 1603 and 1615 are absent and
 * presumably hold it.  Confirm before touching this path.
 */
1579 oldbp
= getrbuf(KM_SLEEP
);
1580 oldbp
->b_lblkno
= cowchunktodb(cmap
, cowchunk
);
1581 oldbp
->b_edev
= wbp
->b_edev
;
1582 oldbp
->b_bcount
= cmap
->cmap_chunksz
;
1583 oldbp
->b_bufsize
= cmap
->cmap_chunksz
;
1584 oldbp
->b_iodone
= NULL
;
1585 oldbp
->b_proc
= NULL
;
1586 oldbp
->b_flags
= B_READ
;
1587 oldbp
->b_un
.b_addr
= kmem_alloc(cmap
->cmap_chunksz
, KM_SLEEP
);
1589 (void) bdev_strategy(oldbp
);
1590 (void) biowait(oldbp
);
1593 * It's ok to bail in the middle of translating the range
1594 * because the extra copy-asides will not hurt anything
1595 * (except by using extra space in the backing store).
1597 if ((error
= geterror(oldbp
)) != 0) {
1598 cmn_err(CE_WARN
, "fssnap_translate: error reading "
1599 "old data for snapshot %d, chunk %llu, disk block "
1600 "%lld, size %lu, error %d.", sidp
->sid_snapnumber
,
1601 cowchunk
, oldbp
->b_lblkno
, oldbp
->b_bcount
, error
);
1602 kmem_free(oldbp
->b_un
.b_addr
, cmap
->cmap_chunksz
);
1604 rw_exit(&cmap
->cmap_rwlock
);
1606 sema_v(&cmap
->cmap_throttle_sem
);
1611 * add the node to the translation table and save a reference
1612 * to pass to the taskq for writing out to the backing file
1614 cmn
= transtbl_add(cmap
, cowchunk
, oldbp
->b_un
.b_addr
);
1618 * Add a reference to the snapshot id so the lower level
1619 * processing (ie. the taskq) can get back to the state
1622 cmn
->cmn_sid
= sidp
;
1623 cmn
->release_sem
= throttle_write
;
1624 setbit(cmap
->cmap_hastrans
, cowchunk
);
1626 rw_exit(&cmap
->cmap_rwlock
);
1629 * schedule the asynchronous write to the backing file
1631 if (cowp
->cow_backfile_array
!= NULL
)
1632 (void) taskq_dispatch(cowp
->cow_taskq
,
1633 fssnap_write_taskq
, cmn
, TQ_SLEEP
);
1637 * Write new data in place of the old data. At this point all of the
1638 * chunks touched by this write have been copied aside and so the new
1639 * data can be written out all at once.
1641 (void) bdev_strategy(wbp
);
1647 * fssnap_write_taskq() - write in-memory translations to the backing file
1649 * writes in-memory translations to the backing file asynchronously. A
1650 * task is dispatched each time a new translation is created. The task
1651 * writes the data to the backing file and removes it from the memory
1652 * list. The throttling semaphore is released only if the particular
1653 * translation was throttled in fssnap_translate.
/*
 * fssnap_write_taskq() - taskq callback that writes one translation node
 * to its backing file.
 *
 * arg is the cow_map_node_t dispatched by fssnap_translate(); the
 * snapshot id, cow_info and cow_map are reached through cmn_sid.
 *
 * Visible steps:
 *  - take sid_rwlock as a reader just long enough to test
 *    SID_INACTIVE();
 *  - atomically increment cmap_nchunks; if cmap_maxsize is non-zero and
 *    cmap_nchunks * cmap_chunksz exceeds it, warn and set SID_DELETE;
 *  - pick the backing file with cmn_chunk / cmap_chunksperbf and write
 *    cmap_chunksz bytes at offset
 *    (cmn_chunk % cmap_chunksperbf) * cmap_chunksz through vn_rdwr();
 *    a non-zero result produces a warning and sets SID_DELETE;
 *  - remove the node with transtbl_delete() under cmap_rwlock (writer)
 *    and sema_v() the throttle semaphore.
 *
 * NOTE(review): the listing has lost lines.  The return type, the
 * declarations of `error' and `bf_index', and the statements that leave
 * each failure branch are not visible.  release_sem is captured on entry
 * and every sema_v() below follows an absent line, so the releases are
 * presumably conditional on it; confirm against the original file.
 */
1656 fssnap_write_taskq(void *arg
)
1658 cow_map_node_t
*cmn
= (cow_map_node_t
*)arg
;
1659 snapshot_id_t
*sidp
= cmn
->cmn_sid
;
1660 cow_info_t
*cowp
= sidp
->sid_cowinfo
;
1661 cow_map_t
*cmap
= &cowp
->cow_map
;
1664 int release_sem
= cmn
->release_sem
;
1667 * The sid_rwlock does not need to be held here because the taskqs
1668 * are destroyed explicitly by fssnap_delete (with the sid_rwlock
1669 * held as a writer). taskq_destroy() will flush all of the tasks
1670 * out before fssnap_delete frees up all of the structures.
1673 /* if the snapshot was disabled from under us, drop the request. */
1674 rw_enter(&sidp
->sid_rwlock
, RW_READER
);
1675 if (SID_INACTIVE(sidp
)) {
1676 rw_exit(&sidp
->sid_rwlock
);
1678 sema_v(&cmap
->cmap_throttle_sem
);
1681 rw_exit(&sidp
->sid_rwlock
);
1683 atomic_inc_64((uint64_t *)&cmap
->cmap_nchunks
);
1685 if ((cmap
->cmap_maxsize
!= 0) &&
1686 ((cmap
->cmap_nchunks
* cmap
->cmap_chunksz
) > cmap
->cmap_maxsize
)) {
1687 cmn_err(CE_WARN
, "fssnap_write_taskq: snapshot %d (%s) has "
1688 "reached the maximum backing file size specified (%llu "
1689 "bytes) and will be deleted.", sidp
->sid_snapnumber
,
1690 (char *)cowp
->cow_kstat_mntpt
->ks_data
,
1691 cmap
->cmap_maxsize
);
1693 sema_v(&cmap
->cmap_throttle_sem
);
1694 atomic_or_uint(&sidp
->sid_flags
, SID_DELETE
);
1698 /* perform the write */
1699 bf_index
= cmn
->cmn_chunk
/ cmap
->cmap_chunksperbf
;
/*
 * The condition below is an assignment, not a comparison: the vn_rdwr()
 * result is stored in `error' and the branch is taken when it is
 * non-zero.
 */
1701 if (error
= vn_rdwr(UIO_WRITE
, (cowp
->cow_backfile_array
)[bf_index
],
1702 cmn
->cmn_buf
, cmap
->cmap_chunksz
,
1703 (cmn
->cmn_chunk
% cmap
->cmap_chunksperbf
) * cmap
->cmap_chunksz
,
1704 UIO_SYSSPACE
, 0, RLIM_INFINITY
, kcred
, NULL
)) {
1705 cmn_err(CE_WARN
, "fssnap_write_taskq: error writing to "
1706 "backing file. DELETING SNAPSHOT %d, backing file path "
1707 "%s, offset %llu bytes, error %d.", sidp
->sid_snapnumber
,
1708 (char *)cowp
->cow_kstat_bfname
->ks_data
,
1709 cmn
->cmn_chunk
* cmap
->cmap_chunksz
, error
);
1711 sema_v(&cmap
->cmap_throttle_sem
);
1712 atomic_or_uint(&sidp
->sid_flags
, SID_DELETE
);
1717 * now remove the node and buffer from memory
1719 rw_enter(&cmap
->cmap_rwlock
, RW_WRITER
);
1720 transtbl_delete(cmap
, cmn
);
1721 rw_exit(&cmap
->cmap_rwlock
);
1723 /* Allow more translations */
1725 sema_v(&cmap
->cmap_throttle_sem
);
/*
 * fssnap_create_impl() - called from the file system to create a new snapshot
 *
 * allocates and initializes the structures needed for a new snapshot.
 * This is called by the file system when it receives an ioctl request to
 * create a new snapshot.  An unused snapshot identifier is either found
 * or created, and eventually returned as the opaque handle the file
 * system will use to identify this snapshot.  The snapshot number
 * associated with the snapshot identifier is the same as the minor
 * number for the snapshot device that is used to access that snapshot.
 *
 * The snapshot can not be used until the candidate bitmap is populated
 * by the file system (see fssnap_set_candidate_impl()), and the file
 * system finishes the setup process by calling fssnap_create_done().
 * Nearly all of the snapshot locks are held for the duration of the
 * create, and are not released until fssnap_create_done() is called.
 */
/*
 * fssnap_create_impl() - allocate and initialise the state for a new
 * snapshot.
 *
 * Parameters as used below: nchunks sizes the two bitmaps (one bit per
 * chunk); chunksz and maxsize are stored in the cow map; fsvp is recorded
 * as sid_fvp and supplies the mount point for the kstats; bfvpp,
 * backfilecount and max_backfile_size are stored in the cow_info;
 * backpath is passed to fssnap_create_kstats().
 *
 * Visible steps:
 *  - reject nchunks == 0 and a chunksz that is not a multiple of
 *    DEV_BSIZE;
 *  - under snapshot_mutex, walk the snapshot list tracking the highest
 *    sid_snapnumber and stop at the first entry for which
 *    SID_AVAILABLE() holds (each entry is inspected under its sid_rwlock
 *    as a reader);
 *  - try to upgrade that entry's lock with rw_tryupgrade(); otherwise a
 *    new snapshot_id is allocated, its lock initialised and taken as a
 *    writer, and it is numbered 0 for an empty list or lastsnap + 1;
 *  - mark the id SID_CREATING and allocate a zeroed cow_info;
 *  - create the per-snapshot taskq and suspend it;
 *  - initialise cmap_rwlock (taken as a writer), the throttle semaphore
 *    (initial count fssnap_max_mem_chunks) and both bitmaps;
 *  - create the kstats, then drop snapshot_mutex.
 *
 * NOTE(review): the listing has lost lines.  The return type, several
 * local declarations, a third sanity condition (embedded line 1764), the
 * retry after a failed rw_tryupgrade(), the insertion of a new id at the
 * list head, any hold taken on fsvp and the final return are not
 * visible.  Also note that chunksz == 0 passes the visible
 * `% DEV_BSIZE' test and would reach the division
 * `max_backfile_size / chunksz' unless the hidden condition rejects it;
 * confirm.
 */
1747 fssnap_create_impl(chunknumber_t nchunks
, uint_t chunksz
, uoff_t maxsize
,
1748 struct vnode
*fsvp
, int backfilecount
, struct vnode
**bfvpp
, char *backpath
,
1749 uoff_t max_backfile_size
)
1751 refstr_t
*mountpoint
;
1753 struct cow_info
*cowp
;
1754 struct cow_map
*cmap
;
1755 struct snapshot_id
*sidp
;
1759 * Sanity check the parameters we care about
1760 * (we don't care about the informational parameters)
1762 if ((nchunks
== 0) ||
1763 ((chunksz
% DEV_BSIZE
) != 0) ||
1769 * Look for unused snapshot identifiers. Snapshot ids are never
1770 * freed, but deleted snapshot ids will be recycled as needed.
1772 mutex_enter(&snapshot_mutex
);
1776 for (sidp
= snapshot
; sidp
!= NULL
; sidp
= sidp
->sid_next
) {
1777 if (sidp
->sid_snapnumber
> lastsnap
)
1778 lastsnap
= sidp
->sid_snapnumber
;
1781 * The sid_rwlock is taken as a reader initially so that
1782 * activity on each snapshot is not stalled while searching
1783 * for a free snapshot id.
1785 rw_enter(&sidp
->sid_rwlock
, RW_READER
);
1788 * If the snapshot has been deleted and nobody is using the
1789 * snapshot device than we can reuse this snapshot_id. If
1790 * the snapshot is marked to be deleted (SID_DELETE), then
1791 * it hasn't been deleted yet so don't reuse it.
1793 if (SID_AVAILABLE(sidp
))
1794 break; /* This spot is unused, so take it */
1795 rw_exit(&sidp
->sid_rwlock
);
1799 * add a new snapshot identifier if there are no deleted
1800 * entries. Since it doesn't matter what order the entries
1801 * are in we can just add it to the beginning of the list.
1804 if (rw_tryupgrade(&sidp
->sid_rwlock
) == 0) {
1805 /* someone else grabbed it as a writer, try again */
1806 rw_exit(&sidp
->sid_rwlock
);
1810 /* Create a new node if we didn't find an unused one */
1811 sidp
= kmem_alloc(sizeof (struct snapshot_id
), KM_SLEEP
);
1812 rw_init(&sidp
->sid_rwlock
, NULL
, RW_DEFAULT
, NULL
);
1813 rw_enter(&sidp
->sid_rwlock
, RW_WRITER
);
1814 sidp
->sid_snapnumber
= (snapshot
== NULL
) ? 0 : lastsnap
+ 1;
1815 sidp
->sid_cowinfo
= NULL
;
1816 sidp
->sid_flags
= 0;
1817 sidp
->sid_next
= snapshot
;
1821 ASSERT(RW_WRITE_HELD(&sidp
->sid_rwlock
));
1822 ASSERT(sidp
->sid_cowinfo
== NULL
);
1823 ASSERT(sidp
->sid_snapnumber
<= (lastsnap
+ 1));
1825 sidp
->sid_flags
|= SID_CREATING
;
1826 /* The root vnode is held until snap_delete_impl() is called */
1828 sidp
->sid_fvp
= fsvp
;
1831 /* allocate and initialize structures */
1833 cowp
= kmem_zalloc(sizeof (struct cow_info
), KM_SLEEP
);
1835 cowp
->cow_backfile_array
= bfvpp
;
1836 cowp
->cow_backcount
= backfilecount
;
1837 cowp
->cow_backfile_sz
= max_backfile_size
;
1840 * Initialize task queues for this snapshot. Only a small number
1841 * of threads are required because they will be serialized on the
1842 * backing file's reader/writer lock anyway.
1844 (void) snprintf(taskqname
, sizeof (taskqname
), "%s_taskq_%d", snapname
,
1845 sidp
->sid_snapnumber
);
1846 cowp
->cow_taskq
= taskq_create(taskqname
, fssnap_taskq_nthreads
,
1847 minclsyspri
, 1, fssnap_taskq_maxtasks
, 0);
1849 /* don't allow tasks to start until after everything is ready */
1850 taskq_suspend(cowp
->cow_taskq
);
1852 /* initialize translation table */
1853 cmap
= &cowp
->cow_map
;
1854 rw_init(&cmap
->cmap_rwlock
, NULL
, RW_DEFAULT
, NULL
);
1855 rw_enter(&cmap
->cmap_rwlock
, RW_WRITER
);
1857 sema_init(&cmap
->cmap_throttle_sem
, fssnap_max_mem_chunks
, NULL
,
1858 SEMA_DEFAULT
, NULL
);
1860 cmap
->cmap_chunksz
= chunksz
;
1861 cmap
->cmap_maxsize
= maxsize
;
1862 cmap
->cmap_chunksperbf
= max_backfile_size
/ chunksz
;
1865 * allocate one bit per chunk for the bitmaps, round up
1867 cmap
->cmap_bmsize
= (nchunks
+ (NBBY
- 1)) / NBBY
;
1868 cmap
->cmap_hastrans
= kmem_zalloc(cmap
->cmap_bmsize
, KM_SLEEP
);
1869 cmap
->cmap_candidate
= kmem_zalloc(cmap
->cmap_bmsize
, KM_SLEEP
);
1871 sidp
->sid_cowinfo
= cowp
;
1873 /* initialize kstats for this snapshot */
1874 mountpoint
= vfs_getmntpoint(fsvp
->v_vfsp
);
1875 fssnap_create_kstats(sidp
, sidp
->sid_snapnumber
,
1876 refstr_value(mountpoint
), backpath
);
1877 refstr_rele(mountpoint
);
1879 mutex_exit(&snapshot_mutex
);
1882 * return with snapshot id rwlock held as a writer until
1883 * fssnap_create_done is called
1889 * fssnap_set_candidate_impl() - mark a chunk as a candidate for copy-on-write
1891 * sets a bit in the candidate bitmap that indicates that a chunk is a
1892 * candidate for copy-on-write. Typically, chunks that are allocated on
1893 * the file system at the time the snapshot is taken are candidates,
1894 * while chunks that have no allocated data do not need to be copied.
1895 * Chunks containing metadata must be marked as candidates as well.
1898 fssnap_set_candidate_impl(void *snapshot_id
, chunknumber_t chunknumber
)
1900 struct snapshot_id
*sid
= snapshot_id
;
1901 struct cow_info
*cowp
= sid
->sid_cowinfo
;
1902 struct cow_map
*cmap
= &cowp
->cow_map
;
1904 /* simple bitmap operation for now */
1905 ASSERT(chunknumber
< (cmap
->cmap_bmsize
* NBBY
));
1906 setbit(cmap
->cmap_candidate
, chunknumber
);
1910 * fssnap_is_candidate_impl() - check whether a chunk is a candidate
1912 * returns 0 if the chunk is not a candidate and 1 if the chunk is a
1913 * candidate. This can be used by the file system to change behavior for
1914 * chunks that might induce a copy-on-write. The offset is specified in
1915 * bytes since the chunk size may not be known by the file system.
1918 fssnap_is_candidate_impl(void *snapshot_id
, uoff_t off
)
1920 struct snapshot_id
*sid
= snapshot_id
;
1921 struct cow_info
*cowp
= sid
->sid_cowinfo
;
1922 struct cow_map
*cmap
= &cowp
->cow_map
;
1923 ulong_t chunknumber
= off
/ cmap
->cmap_chunksz
;
1925 /* simple bitmap operation for now */
1926 ASSERT(chunknumber
< (cmap
->cmap_bmsize
* NBBY
));
1927 return (isset(cmap
->cmap_candidate
, chunknumber
));
1931 * fssnap_create_done_impl() - complete the snapshot setup process
1933 * called when the file system is done populating the candidate bitmap
1934 * and it is ready to start using the snapshot. This routine releases
1935 * the snapshot locks, allows taskq tasks to start processing, and
1936 * creates the device minor nodes associated with the snapshot.
/*
 * fssnap_create_done_impl() - finish creating a snapshot.
 *
 * snapshot_id is the struct snapshot_id pointer being completed.  Both
 * sid_rwlock and cmap_rwlock are asserted to be held as a writer on
 * entry.
 *
 * Visible steps:
 *  - clear SID_CREATING and SID_DISABLED and read the snapshot number;
 *  - allocate the soft-state slot for that number and fetch it with
 *    ddi_get_soft_state();
 *  - create a block minor node named "<n>" and a character minor node
 *    named "<n>,raw", both with the snapshot number as the minor;
 *  - release sid_rwlock and cmap_rwlock, resume the taskq and return
 *    snapnumber.
 *
 * NOTE(review): the listing has lost lines.  The return type, the
 * declaration of `name', the store through sidpp after
 * ddi_get_soft_state() and the remainder of each failure branch (what
 * value is returned and where control goes) are not visible; confirm
 * against the original file.
 */
1939 fssnap_create_done_impl(void *snapshot_id
)
1941 struct snapshot_id
**sidpp
, *sidp
= snapshot_id
;
1942 struct cow_info
*cowp
;
1943 struct cow_map
*cmap
;
1944 int snapnumber
= -1;
1947 /* sid rwlock and cmap rwlock should be taken from fssnap_create */
1949 ASSERT(RW_WRITE_HELD(&sidp
->sid_rwlock
));
1950 ASSERT(sidp
->sid_cowinfo
);
1952 cowp
= sidp
->sid_cowinfo
;
1953 cmap
= &cowp
->cow_map
;
1955 ASSERT(RW_WRITE_HELD(&cmap
->cmap_rwlock
));
1957 sidp
->sid_flags
&= ~(SID_CREATING
| SID_DISABLED
);
1958 snapnumber
= sidp
->sid_snapnumber
;
1960 /* allocate state structure and find new snapshot id */
1961 if (ddi_soft_state_zalloc(statep
, snapnumber
) != DDI_SUCCESS
) {
1963 "snap_ioctl: create: could not allocate "
1964 "state for snapshot %d.", snapnumber
);
1969 sidpp
= ddi_get_soft_state(statep
, snapnumber
);
1972 /* create minor node based on snapshot number */
1973 ASSERT(fssnap_dip
!= NULL
);
1974 (void) snprintf(name
, sizeof (name
), "%d", snapnumber
);
1975 if (ddi_create_minor_node(fssnap_dip
, name
, S_IFBLK
,
1976 snapnumber
, DDI_PSEUDO
, 0) != DDI_SUCCESS
) {
1977 cmn_err(CE_WARN
, "snap_ioctl: could not create "
1978 "block minor node for snapshot %d.", snapnumber
);
1983 (void) snprintf(name
, sizeof (name
), "%d,raw", snapnumber
);
1984 if (ddi_create_minor_node(fssnap_dip
, name
, S_IFCHR
,
1985 snapnumber
, DDI_PSEUDO
, 0) != DDI_SUCCESS
) {
1986 cmn_err(CE_WARN
, "snap_ioctl: could not create "
1987 "character minor node for snapshot %d.", snapnumber
);
1992 rw_exit(&sidp
->sid_rwlock
);
1993 rw_exit(&cmap
->cmap_rwlock
);
1995 /* let the taskq threads start processing */
1996 taskq_resume(cowp
->cow_taskq
);
1998 return (snapnumber
);
2002 * fssnap_delete_impl() - delete a snapshot
2004 * used when a snapshot is no longer needed. This is called by the file
2005 * system when it receives an ioctl request to delete a snapshot. It is
2006 * also called internally when error conditions such as disk full, errors
2007 * writing to the backing file, or backing file maxsize exceeded occur.
2008 * If the snapshot device is busy when the delete request is received,
2009 * all state will be deleted except for the soft state and device files
2010 * associated with the snapshot; they will be deleted when the snapshot
2013 * NOTE this function takes a POINTER TO A POINTER to the snapshot id,
2014 * and expects to be able to set the handle held by the file system to
2015 * NULL. This depends on the file system checking that variable for NULL
2016 * before calling fssnap_strategy().
/*
 * fssnap_delete_impl() - tear down a snapshot.
 *
 * snapshot_id is cast to a struct snapshot_id ** (the file system's
 * handle).  Returns snapnumber, which starts at -1 and is set to the
 * snapshot's number near the end.
 *
 * Visible steps:
 *  - take sid_rwlock as a writer and re-check that *sidpp is non-NULL;
 *  - set SID_DISABLING; if SID_DELETE was set, log the automatic
 *    deletion and clear that flag;
 *  - pick up sid_cowinfo and drop sid_rwlock;
 *  - destroy the taskq, walk the NULL-terminated backing-file vnode
 *    array and free it, clear sid_cowinfo;
 *  - free both bitmaps, free the translation table, destroy
 *    cmap_rwlock, cycle sema_p()/sema_v() while cmap_waiters is
 *    non-zero, then destroy the semaphore;
 *  - delete the kstats and free the cow_info;
 *  - under snapshot_mutex and sid_rwlock (writer): swap SID_DISABLING
 *    for SID_DISABLED, release sid_fvp, and when SID_AVAILABLE() holds
 *    remove both minor nodes and free the soft state.
 *
 * NOTE(review): the listing has lost lines.  The return type, the
 * declarations of `name' and `vpp', the early returns, the reset of the
 * caller's handle described in the in-body text and the per-vnode action
 * inside the backing-file loop are not visible.  Two things to confirm
 * against the original file:
 *  - the warning branch near the end accepts statesidpp == NULL, yet the
 *    ASSERT that follows it dereferences statesidpp;
 *  - the re-check of *sidpp comes after sid_rwlock has already been
 *    taken through sidp.
 */
2019 fssnap_delete_impl(void *snapshot_id
)
2021 struct snapshot_id
**sidpp
= (struct snapshot_id
**)snapshot_id
;
2022 struct snapshot_id
*sidp
;
2023 struct snapshot_id
**statesidpp
;
2024 struct cow_info
*cowp
;
2025 struct cow_map
*cmap
;
2027 int snapnumber
= -1;
2031 * sidp is guaranteed to be valid if sidpp is valid because
2032 * the snapshot list is append-only.
2034 if (sidpp
== NULL
) {
/*
 * NOTE(review): sidp is used from here on, but no assignment to it is
 * visible above (embedded line 2038 is absent).  Presumably it is loaded
 * from *sidpp; confirm.
 */
2039 rw_enter(&sidp
->sid_rwlock
, RW_WRITER
);
2041 ASSERT(RW_WRITE_HELD(&sidp
->sid_rwlock
));
2044 * double check that the snapshot is still valid for THIS file system
2046 if (*sidpp
== NULL
) {
2047 rw_exit(&sidp
->sid_rwlock
);
2052 * Now we know the snapshot is still valid and will not go away
2053 * because we have the write lock. Once the state is transitioned
2054 * to "disabling", the sid_rwlock can be released. Any pending I/O
2055 * waiting for the lock as a reader will check for this state and
2056 * abort without touching data that may be getting freed.
2058 sidp
->sid_flags
|= SID_DISABLING
;
2059 if (sidp
->sid_flags
& SID_DELETE
) {
2060 cmn_err(CE_WARN
, "Snapshot %d automatically deleted.",
2061 sidp
->sid_snapnumber
);
2062 sidp
->sid_flags
&= ~(SID_DELETE
);
2067 * This is pointing into file system specific data! The assumption is
2068 * that fssnap_strategy() gets called from the file system based on
2069 * whether this reference to the snapshot_id is NULL or not. So
2070 * setting this to NULL should disable snapshots for the file system.
2074 /* remove cowinfo */
2075 cowp
= sidp
->sid_cowinfo
;
2077 rw_exit(&sidp
->sid_rwlock
);
2080 rw_exit(&sidp
->sid_rwlock
);
2082 /* destroy task queues first so they don't reference freed data. */
2083 if (cowp
->cow_taskq
) {
2084 taskq_destroy(cowp
->cow_taskq
);
2085 cowp
->cow_taskq
= NULL
;
2088 if (cowp
->cow_backfile_array
!= NULL
) {
2089 for (vpp
= cowp
->cow_backfile_array
; *vpp
; vpp
++)
2091 kmem_free(cowp
->cow_backfile_array
,
2092 (cowp
->cow_backcount
+ 1) * sizeof (vnode_t
*));
2093 cowp
->cow_backfile_array
= NULL
;
2096 sidp
->sid_cowinfo
= NULL
;
2099 cmap
= &cowp
->cow_map
;
2102 if (cmap
->cmap_candidate
)
2103 kmem_free(cmap
->cmap_candidate
, cmap
->cmap_bmsize
);
2105 if (cmap
->cmap_hastrans
)
2106 kmem_free(cmap
->cmap_hastrans
, cmap
->cmap_bmsize
);
2108 if (cmap
->cmap_table
)
2109 transtbl_free(&cowp
->cow_map
);
2111 rw_destroy(&cmap
->cmap_rwlock
);
2113 while (cmap
->cmap_waiters
) {
2114 sema_p(&cmap
->cmap_throttle_sem
);
2115 sema_v(&cmap
->cmap_throttle_sem
);
2117 sema_destroy(&cmap
->cmap_throttle_sem
);
2120 fssnap_delete_kstats(cowp
);
2122 kmem_free(cowp
, sizeof (struct cow_info
));
2124 statesidpp
= ddi_get_soft_state(statep
, sidp
->sid_snapnumber
);
2125 if (statesidpp
== NULL
|| *statesidpp
== NULL
) {
2127 "fssnap_delete_impl: could not find state for snapshot %d.",
2128 sidp
->sid_snapnumber
);
2130 ASSERT(*statesidpp
== sidp
);
2133 * Leave the node in the list marked DISABLED so it can be reused
2134 * and avoid many race conditions. Return the snapshot number
2137 mutex_enter(&snapshot_mutex
);
2138 rw_enter(&sidp
->sid_rwlock
, RW_WRITER
);
2139 sidp
->sid_flags
&= ~(SID_DISABLING
);
2140 sidp
->sid_flags
|= SID_DISABLED
;
2141 VN_RELE(sidp
->sid_fvp
);
2142 sidp
->sid_fvp
= NULL
;
2143 snapnumber
= sidp
->sid_snapnumber
;
2146 * If the snapshot is not busy, free the device info now. Otherwise
2147 * the device nodes are freed in snap_close() when the device is
2148 * closed. The sid will not be reused until the device is not busy.
2150 if (SID_AVAILABLE(sidp
)) {
2151 /* remove the device nodes */
2152 ASSERT(fssnap_dip
!= NULL
);
2153 (void) snprintf(name
, sizeof (name
), "%d",
2154 sidp
->sid_snapnumber
);
2155 ddi_remove_minor_node(fssnap_dip
, name
);
2156 (void) snprintf(name
, sizeof (name
), "%d,raw",
2157 sidp
->sid_snapnumber
);
2158 ddi_remove_minor_node(fssnap_dip
, name
);
2160 /* delete the state structure */
2161 ddi_soft_state_free(statep
, sidp
->sid_snapnumber
);
2165 mutex_exit(&snapshot_mutex
);
2166 rw_exit(&sidp
->sid_rwlock
);
2168 return (snapnumber
);
2172 * fssnap_create_kstats() - allocate and initialize snapshot kstats
2176 fssnap_create_kstats(snapshot_id_t
*sidp
, int snapnum
,
2177 const char *mountpoint
, const char *backfilename
)
2179 kstat_t
*num
, *mntpoint
, *bfname
;
2181 struct cow_info
*cowp
= sidp
->sid_cowinfo
;
2182 struct cow_kstat_num
*stats
;
2184 /* update the high water mark */
2185 if (fssnap_highwater_kstat
== NULL
) {
2186 cmn_err(CE_WARN
, "fssnap_create_kstats: failed to lookup "
2187 "high water mark kstat.");
2191 hw
= (kstat_named_t
*)fssnap_highwater_kstat
->ks_data
;
2192 if (hw
->value
.ui32
< snapnum
)
2193 hw
->value
.ui32
= snapnum
;
2195 /* initialize the mount point kstat */
2196 kstat_delete_byname(snapname
, snapnum
, FSSNAP_KSTAT_MNTPT
);
2198 if (mountpoint
!= NULL
) {
2199 mntpoint
= kstat_create(snapname
, snapnum
, FSSNAP_KSTAT_MNTPT
,
2200 "misc", KSTAT_TYPE_RAW
, strlen(mountpoint
) + 1, 0);
2201 if (mntpoint
== NULL
) {
2202 cowp
->cow_kstat_mntpt
= NULL
;
2203 cmn_err(CE_WARN
, "fssnap_create_kstats: failed to "
2204 "create mount point kstat");
2206 (void) strncpy(mntpoint
->ks_data
, mountpoint
,
2207 strlen(mountpoint
));
2208 cowp
->cow_kstat_mntpt
= mntpoint
;
2209 kstat_install(mntpoint
);
2212 cowp
->cow_kstat_mntpt
= NULL
;
2213 cmn_err(CE_WARN
, "fssnap_create_kstats: mount point not "
2217 /* initialize the backing file kstat */
2218 kstat_delete_byname(snapname
, snapnum
, FSSNAP_KSTAT_BFNAME
);
2220 if (backfilename
== NULL
) {
2221 cowp
->cow_kstat_bfname
= NULL
;
2223 bfname
= kstat_create(snapname
, snapnum
, FSSNAP_KSTAT_BFNAME
,
2224 "misc", KSTAT_TYPE_RAW
, strlen(backfilename
) + 1, 0);
2225 if (bfname
!= NULL
) {
2226 (void) strncpy(bfname
->ks_data
, backfilename
,
2227 strlen(backfilename
));
2228 cowp
->cow_kstat_bfname
= bfname
;
2229 kstat_install(bfname
);
2231 cowp
->cow_kstat_bfname
= NULL
;
2232 cmn_err(CE_WARN
, "fssnap_create_kstats: failed to "
2233 "create backing file name kstat");
2237 /* initialize numeric kstats */
2238 kstat_delete_byname(snapname
, snapnum
, FSSNAP_KSTAT_NUM
);
2240 num
= kstat_create(snapname
, snapnum
, FSSNAP_KSTAT_NUM
,
2241 "misc", KSTAT_TYPE_NAMED
,
2242 sizeof (struct cow_kstat_num
) / sizeof (kstat_named_t
),
2245 cmn_err(CE_WARN
, "fssnap_create_kstats: failed to create "
2247 cowp
->cow_kstat_num
= NULL
;
2251 cowp
->cow_kstat_num
= num
;
2252 stats
= num
->ks_data
;
2253 num
->ks_update
= fssnap_update_kstat_num
;
2254 num
->ks_private
= sidp
;
2256 kstat_named_init(&stats
->ckn_state
, FSSNAP_KSTAT_NUM_STATE
,
2258 kstat_named_init(&stats
->ckn_bfsize
, FSSNAP_KSTAT_NUM_BFSIZE
,
2260 kstat_named_init(&stats
->ckn_maxsize
, FSSNAP_KSTAT_NUM_MAXSIZE
,
2262 kstat_named_init(&stats
->ckn_createtime
, FSSNAP_KSTAT_NUM_CREATETIME
,
2264 kstat_named_init(&stats
->ckn_chunksize
, FSSNAP_KSTAT_NUM_CHUNKSIZE
,
2267 /* initialize the static kstats */
2268 stats
->ckn_chunksize
.value
.ui32
= cowp
->cow_map
.cmap_chunksz
;
2269 stats
->ckn_maxsize
.value
.ui64
= cowp
->cow_map
.cmap_maxsize
;
2270 stats
->ckn_createtime
.value
.l
= gethrestime_sec();
2276 * fssnap_update_kstat_num() - update a numerical snapshot kstat value
2280 fssnap_update_kstat_num(kstat_t
*ksp
, int rw
)
2282 snapshot_id_t
*sidp
= (snapshot_id_t
*)ksp
->ks_private
;
2283 struct cow_info
*cowp
= sidp
->sid_cowinfo
;
2284 struct cow_kstat_num
*stats
= ksp
->ks_data
;
2286 if (rw
== KSTAT_WRITE
)
2290 if (sidp
->sid_flags
& SID_CREATING
)
2291 stats
->ckn_state
.value
.i32
= COWSTATE_CREATING
;
2292 else if (SID_INACTIVE(sidp
))
2293 stats
->ckn_state
.value
.i32
= COWSTATE_DISABLED
;
2294 else if (SID_BUSY(sidp
))
2295 stats
->ckn_state
.value
.i32
= COWSTATE_ACTIVE
;
2297 stats
->ckn_state
.value
.i32
= COWSTATE_IDLE
;
2300 stats
->ckn_bfsize
.value
.ui64
= cowp
->cow_map
.cmap_nchunks
*
2301 cowp
->cow_map
.cmap_chunksz
;
2307 * fssnap_delete_kstats() - deallocate snapshot kstats
2311 fssnap_delete_kstats(struct cow_info
*cowp
)
2313 if (cowp
->cow_kstat_num
!= NULL
) {
2314 kstat_delete(cowp
->cow_kstat_num
);
2315 cowp
->cow_kstat_num
= NULL
;
2317 if (cowp
->cow_kstat_mntpt
!= NULL
) {
2318 kstat_delete(cowp
->cow_kstat_mntpt
);
2319 cowp
->cow_kstat_mntpt
= NULL
;
2321 if (cowp
->cow_kstat_bfname
!= NULL
) {
2322 kstat_delete(cowp
->cow_kstat_bfname
);
2323 cowp
->cow_kstat_bfname
= NULL
;