4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "%Z%%M% %I% %E% SMI"
29 * Iterate over all children of the current object. This includes the normal
30 * dataset hierarchy, but also arbitrary hierarchies due to clones. We want to
31 * walk all datasets in the pool, and construct a directed graph of the form:
41 * @yesterday ----> foo
43 * In order to construct this graph, we have to walk every dataset in the pool,
44 * because the clone parent is stored as a property of the child, not the
45 * parent. The parent only keeps track of the number of clones.
47 * In the normal case (without clones) this would be rather expensive. To avoid
48 * unnecessary computation, we first try a walk of the subtree hierarchy
49 * starting from the initial node. At each dataset, we construct a node in the
50 * graph and an edge leading from its parent. If we don't see any snapshots
51 * with a non-zero clone count, then we are finished.
53 * If we do find a cloned snapshot, then we finish the walk of the current
54 * subtree, but indicate that we need to do a complete walk. We then perform a
55 * global walk of all datasets, avoiding the subtree we already processed.
57 * At the end of this, we'll end up with a directed graph of all relevant (and
58 * possibly some irrelevant) datasets in the system. We need to both find our
59 * limiting subgraph and determine a safe ordering in which to destroy the
60 * datasets. We do a topological ordering of our graph starting at our target
61 * dataset, and then walk the results in reverse.
63 * It's possible for the graph to have cycles if, for example, the user renames
64 * a clone to be the parent of its origin snapshot. The user can request to
65 * generate an error in this case, or ignore the cycle and continue.
67 * When removing datasets, we want to destroy the snapshots in chronological
68 * order (because this is the most efficient method). In order to accomplish
69 * this, we store the creation transaction group with each vertex and keep each
70 * vertex's edges sorted according to this value. The topological sort will
71 * automatically walk the snapshots in the correct order.
#include <assert.h>
#include <libintl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>

#include "libzfs_impl.h"
#include "zfs_namecheck.h"
87 #define MIN_EDGECOUNT 4
90 * Vertex structure. Indexed by dataset name, this structure maintains a list
91 * of edges to other vertices.
94 typedef struct zfs_vertex
{
95 char zv_dataset
[ZFS_MAXNAMELEN
];
96 struct zfs_vertex
*zv_next
;
99 struct zfs_edge
**zv_edges
;
111 * Edge structure. Simply maintains a pointer to the destination vertex. There
112 * is no need to store the source vertex, since we only use edges in the context
113 * of the source vertex.
115 typedef struct zfs_edge
{
116 zfs_vertex_t
*ze_dest
;
117 struct zfs_edge
*ze_next
;
120 #define ZFS_GRAPH_SIZE 1027 /* this could be dynamic some day */
123 * Graph structure. Vertices are maintained in a hash indexed by dataset name.
125 typedef struct zfs_graph
{
126 zfs_vertex_t
**zg_hash
;
134 * Allocate a new edge pointing to the target vertex.
137 zfs_edge_create(libzfs_handle_t
*hdl
, zfs_vertex_t
*dest
)
139 zfs_edge_t
*zep
= zfs_alloc(hdl
, sizeof (zfs_edge_t
));
153 zfs_edge_destroy(zfs_edge_t
*zep
)
159 * Allocate a new vertex with the given name.
161 static zfs_vertex_t
*
162 zfs_vertex_create(libzfs_handle_t
*hdl
, const char *dataset
)
164 zfs_vertex_t
*zvp
= zfs_alloc(hdl
, sizeof (zfs_vertex_t
));
169 assert(strlen(dataset
) < ZFS_MAXNAMELEN
);
171 (void) strlcpy(zvp
->zv_dataset
, dataset
, sizeof (zvp
->zv_dataset
));
173 if ((zvp
->zv_edges
= zfs_alloc(hdl
,
174 MIN_EDGECOUNT
* sizeof (void *))) == NULL
) {
179 zvp
->zv_edgealloc
= MIN_EDGECOUNT
;
185 * Destroy a vertex. Frees up any associated edges.
188 zfs_vertex_destroy(zfs_vertex_t
*zvp
)
192 for (i
= 0; i
< zvp
->zv_edgecount
; i
++)
193 zfs_edge_destroy(zvp
->zv_edges
[i
]);
200 * Given a vertex, add an edge to the destination vertex.
203 zfs_vertex_add_edge(libzfs_handle_t
*hdl
, zfs_vertex_t
*zvp
,
206 zfs_edge_t
*zep
= zfs_edge_create(hdl
, dest
);
211 if (zvp
->zv_edgecount
== zvp
->zv_edgealloc
) {
214 if ((ptr
= zfs_realloc(hdl
, zvp
->zv_edges
,
215 zvp
->zv_edgealloc
* sizeof (void *),
216 zvp
->zv_edgealloc
* 2 * sizeof (void *))) == NULL
)
220 zvp
->zv_edgealloc
*= 2;
223 zvp
->zv_edges
[zvp
->zv_edgecount
++] = zep
;
229 zfs_edge_compare(const void *a
, const void *b
)
231 const zfs_edge_t
*ea
= *((zfs_edge_t
**)a
);
232 const zfs_edge_t
*eb
= *((zfs_edge_t
**)b
);
234 if (ea
->ze_dest
->zv_txg
< eb
->ze_dest
->zv_txg
)
236 if (ea
->ze_dest
->zv_txg
> eb
->ze_dest
->zv_txg
)
242 * Sort the given vertex edges according to the creation txg of each vertex.
245 zfs_vertex_sort_edges(zfs_vertex_t
*zvp
)
247 if (zvp
->zv_edgecount
== 0)
250 qsort(zvp
->zv_edges
, zvp
->zv_edgecount
, sizeof (void *),
255 * Construct a new graph object. We allow the size to be specified as a
256 * parameter so in the future we can size the hash according to the number of
257 * datasets in the pool.
260 zfs_graph_create(libzfs_handle_t
*hdl
, const char *dataset
, size_t size
)
262 zfs_graph_t
*zgp
= zfs_alloc(hdl
, sizeof (zfs_graph_t
));
268 if ((zgp
->zg_hash
= zfs_alloc(hdl
,
269 size
* sizeof (zfs_vertex_t
*))) == NULL
) {
274 zgp
->zg_root
= dataset
;
275 zgp
->zg_clone_count
= 0;
281 * Destroy a graph object. We have to iterate over all the hash chains,
282 * destroying each vertex in the process.
285 zfs_graph_destroy(zfs_graph_t
*zgp
)
288 zfs_vertex_t
*current
, *next
;
290 for (i
= 0; i
< zgp
->zg_size
; i
++) {
291 current
= zgp
->zg_hash
[i
];
292 while (current
!= NULL
) {
293 next
= current
->zv_next
;
294 zfs_vertex_destroy(current
);
304 * Graph hash function. Classic bernstein k=33 hash function, taken from
305 * usr/src/cmd/sgs/tools/common/strhash.c
308 zfs_graph_hash(zfs_graph_t
*zgp
, const char *str
)
313 while ((c
= *str
++) != 0)
314 hash
= ((hash
<< 5) + hash
) + c
; /* hash * 33 + c */
316 return (hash
% zgp
->zg_size
);
320 * Given a dataset name, finds the associated vertex, creating it if necessary.
322 static zfs_vertex_t
*
323 zfs_graph_lookup(libzfs_handle_t
*hdl
, zfs_graph_t
*zgp
, const char *dataset
,
326 size_t idx
= zfs_graph_hash(zgp
, dataset
);
329 for (zvp
= zgp
->zg_hash
[idx
]; zvp
!= NULL
; zvp
= zvp
->zv_next
) {
330 if (strcmp(zvp
->zv_dataset
, dataset
) == 0) {
331 if (zvp
->zv_txg
== 0)
337 if ((zvp
= zfs_vertex_create(hdl
, dataset
)) == NULL
)
340 zvp
->zv_next
= zgp
->zg_hash
[idx
];
342 zgp
->zg_hash
[idx
] = zvp
;
349 * Given two dataset names, create an edge between them. For the source vertex,
350 * mark 'zv_visited' to indicate that we have seen this vertex, and not simply
351 * created it as a destination of another edge. If 'dest' is NULL, then this
352 * is an individual vertex (i.e. the starting vertex), so don't add an edge.
355 zfs_graph_add(libzfs_handle_t
*hdl
, zfs_graph_t
*zgp
, const char *source
,
356 const char *dest
, uint64_t txg
)
358 zfs_vertex_t
*svp
, *dvp
;
360 if ((svp
= zfs_graph_lookup(hdl
, zgp
, source
, 0)) == NULL
)
362 svp
->zv_visited
= VISIT_SEEN
;
364 dvp
= zfs_graph_lookup(hdl
, zgp
, dest
, txg
);
367 if (zfs_vertex_add_edge(hdl
, svp
, dvp
) != 0)
375 * Iterate over all children of the given dataset, adding any vertices
376 * as necessary. Returns -1 if there was an error, or 0 otherwise.
377 * This is a simple recursive algorithm - the ZFS namespace typically
378 * is very flat. We manually invoke the necessary ioctl() calls to
379 * avoid the overhead and additional semantics of zfs_open().
382 iterate_children(libzfs_handle_t
*hdl
, zfs_graph_t
*zgp
, const char *dataset
)
384 zfs_cmd_t zc
= { 0 };
388 * Look up the source vertex, and avoid it if we've seen it before.
390 zvp
= zfs_graph_lookup(hdl
, zgp
, dataset
, 0);
393 if (zvp
->zv_visited
== VISIT_SEEN
)
397 * Iterate over all children
399 for ((void) strlcpy(zc
.zc_name
, dataset
, sizeof (zc
.zc_name
));
400 ioctl(hdl
->libzfs_fd
, ZFS_IOC_DATASET_LIST_NEXT
, &zc
) == 0;
401 (void) strlcpy(zc
.zc_name
, dataset
, sizeof (zc
.zc_name
))) {
404 * Ignore private dataset names.
406 if (dataset_name_hidden(zc
.zc_name
))
410 * Get statistics for this dataset, to determine the type of the
411 * dataset and clone statistics. If this fails, the dataset has
412 * since been removed, and we're pretty much screwed anyway.
414 zc
.zc_objset_stats
.dds_origin
[0] = '\0';
415 if (ioctl(hdl
->libzfs_fd
, ZFS_IOC_OBJSET_STATS
, &zc
) != 0)
418 if (zc
.zc_objset_stats
.dds_origin
[0] != '\0') {
419 if (zfs_graph_add(hdl
, zgp
,
420 zc
.zc_objset_stats
.dds_origin
, zc
.zc_name
,
421 zc
.zc_objset_stats
.dds_creation_txg
) != 0)
424 * Count origins only if they are contained in the graph
426 if (isa_child_of(zc
.zc_objset_stats
.dds_origin
,
428 zgp
->zg_clone_count
--;
432 * Add an edge between the parent and the child.
434 if (zfs_graph_add(hdl
, zgp
, dataset
, zc
.zc_name
,
435 zc
.zc_objset_stats
.dds_creation_txg
) != 0)
439 * Recursively visit child
441 if (iterate_children(hdl
, zgp
, zc
.zc_name
))
446 * Now iterate over all snapshots.
448 bzero(&zc
, sizeof (zc
));
450 for ((void) strlcpy(zc
.zc_name
, dataset
, sizeof (zc
.zc_name
));
451 ioctl(hdl
->libzfs_fd
, ZFS_IOC_SNAPSHOT_LIST_NEXT
, &zc
) == 0;
452 (void) strlcpy(zc
.zc_name
, dataset
, sizeof (zc
.zc_name
))) {
455 * Get statistics for this dataset, to determine the type of the
456 * dataset and clone statistics. If this fails, the dataset has
457 * since been removed, and we're pretty much screwed anyway.
459 if (ioctl(hdl
->libzfs_fd
, ZFS_IOC_OBJSET_STATS
, &zc
) != 0)
463 * Add an edge between the parent and the child.
465 if (zfs_graph_add(hdl
, zgp
, dataset
, zc
.zc_name
,
466 zc
.zc_objset_stats
.dds_creation_txg
) != 0)
469 zgp
->zg_clone_count
+= zc
.zc_objset_stats
.dds_num_clones
;
472 zvp
->zv_visited
= VISIT_SEEN
;
478 * Returns false if there are no snapshots with dependent clones in this
479 * subtree or if all of those clones are also in this subtree. Returns
480 * true if there is an error or there are external dependents.
483 external_dependents(libzfs_handle_t
*hdl
, zfs_graph_t
*zgp
, const char *dataset
)
485 zfs_cmd_t zc
= { 0 };
488 * Check whether this dataset is a clone or has clones since
489 * iterate_children() only checks the children.
491 (void) strlcpy(zc
.zc_name
, dataset
, sizeof (zc
.zc_name
));
492 if (ioctl(hdl
->libzfs_fd
, ZFS_IOC_OBJSET_STATS
, &zc
) != 0)
495 if (zc
.zc_objset_stats
.dds_origin
[0] != '\0') {
496 if (zfs_graph_add(hdl
, zgp
,
497 zc
.zc_objset_stats
.dds_origin
, zc
.zc_name
,
498 zc
.zc_objset_stats
.dds_creation_txg
) != 0)
500 if (isa_child_of(zc
.zc_objset_stats
.dds_origin
, dataset
))
501 zgp
->zg_clone_count
--;
504 if ((zc
.zc_objset_stats
.dds_num_clones
) ||
505 iterate_children(hdl
, zgp
, dataset
))
508 return (zgp
->zg_clone_count
!= 0);
512 * Construct a complete graph of all necessary vertices. First, iterate over
513 * only our object's children. If no cloned snapshots are found, or all of
514 * the cloned snapshots are in this subtree then return a graph of the subtree.
515 * Otherwise, start at the root of the pool and iterate over all datasets.
518 construct_graph(libzfs_handle_t
*hdl
, const char *dataset
)
520 zfs_graph_t
*zgp
= zfs_graph_create(hdl
, dataset
, ZFS_GRAPH_SIZE
);
526 if ((strchr(dataset
, '/') == NULL
) ||
527 (external_dependents(hdl
, zgp
, dataset
))) {
529 * Determine pool name and try again.
531 int len
= strcspn(dataset
, "/@") + 1;
532 char *pool
= zfs_alloc(hdl
, len
);
535 zfs_graph_destroy(zgp
);
538 (void) strlcpy(pool
, dataset
, len
);
540 if (iterate_children(hdl
, zgp
, pool
) == -1 ||
541 zfs_graph_add(hdl
, zgp
, pool
, NULL
, 0) != 0) {
543 zfs_graph_destroy(zgp
);
549 if (ret
== -1 || zfs_graph_add(hdl
, zgp
, dataset
, NULL
, 0) != 0) {
550 zfs_graph_destroy(zgp
);
558 * Given a graph, do a recursive topological sort into the given array. This is
559 * really just a depth first search, so that the deepest nodes appear first.
560 * hijack the 'zv_visited' marker to avoid visiting the same vertex twice.
563 topo_sort(libzfs_handle_t
*hdl
, boolean_t allowrecursion
, char **result
,
564 size_t *idx
, zfs_vertex_t
*zgv
)
568 if (zgv
->zv_visited
== VISIT_SORT_PRE
&& !allowrecursion
) {
570 * If we've already seen this vertex as part of our depth-first
571 * search, then we have a cyclic dependency, and we must return
574 zfs_error_aux(hdl
, dgettext(TEXT_DOMAIN
,
575 "recursive dependency at '%s'"),
577 return (zfs_error(hdl
, EZFS_RECURSIVE
,
578 dgettext(TEXT_DOMAIN
,
579 "cannot determine dependent datasets")));
580 } else if (zgv
->zv_visited
>= VISIT_SORT_PRE
) {
582 * If we've already processed this as part of the topological
583 * sort, then don't bother doing so again.
588 zgv
->zv_visited
= VISIT_SORT_PRE
;
590 /* avoid doing a search if we don't have to */
591 zfs_vertex_sort_edges(zgv
);
592 for (i
= 0; i
< zgv
->zv_edgecount
; i
++) {
593 if (topo_sort(hdl
, allowrecursion
, result
, idx
,
594 zgv
->zv_edges
[i
]->ze_dest
) != 0)
598 /* we may have visited this in the course of the above */
599 if (zgv
->zv_visited
== VISIT_SORT_POST
)
602 if ((result
[*idx
] = zfs_alloc(hdl
,
603 strlen(zgv
->zv_dataset
) + 1)) == NULL
)
606 (void) strcpy(result
[*idx
], zgv
->zv_dataset
);
608 zgv
->zv_visited
= VISIT_SORT_POST
;
613 * The only public interface for this file. Do the dirty work of constructing a
614 * child list for the given object. Construct the graph, do the toplogical
615 * sort, and then return the array of strings to the caller.
617 * The 'allowrecursion' parameter controls behavior when cycles are found. If
618 * it is set, the the cycle is ignored and the results returned as if the cycle
619 * did not exist. If it is not set, then the routine will generate an error if
623 get_dependents(libzfs_handle_t
*hdl
, boolean_t allowrecursion
,
624 const char *dataset
, char ***result
, size_t *count
)
629 if ((zgp
= construct_graph(hdl
, dataset
)) == NULL
)
632 if ((*result
= zfs_alloc(hdl
,
633 zgp
->zg_nvertex
* sizeof (char *))) == NULL
) {
634 zfs_graph_destroy(zgp
);
638 if ((zvp
= zfs_graph_lookup(hdl
, zgp
, dataset
, 0)) == NULL
) {
640 zfs_graph_destroy(zgp
);
645 if (topo_sort(hdl
, allowrecursion
, *result
, count
, zvp
) != 0) {
647 zfs_graph_destroy(zgp
);
652 * Get rid of the last entry, which is our starting vertex and not
653 * strictly a dependent.
656 free((*result
)[*count
- 1]);
659 zfs_graph_destroy(zgp
);