usr/src/cmd/zoneadm/zfs.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
  25  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  26  * Copyright (c) 2016 Martin Matuska. All rights reserved.
  27  */
  28
  29 /*
  30  * This file contains the functions used to support the ZFS integration
  31  * with zones.  This includes validation (e.g. zonecfg dataset), cloning,
  32  * file system creation and destruction.
  33  */
  34
  35 #include <stdio.h>
  36 #include <errno.h>
  37 #include <unistd.h>
  38 #include <string.h>
  39 #include <locale.h>
  40 #include <libintl.h>
  41 #include <sys/stat.h>
  42 #include <sys/statvfs.h>
  43 #include <libgen.h>
  44 #include <libzonecfg.h>
  45 #include <sys/mnttab.h>
  46 #include <libzfs.h>
  47 #include <sys/mntent.h>
  48 #include <values.h>
  49 #include <strings.h>
  50 #include <assert.h>
  51
  52 #include "zoneadm.h"
  53
   54 libzfs_handle_t *g_zfs;	/* libzfs library handle passed to the libzfs calls below; not set up in this part of the file */
  55
   56 typedef struct zfs_mount_data {	/* call-back data for match_mountpoint() */
   57         char            *match_name;	/* mount point path being searched for */
   58         zfs_handle_t    *match_handle;	/* open handle of the match, NULL if none */
   59 } zfs_mount_data_t;
  60
   61 typedef struct zfs_snapshot_data {	/* call-back data for get_snap_max() and rename_snap() */
   62         char    *match_name;    /* snapshot name prefix: <dataset>@SUNWzone */
   63         int     len;            /* strlen of match_name */
   64         int     max;            /* highest number seen after the prefix; next suffix for renames */
   65         int     num;            /* count of snapshots matching the prefix (those to rename) */
   66         int     cntr;           /* snapshots rename_snap() has processed so far */
   67 } zfs_snapshot_data_t;
  68
   69 typedef struct clone_data {	/* call-back data for find_clone() and get_direct_clone() */
   70         zfs_handle_t    *clone_zhp;     /* open handle of the clone to promote, NULL if none found */
   71         time_t          origin_creation; /* creation time of the youngest snapshot found with a direct clone */
   72         const char      *snapshot;      /* name of the snapshot whose dependents are being walked */
   73 } clone_data_t;
  74
  75 /*
  76  * A ZFS file system iterator call-back function which returns the
  77  * zfs_handle_t for a ZFS file system on the specified mount point.
  78  */
  79 static int
  80 match_mountpoint(zfs_handle_t *zhp, void *data)
  81 {
  82         int                     res;
  83         zfs_mount_data_t        *cbp;
  84         char                    mp[ZFS_MAXPROPLEN];
  85
  86         if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
  87                 zfs_close(zhp);
  88                 return (0);
  89         }
  90
  91         /* First check if the dataset is mounted. */
  92         if (zfs_prop_get(zhp, ZFS_PROP_MOUNTED, mp, sizeof (mp), NULL, NULL,
  93             0, B_FALSE) != 0 || strcmp(mp, "no") == 0) {
  94                 zfs_close(zhp);
  95                 return (0);
  96         }
  97
  98         /* Now check mount point. */
  99         if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
 100             0, B_FALSE) != 0) {
 101                 zfs_close(zhp);
 102                 return (0);
 103         }
 104
 105         cbp = (zfs_mount_data_t *)data;
 106
 107         if (strcmp(mp, "legacy") == 0) {
 108                 /* If legacy, must look in mnttab for mountpoint. */
 109                 FILE            *fp;
 110                 struct mnttab   entry;
 111                 const char      *nm;
 112
 113                 nm = zfs_get_name(zhp);
 114                 if ((fp = fopen(MNTTAB, "r")) == NULL) {
 115                         zfs_close(zhp);
 116                         return (0);
 117                 }
 118
 119                 while (getmntent(fp, &entry) == 0) {
 120                         if (strcmp(nm, entry.mnt_special) == 0) {
 121                                 if (strcmp(entry.mnt_mountp, cbp->match_name)
 122                                     == 0) {
 123                                         (void) fclose(fp);
 124                                         cbp->match_handle = zhp;
 125                                         return (1);
 126                                 }
 127                                 break;
 128                         }
 129                 }
 130                 (void) fclose(fp);
 131
 132         } else if (strcmp(mp, cbp->match_name) == 0) {
 133                 cbp->match_handle = zhp;
 134                 return (1);
 135         }
 136
 137         /* Iterate over any nested datasets. */
 138         res = zfs_iter_filesystems(zhp, match_mountpoint, data);
 139         zfs_close(zhp);
 140         return (res);
 141 }
 142
 143 /*
 144  * Get ZFS handle for the specified mount point.
 145  */
 146 static zfs_handle_t *
 147 mount2zhandle(char *mountpoint)
 148 {
 149         zfs_mount_data_t        cb;
 150
 151         cb.match_name = mountpoint;
 152         cb.match_handle = NULL;
 153         (void) zfs_iter_root(g_zfs, match_mountpoint, &cb);
 154         return (cb.match_handle);
 155 }
 156
 157 /*
 158  * Check if there is already a file system (zfs or any other type) mounted on
 159  * path.
 160  */
 161 static boolean_t
 162 is_mountpnt(char *path)
 163 {
 164         FILE            *fp;
 165         struct mnttab   entry;
 166
 167         if ((fp = fopen(MNTTAB, "r")) == NULL)
 168                 return (B_FALSE);
 169
 170         while (getmntent(fp, &entry) == 0) {
 171                 if (strcmp(path, entry.mnt_mountp) == 0) {
 172                         (void) fclose(fp);
 173                         return (B_TRUE);
 174                 }
 175         }
 176
 177         (void) fclose(fp);
 178         return (B_FALSE);
 179 }
 180
 181 /*
 182  * Run the brand's pre-snapshot hook before we take a ZFS snapshot of the zone.
 183  */
 184 static int
 185 pre_snapshot(char *presnapbuf)
 186 {
 187         int status;
 188
 189         /* No brand-specific handler */
 190         if (presnapbuf[0] == '\0')
 191                 return (Z_OK);
 192
 193         /* Run the hook */
 194         status = do_subproc(presnapbuf);
 195         if ((status = subproc_status(gettext("brand-specific presnapshot"),
 196             status, B_FALSE)) != ZONE_SUBPROC_OK)
 197                 return (Z_ERR);
 198
 199         return (Z_OK);
 200 }
 201
 202 /*
 203  * Run the brand's post-snapshot hook after we take a ZFS snapshot of the zone.
 204  */
 205 static int
 206 post_snapshot(char *postsnapbuf)
 207 {
 208         int status;
 209
 210         /* No brand-specific handler */
 211         if (postsnapbuf[0] == '\0')
 212                 return (Z_OK);
 213
 214         /* Run the hook */
 215         status = do_subproc(postsnapbuf);
 216         if ((status = subproc_status(gettext("brand-specific postsnapshot"),
 217             status, B_FALSE)) != ZONE_SUBPROC_OK)
 218                 return (Z_ERR);
 219
 220         return (Z_OK);
 221 }
 222
 223 /*
 224  * This is a ZFS snapshot iterator call-back function which returns the
 225  * highest number of SUNWzone snapshots that have been taken.
 226  */
 227 static int
 228 get_snap_max(zfs_handle_t *zhp, void *data)
 229 {
 230         int                     res;
 231         zfs_snapshot_data_t     *cbp;
 232
 233         if (zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) {
 234                 zfs_close(zhp);
 235                 return (0);
 236         }
 237
 238         cbp = (zfs_snapshot_data_t *)data;
 239
 240         if (strncmp(zfs_get_name(zhp), cbp->match_name, cbp->len) == 0) {
 241                 char    *nump;
 242                 int     num;
 243
 244                 cbp->num++;
 245                 nump = (char *)(zfs_get_name(zhp) + cbp->len);
 246                 num = atoi(nump);
 247                 if (num > cbp->max)
 248                         cbp->max = num;
 249         }
 250
 251         res = zfs_iter_snapshots(zhp, B_FALSE, get_snap_max, data);
 252         zfs_close(zhp);
 253         return (res);
 254 }
 255
 256 /*
 257  * Take a ZFS snapshot to be used for cloning the zone.
 258  */
 259 static int
 260 take_snapshot(zfs_handle_t *zhp, char *snapshot_name, int snap_size,
 261     char *presnapbuf, char *postsnapbuf)
 262 {
 263         int                     res;
 264         char                    template[ZFS_MAX_DATASET_NAME_LEN];
 265         zfs_snapshot_data_t     cb;
 266
 267         /*
 268          * First we need to figure out the next available name for the
 269          * zone snapshot.  Look through the list of zones snapshots for
 270          * this file system to determine the maximum snapshot name.
 271          */
 272         if (snprintf(template, sizeof (template), "%s@SUNWzone",
 273             zfs_get_name(zhp)) >=  sizeof (template))
 274                 return (Z_ERR);
 275
 276         cb.match_name = template;
 277         cb.len = strlen(template);
 278         cb.max = 0;
 279
 280         if (zfs_iter_snapshots(zhp, B_FALSE, get_snap_max, &cb) != 0)
 281                 return (Z_ERR);
 282
 283         cb.max++;
 284
 285         if (snprintf(snapshot_name, snap_size, "%s@SUNWzone%d",
 286             zfs_get_name(zhp), cb.max) >= snap_size)
 287                 return (Z_ERR);
 288
 289         if (pre_snapshot(presnapbuf) != Z_OK)
 290                 return (Z_ERR);
 291         res = zfs_snapshot(g_zfs, snapshot_name, B_FALSE, NULL);
 292         if (post_snapshot(postsnapbuf) != Z_OK)
 293                 return (Z_ERR);
 294
 295         if (res != 0)
 296                 return (Z_ERR);
 297         return (Z_OK);
 298 }
 299
 300 /*
 301  * We are using an explicit snapshot from some earlier point in time so
 302  * we need to validate it.  Run the brand specific hook.
 303  */
 304 static int
 305 validate_snapshot(char *snapshot_name, char *snap_path, char *validsnapbuf)
 306 {
 307         int status;
 308         char cmdbuf[MAXPATHLEN];
 309
 310         /* No brand-specific handler */
 311         if (validsnapbuf[0] == '\0')
 312                 return (Z_OK);
 313
 314         /* pass args - snapshot_name & snap_path */
 315         if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %s %s", validsnapbuf,
 316             snapshot_name, snap_path) >= sizeof (cmdbuf)) {
 317                 zerror("Command line too long");
 318                 return (Z_ERR);
 319         }
 320
 321         /* Run the hook */
 322         status = do_subproc(cmdbuf);
 323         if ((status = subproc_status(gettext("brand-specific validatesnapshot"),
 324             status, B_FALSE)) != ZONE_SUBPROC_OK)
 325                 return (Z_ERR);
 326
 327         return (Z_OK);
 328 }
 329
 330 /*
 331  * Remove the sw inventory file from inside this zonepath that we picked up out
 332  * of the snapshot.
 333  */
 334 static int
 335 clean_out_clone()
 336 {
 337         int err;
 338         zone_dochandle_t handle;
 339
 340         if ((handle = zonecfg_init_handle()) == NULL) {
 341                 zperror(cmd_to_str(CMD_CLONE), B_TRUE);
 342                 return (Z_ERR);
 343         }
 344
 345         if ((err = zonecfg_get_handle(target_zone, handle)) != Z_OK) {
 346                 errno = err;
 347                 zperror(cmd_to_str(CMD_CLONE), B_TRUE);
 348                 zonecfg_fini_handle(handle);
 349                 return (Z_ERR);
 350         }
 351
 352         zonecfg_rm_detached(handle, B_FALSE);
 353         zonecfg_fini_handle(handle);
 354
 355         return (Z_OK);
 356 }
 357
 358 /*
 359  * Make a ZFS clone on zonepath from snapshot_name.
 360  */
 361 static int
 362 clone_snap(char *snapshot_name, char *zonepath)
 363 {
 364         int             res = Z_OK;
 365         int             err;
 366         zfs_handle_t    *zhp;
 367         zfs_handle_t    *clone;
 368         nvlist_t        *props = NULL;
 369
 370         if ((zhp = zfs_open(g_zfs, snapshot_name, ZFS_TYPE_SNAPSHOT)) == NULL)
 371                 return (Z_NO_ENTRY);
 372
 373         (void) printf(gettext("Cloning snapshot %s\n"), snapshot_name);
 374
 375         /*
 376          * We turn off zfs SHARENFS and SHARESMB properties on the
 377          * zoneroot dataset in order to prevent the GZ from sharing
 378          * NGZ data by accident.
 379          */
 380         if ((nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) ||
 381             (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARENFS),
 382             "off") != 0) ||
 383             (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARESMB),
 384             "off") != 0)) {
 385                 nvlist_free(props);
 386                 (void) fprintf(stderr, gettext("could not create ZFS clone "
 387                     "%s: out of memory\n"), zonepath);
 388                 return (Z_ERR);
 389         }
 390
 391         err = zfs_clone(zhp, zonepath, props);
 392         zfs_close(zhp);
 393
 394         nvlist_free(props);
 395
 396         if (err != 0)
 397                 return (Z_ERR);
 398
 399         /* create the mountpoint if necessary */
 400         if ((clone = zfs_open(g_zfs, zonepath, ZFS_TYPE_DATASET)) == NULL)
 401                 return (Z_ERR);
 402
 403         /*
 404          * The clone has been created so we need to print a diagnostic
 405          * message if one of the following steps fails for some reason.
 406          */
 407         if (zfs_mount(clone, NULL, 0) != 0) {
 408                 (void) fprintf(stderr, gettext("could not mount ZFS clone "
 409                     "%s\n"), zfs_get_name(clone));
 410                 res = Z_ERR;
 411
 412         } else if (clean_out_clone() != Z_OK) {
 413                 (void) fprintf(stderr, gettext("could not remove the "
 414                     "software inventory from ZFS clone %s\n"),
 415                     zfs_get_name(clone));
 416                 res = Z_ERR;
 417         }
 418
 419         zfs_close(clone);
 420         return (res);
 421 }
 422
 423 /*
 424  * This function takes a zonepath and attempts to determine what the ZFS
 425  * file system name (not mountpoint) should be for that path.  We do not
 426  * assume that zonepath is an existing directory or ZFS fs since we use
 427  * this function as part of the process of creating a new ZFS fs or clone.
 428  *
 429  * The way this works is that we look at the parent directory of the zonepath
 430  * to see if it is a ZFS fs.  If it is, we get the name of that ZFS fs and
 431  * append the last component of the zonepath to generate the ZFS name for the
 432  * zonepath.  This matches the algorithm that ZFS uses for automatically
 433  * mounting a new fs after it is created.
 434  *
 435  * Although a ZFS fs can be mounted anywhere, we don't worry about handling
 436  * all of the complexity that a user could possibly configure with arbitrary
 437  * mounts since there is no way to generate a ZFS name from a random path in
 438  * the file system.  We only try to handle the automatic mounts that ZFS does
 439  * for each file system.  ZFS restricts this so that a new fs must be created
 440  * in an existing parent ZFS fs.  It then automatically mounts the new fs
 441  * directly under the mountpoint for the parent fs using the last component
 442  * of the name as the mountpoint directory.
 443  *
 444  * For example:
 445  *    Name                      Mountpoint
 446  *    space/eng/dev/test/zone1  /project1/eng/dev/test/zone1
 447  *
 448  * Return Z_OK if the path mapped to a ZFS file system name, otherwise return
 449  * Z_ERR.
 450  */
 451 static int
 452 path2name(char *zonepath, char *zfs_name, int len)
 453 {
 454         int             res;
 455         char            *bnm, *dnm, *dname, *bname;
 456         zfs_handle_t    *zhp;
 457         struct stat     stbuf;
 458
 459         /*
 460          * We need two tmp strings to handle paths directly in / (e.g. /foo)
 461          * since dirname will overwrite the first char after "/" in this case.
 462          */
 463         if ((bnm = strdup(zonepath)) == NULL)
 464                 return (Z_ERR);
 465
 466         if ((dnm = strdup(zonepath)) == NULL) {
 467                 free(bnm);
 468                 return (Z_ERR);
 469         }
 470
 471         bname = basename(bnm);
 472         dname = dirname(dnm);
 473
 474         /*
 475          * This is a quick test to save iterating over all of the zfs datasets
 476          * on the system (which can be a lot).  If the parent dir is not in a
 477          * ZFS fs, then we're done.
 478          */
 479         if (stat(dname, &stbuf) != 0 || !S_ISDIR(stbuf.st_mode) ||
 480             strcmp(stbuf.st_fstype, MNTTYPE_ZFS) != 0) {
 481                 free(bnm);
 482                 free(dnm);
 483                 return (Z_ERR);
 484         }
 485
 486         /* See if the parent directory is its own ZFS dataset. */
 487         if ((zhp = mount2zhandle(dname)) == NULL) {
 488                 /*
 489                  * The parent is not a ZFS dataset so we can't automatically
 490                  * create a dataset on the given path.
 491                  */
 492                 free(bnm);
 493                 free(dnm);
 494                 return (Z_ERR);
 495         }
 496
 497         res = snprintf(zfs_name, len, "%s/%s", zfs_get_name(zhp), bname);
 498
 499         free(bnm);
 500         free(dnm);
 501         zfs_close(zhp);
 502         if (res >= len)
 503                 return (Z_ERR);
 504
 505         return (Z_OK);
 506 }
 507
 508 /*
 509  * A ZFS file system iterator call-back function used to determine if the
 510  * file system has dependents (snapshots & clones).
 511  */
 512 /* ARGSUSED */
 513 static int
 514 has_dependent(zfs_handle_t *zhp, void *data)
 515 {
 516         zfs_close(zhp);
 517         return (1);
 518 }
 519
 520 /*
 521  * Given a snapshot name, get the file system path where the snapshot lives.
 522  * A snapshot name is of the form fs_name@snap_name.  For example, snapshot
 523  * pl/zones/z1@SUNWzone1 would have a path of
 524  * /pl/zones/z1/.zfs/snapshot/SUNWzone1.
 525  */
 526 static int
 527 snap2path(char *snap_name, char *path, int len)
 528 {
 529         char            *p;
 530         zfs_handle_t    *zhp;
 531         char            mp[ZFS_MAXPROPLEN];
 532
 533         if ((p = strrchr(snap_name, '@')) == NULL)
 534                 return (Z_ERR);
 535
 536         /* Get the file system name from the snap_name. */
 537         *p = '\0';
 538         zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_DATASET);
 539         *p = '@';
 540         if (zhp == NULL)
 541                 return (Z_ERR);
 542
 543         /* Get the file system mount point. */
 544         if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
 545             0, B_FALSE) != 0) {
 546                 zfs_close(zhp);
 547                 return (Z_ERR);
 548         }
 549         zfs_close(zhp);
 550
 551         p++;
 552         if (snprintf(path, len, "%s/.zfs/snapshot/%s", mp, p) >= len)
 553                 return (Z_ERR);
 554
 555         return (Z_OK);
 556 }
 557
 558 /*
 559  * This callback function is used to iterate through a snapshot's dependencies
 560  * to find a filesystem that is a direct clone of the snapshot being iterated.
 561  */
 562 static int
 563 get_direct_clone(zfs_handle_t *zhp, void *data)
 564 {
 565         clone_data_t    *cd = data;
 566         char            origin[ZFS_MAX_DATASET_NAME_LEN];
 567         char            ds_path[ZFS_MAX_DATASET_NAME_LEN];
 568
 569         if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
 570                 zfs_close(zhp);
 571                 return (0);
 572         }
 573
 574         (void) strlcpy(ds_path, zfs_get_name(zhp), sizeof (ds_path));
 575
 576         /* Make sure this is a direct clone of the snapshot we're iterating. */
 577         if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), NULL,
 578             NULL, 0, B_FALSE) != 0 || strcmp(origin, cd->snapshot) != 0) {
 579                 zfs_close(zhp);
 580                 return (0);
 581         }
 582
 583         if (cd->clone_zhp != NULL)
 584                 zfs_close(cd->clone_zhp);
 585
 586         cd->clone_zhp = zhp;
 587         return (1);
 588 }
 589
 590 /*
 591  * A ZFS file system iterator call-back function used to determine the clone
 592  * to promote.  This function finds the youngest (i.e. last one taken) snapshot
 593  * that has a clone.  If found, it returns a reference to that clone in the
 594  * callback data.
 595  */
 596 static int
 597 find_clone(zfs_handle_t *zhp, void *data)
 598 {
 599         clone_data_t    *cd = data;
 600         time_t          snap_creation;
 601         int             zret = 0;
 602
 603         /* If snapshot has no clones, skip it */
 604         if (zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES) == 0) {
 605                 zfs_close(zhp);
 606                 return (0);
 607         }
 608
 609         cd->snapshot = zfs_get_name(zhp);
 610
 611         /* Get the creation time of this snapshot */
 612         snap_creation = (time_t)zfs_prop_get_int(zhp, ZFS_PROP_CREATION);
 613
 614         /*
 615          * If this snapshot's creation time is greater than (i.e. younger than)
 616          * the current youngest snapshot found, iterate this snapshot to
 617          * get the right clone.
 618          */
 619         if (snap_creation >= cd->origin_creation) {
 620                 /*
 621                  * Iterate the dependents of this snapshot to find a clone
 622                  * that's a direct dependent.
 623                  */
 624                 if ((zret = zfs_iter_dependents(zhp, B_FALSE, get_direct_clone,
 625                     cd)) == -1) {
 626                         zfs_close(zhp);
 627                         return (1);
 628                 } else if (zret == 1) {
 629                         /*
 630                          * Found a clone, update the origin_creation time
 631                          * in the callback data.
 632                          */
 633                         cd->origin_creation = snap_creation;
 634                 }
 635         }
 636
 637         zfs_close(zhp);
 638         return (0);
 639 }
 640
 641 /*
 642  * A ZFS file system iterator call-back function used to remove standalone
 643  * snapshots.
 644  */
 645 /* ARGSUSED */
 646 static int
 647 rm_snap(zfs_handle_t *zhp, void *data)
 648 {
 649         /* If snapshot has clones, something is wrong */
 650         if (zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES) != 0) {
 651                 zfs_close(zhp);
 652                 return (1);
 653         }
 654
 655         if (zfs_unmount(zhp, NULL, 0) == 0) {
 656                 (void) zfs_destroy(zhp, B_FALSE);
 657         }
 658
 659         zfs_close(zhp);
 660         return (0);
 661 }
 662
 663 /*
 664  * A ZFS snapshot iterator call-back function which renames snapshots.
 665  */
 666 static int
 667 rename_snap(zfs_handle_t *zhp, void *data)
 668 {
 669         int                     res;
 670         zfs_snapshot_data_t     *cbp;
 671         char                    template[ZFS_MAX_DATASET_NAME_LEN];
 672
 673         cbp = (zfs_snapshot_data_t *)data;
 674
 675         /*
 676          * When renaming snapshots with the iterator, the iterator can see
 677          * the same snapshot after we've renamed up in the namespace.  To
 678          * prevent this we check the count for the number of snapshots we have
 679          * to rename and stop at that point.
 680          */
 681         if (cbp->cntr >= cbp->num) {
 682                 zfs_close(zhp);
 683                 return (0);
 684         }
 685
 686         if (zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) {
 687                 zfs_close(zhp);
 688                 return (0);
 689         }
 690
 691         /* Only rename the snapshots we automatically generate when we clone. */
 692         if (strncmp(zfs_get_name(zhp), cbp->match_name, cbp->len) != 0) {
 693                 zfs_close(zhp);
 694                 return (0);
 695         }
 696
 697         (void) snprintf(template, sizeof (template), "%s%d", cbp->match_name,
 698             cbp->max++);
 699
 700         res = (zfs_rename(zhp, template, B_FALSE, B_FALSE) != 0);
 701         if (res != 0)
 702                 (void) fprintf(stderr, gettext("failed to rename snapshot %s "
 703                     "to %s: %s\n"), zfs_get_name(zhp), template,
 704                     libzfs_error_description(g_zfs));
 705
 706         cbp->cntr++;
 707
 708         zfs_close(zhp);
 709         return (res);
 710 }
 711
 712 /*
 713  * Rename the source dataset's snapshots that are automatically generated when
 714  * we clone a zone so that there won't be a name collision when we promote the
 715  * cloned dataset.  Once the snapshots have been renamed, then promote the
 716  * clone.
 717  *
 718  * The snapshot rename process gets the highest number on the snapshot names
 719  * (the format is zonename@SUNWzoneXX where XX are digits) on both the source
 720  * and clone datasets, then renames the source dataset snapshots starting at
 721  * the next number.
 722  */
 723 static int
 724 promote_clone(zfs_handle_t *src_zhp, zfs_handle_t *cln_zhp)
 725 {
 726         zfs_snapshot_data_t     sd;
 727         char                    nm[ZFS_MAX_DATASET_NAME_LEN];
 728         char                    template[ZFS_MAX_DATASET_NAME_LEN];
 729
 730         (void) strlcpy(nm, zfs_get_name(cln_zhp), sizeof (nm));
 731         /*
 732          * Start by getting the clone's snapshot max which we use
 733          * during the rename of the original dataset's snapshots.
 734          */
 735         (void) snprintf(template, sizeof (template), "%s@SUNWzone", nm);
 736         sd.match_name = template;
 737         sd.len = strlen(template);
 738         sd.max = 0;
 739
 740         if (zfs_iter_snapshots(cln_zhp, B_FALSE, get_snap_max, &sd) != 0)
 741                 return (Z_ERR);
 742
 743         /*
 744          * Now make sure the source's snapshot max is at least as high as
 745          * the clone's snapshot max.
 746          */
 747         (void) snprintf(template, sizeof (template), "%s@SUNWzone",
 748             zfs_get_name(src_zhp));
 749         sd.match_name = template;
 750         sd.len = strlen(template);
 751         sd.num = 0;
 752
 753         if (zfs_iter_snapshots(src_zhp, B_FALSE, get_snap_max, &sd) != 0)
 754                 return (Z_ERR);
 755
 756         /*
 757          * Now rename the source dataset's snapshots so there's no
 758          * conflict when we promote the clone.
 759          */
 760         sd.max++;
 761         sd.cntr = 0;
 762         if (zfs_iter_snapshots(src_zhp, B_FALSE, rename_snap, &sd) != 0)
 763                 return (Z_ERR);
 764
 765         /* close and reopen the clone dataset to get the latest info */
 766         zfs_close(cln_zhp);
 767         if ((cln_zhp = zfs_open(g_zfs, nm, ZFS_TYPE_FILESYSTEM)) == NULL)
 768                 return (Z_ERR);
 769
 770         if (zfs_promote(cln_zhp) != 0) {
 771                 (void) fprintf(stderr, gettext("failed to promote %s: %s\n"),
 772                     nm, libzfs_error_description(g_zfs));
 773                 return (Z_ERR);
 774         }
 775
 776         zfs_close(cln_zhp);
 777         return (Z_OK);
 778 }
 779
 780 /*
 781  * Promote the youngest clone.  That clone will then become the origin of all
 782  * of the other clones that were hanging off of the source dataset.
 783  */
 784 int
 785 promote_all_clones(zfs_handle_t *zhp)
 786 {
 787         clone_data_t    cd;
 788         char            nm[ZFS_MAX_DATASET_NAME_LEN];
 789
 790         cd.clone_zhp = NULL;
 791         cd.origin_creation = 0;
 792         cd.snapshot = NULL;
 793
 794         if (zfs_iter_snapshots(zhp, B_FALSE, find_clone, &cd) != 0) {
 795                 zfs_close(zhp);
 796                 return (Z_ERR);
 797         }
 798
 799         /* Nothing to promote. */
 800         if (cd.clone_zhp == NULL)
 801                 return (Z_OK);
 802
 803         /* Found the youngest clone to promote.  Promote it. */
 804         if (promote_clone(zhp, cd.clone_zhp) != 0) {
 805                 zfs_close(cd.clone_zhp);
 806                 zfs_close(zhp);
 807                 return (Z_ERR);
 808         }
 809
 810         /* close and reopen the main dataset to get the latest info */
 811         (void) strlcpy(nm, zfs_get_name(zhp), sizeof (nm));
 812         zfs_close(zhp);
 813         if ((zhp = zfs_open(g_zfs, nm, ZFS_TYPE_FILESYSTEM)) == NULL)
 814                 return (Z_ERR);
 815
 816         return (Z_OK);
 817 }
 818
 819 /*
 820  * Clone a pre-existing ZFS snapshot, either by making a direct ZFS clone, if
 821  * possible, or by copying the data from the snapshot to the zonepath.
 822  */
 823 int
 824 clone_snapshot_zfs(char *snap_name, char *zonepath, char *validatesnap)
 825 {
 826         int     err = Z_OK;
 827         char    clone_name[MAXPATHLEN];
 828         char    snap_path[MAXPATHLEN];
 829
 830         if (snap2path(snap_name, snap_path, sizeof (snap_path)) != Z_OK) {
 831                 (void) fprintf(stderr, gettext("unable to find path for %s.\n"),
 832                     snap_name);
 833                 return (Z_ERR);
 834         }
 835
 836         if (validate_snapshot(snap_name, snap_path, validatesnap) != Z_OK)
 837                 return (Z_NO_ENTRY);
 838
 839         /*
 840          * The zonepath cannot be ZFS cloned, try to copy the data from
 841          * within the snapshot to the zonepath.
 842          */
 843         if (path2name(zonepath, clone_name, sizeof (clone_name)) != Z_OK) {
 844                 if ((err = clone_copy(snap_path, zonepath)) == Z_OK)
 845                         if (clean_out_clone() != Z_OK)
 846                                 (void) fprintf(stderr,
 847                                     gettext("could not remove the "
 848                                     "software inventory from %s\n"), zonepath);
 849
 850                 return (err);
 851         }
 852
 853         if ((err = clone_snap(snap_name, clone_name)) != Z_OK) {
 854                 if (err != Z_NO_ENTRY) {
 855                         /*
 856                          * Cloning the snapshot failed.  Fall back to trying
 857                          * to install the zone by copying from the snapshot.
 858                          */
 859                         if ((err = clone_copy(snap_path, zonepath)) == Z_OK)
 860                                 if (clean_out_clone() != Z_OK)
 861                                         (void) fprintf(stderr,
 862                                             gettext("could not remove the "
 863                                             "software inventory from %s\n"),
 864                                             zonepath);
 865                 } else {
 866                         /*
 867                          * The snapshot is unusable for some reason so restore
 868                          * the zone state to configured since we were unable to
 869                          * actually do anything about getting the zone
 870                          * installed.
 871                          */
 872                         int tmp;
 873
 874                         if ((tmp = zone_set_state(target_zone,
 875                             ZONE_STATE_CONFIGURED)) != Z_OK) {
 876                                 errno = tmp;
 877                                 zperror2(target_zone,
 878                                     gettext("could not set state"));
 879                         }
 880                 }
 881         }
 882
 883         return (err);
 884 }
 885
 886 /*
 887  * Attempt to clone a source_zone to a target zonepath by using a ZFS clone.
 888  */
 889 int
 890 clone_zfs(char *source_zonepath, char *zonepath, char *presnapbuf,
 891     char *postsnapbuf)
 892 {
 893         zfs_handle_t    *zhp;
 894         char            clone_name[MAXPATHLEN];
 895         char            snap_name[MAXPATHLEN];
 896
 897         /*
 898          * Try to get a zfs handle for the source_zonepath.  If this fails
 899          * the source_zonepath is not ZFS so return an error.
 900          */
 901         if ((zhp = mount2zhandle(source_zonepath)) == NULL)
 902                 return (Z_ERR);
 903
 904         /*
 905          * Check if there is a file system already mounted on zonepath.  If so,
 906          * we can't clone to the path so we should fall back to copying.
 907          */
 908         if (is_mountpnt(zonepath)) {
 909                 zfs_close(zhp);
 910                 (void) fprintf(stderr,
 911                     gettext("A file system is already mounted on %s,\n"
 912                     "preventing use of a ZFS clone.\n"), zonepath);
 913                 return (Z_ERR);
 914         }
 915
 916         /*
 917          * Instead of using path2name to get the clone name from the zonepath,
 918          * we could generate a name from the source zone ZFS name.  However,
 919          * this would mean we would create the clone under the ZFS fs of the
 920          * source instead of what the zonepath says.  For example,
 921          *
 922          * source_zonepath              zonepath
 923          * /pl/zones/dev/z1             /pl/zones/deploy/z2
 924          *
 925          * We don't want the clone to be under "dev", we want it under
 926          * "deploy", so that we can leverage the normal attribute inheritance
 927          * that ZFS provides in the fs hierarchy.
 928          */
 929         if (path2name(zonepath, clone_name, sizeof (clone_name)) != Z_OK) {
 930                 zfs_close(zhp);
 931                 return (Z_ERR);
 932         }
 933
 934         if (take_snapshot(zhp, snap_name, sizeof (snap_name), presnapbuf,
 935             postsnapbuf) != Z_OK) {
 936                 zfs_close(zhp);
 937                 return (Z_ERR);
 938         }
 939         zfs_close(zhp);
 940
 941         if (clone_snap(snap_name, clone_name) != Z_OK) {
 942                 /* Clean up the snapshot we just took. */
 943                 if ((zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_SNAPSHOT))
 944                     != NULL) {
 945                         if (zfs_unmount(zhp, NULL, 0) == 0)
 946                                 (void) zfs_destroy(zhp, B_FALSE);
 947                         zfs_close(zhp);
 948                 }
 949
 950                 return (Z_ERR);
 951         }
 952
 953         (void) printf(gettext("Instead of copying, a ZFS clone has been "
 954             "created for this zone.\n"));
 955
 956         return (Z_OK);
 957 }
 958
 959 /*
 960  * Attempt to create a ZFS file system for the specified zonepath.
 961  * We either will successfully create a ZFS file system and get it mounted
 962  * on the zonepath or we don't.  The caller doesn't care since a regular
 963  * directory is used for the zonepath if no ZFS file system is mounted there.
 964  */
 965 void
 966 create_zfs_zonepath(char *zonepath)
 967 {
 968         zfs_handle_t    *zhp;
 969         char            zfs_name[MAXPATHLEN];
 970         nvlist_t        *props = NULL;
 971
 972         if (path2name(zonepath, zfs_name, sizeof (zfs_name)) != Z_OK)
 973                 return;
 974
 975         /* Check if the dataset already exists. */
 976         if ((zhp = zfs_open(g_zfs, zfs_name, ZFS_TYPE_DATASET)) != NULL) {
 977                 zfs_close(zhp);
 978                 return;
 979         }
 980
 981         /*
 982          * We turn off zfs SHARENFS and SHARESMB properties on the
 983          * zoneroot dataset in order to prevent the GZ from sharing
 984          * NGZ data by accident.
 985          */
 986         if ((nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) ||
 987             (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARENFS),
 988             "off") != 0) ||
 989             (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARESMB),
 990             "off") != 0)) {
 991                 nvlist_free(props);
 992                 (void) fprintf(stderr, gettext("cannot create ZFS dataset %s: "
 993                     "out of memory\n"), zfs_name);
 994         }
 995
 996         if (zfs_create(g_zfs, zfs_name, ZFS_TYPE_FILESYSTEM, props) != 0 ||
 997             (zhp = zfs_open(g_zfs, zfs_name, ZFS_TYPE_DATASET)) == NULL) {
 998                 (void) fprintf(stderr, gettext("cannot create ZFS dataset %s: "
 999                     "%s\n"), zfs_name, libzfs_error_description(g_zfs));
1000                 nvlist_free(props);
1001                 return;
1002         }
1003
1004         nvlist_free(props);
1005
1006         if (zfs_mount(zhp, NULL, 0) != 0) {
1007                 (void) fprintf(stderr, gettext("cannot mount ZFS dataset %s: "
1008                     "%s\n"), zfs_name, libzfs_error_description(g_zfs));
1009                 (void) zfs_destroy(zhp, B_FALSE);
1010         } else {
1011                 if (chmod(zonepath, S_IRWXU) != 0) {
1012                         (void) fprintf(stderr, gettext("file system %s "
1013                             "successfully created, but chmod %o failed: %s\n"),
1014                             zfs_name, S_IRWXU, strerror(errno));
1015                         (void) destroy_zfs(zonepath);
1016                 } else {
1017                         (void) printf(gettext("A ZFS file system has been "
1018                             "created for this zone.\n"));
1019                 }
1020         }
1021
1022         zfs_close(zhp);
1023 }
1024
1025 /*
1026  * If the zonepath is a ZFS file system, attempt to destroy it.  We return Z_OK
1027  * if we were able to zfs_destroy the zonepath, otherwise we return Z_ERR
1028  * which means the caller should clean up the zonepath in the traditional
1029  * way.
1030  */
1031 int
1032 destroy_zfs(char *zonepath)
1033 {
1034         zfs_handle_t    *zhp;
1035         boolean_t       is_clone = B_FALSE;
1036         char            origin[ZFS_MAXPROPLEN];
1037
1038         if ((zhp = mount2zhandle(zonepath)) == NULL)
1039                 return (Z_ERR);
1040
1041         if (promote_all_clones(zhp) != 0)
1042                 return (Z_ERR);
1043
1044         /* Now cleanup any snapshots remaining. */
1045         if (zfs_iter_snapshots(zhp, B_FALSE, rm_snap, NULL) != 0) {
1046                 zfs_close(zhp);
1047                 return (Z_ERR);
1048         }
1049
1050         /*
1051          * We can't destroy the file system if it has still has dependents.
1052          * There shouldn't be any at this point, but we'll double check.
1053          */
1054         if (zfs_iter_dependents(zhp, B_TRUE, has_dependent, NULL) != 0) {
1055                 (void) fprintf(stderr, gettext("zfs destroy %s failed: the "
1056                     "dataset still has dependents\n"), zfs_get_name(zhp));
1057                 zfs_close(zhp);
1058                 return (Z_ERR);
1059         }
1060
1061         /*
1062          * This might be a clone.  Try to get the snapshot so we can attempt
1063          * to destroy that as well.
1064          */
1065         if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), NULL,
1066             NULL, 0, B_FALSE) == 0)
1067                 is_clone = B_TRUE;
1068
1069         if (zfs_unmount(zhp, NULL, 0) != 0) {
1070                 (void) fprintf(stderr, gettext("zfs unmount %s failed: %s\n"),
1071                     zfs_get_name(zhp), libzfs_error_description(g_zfs));
1072                 zfs_close(zhp);
1073                 return (Z_ERR);
1074         }
1075
1076         if (zfs_destroy(zhp, B_FALSE) != 0) {
1077                 /*
1078                  * If the destroy fails for some reason, try to remount
1079                  * the file system so that we can use "rm -rf" to clean up
1080                  * instead.
1081                  */
1082                 (void) fprintf(stderr, gettext("zfs destroy %s failed: %s\n"),
1083                     zfs_get_name(zhp), libzfs_error_description(g_zfs));
1084                 (void) zfs_mount(zhp, NULL, 0);
1085                 zfs_close(zhp);
1086                 return (Z_ERR);
1087         }
1088
1089         /*
1090          * If the zone has ever been moved then the mountpoint dir will not be
1091          * cleaned up by the zfs_destroy().  To handle this case try to clean
1092          * it up now but don't worry if it fails, that will be normal.
1093          */
1094         (void) rmdir(zonepath);
1095
1096         (void) printf(gettext("The ZFS file system for this zone has been "
1097             "destroyed.\n"));
1098
1099         if (is_clone) {
1100                 zfs_handle_t    *ohp;
1101
1102                 /*
1103                  * Try to clean up the snapshot that the clone was taken from.
1104                  */
1105                 if ((ohp = zfs_open(g_zfs, origin,
1106                     ZFS_TYPE_SNAPSHOT)) != NULL) {
1107                         if (zfs_iter_dependents(ohp, B_TRUE, has_dependent,
1108                             NULL) == 0 && zfs_unmount(ohp, NULL, 0) == 0)
1109                                 (void) zfs_destroy(ohp, B_FALSE);
1110                         zfs_close(ohp);
1111                 }
1112         }
1113
1114         zfs_close(zhp);
1115         return (Z_OK);
1116 }
1117
1118 /*
1119  * Return true if the path is its own zfs file system.  We determine this
1120  * by stat-ing the path to see if it is zfs and stat-ing the parent to see
1121  * if it is a different fs.
1122  */
1123 boolean_t
1124 is_zonepath_zfs(char *zonepath)
1125 {
1126         int res;
1127         char *path;
1128         char *parent;
1129         struct statvfs64 buf1, buf2;
1130
1131         if (statvfs(zonepath, &buf1) != 0)
1132                 return (B_FALSE);
1133
1134         if (strcmp(buf1.f_basetype, "zfs") != 0)
1135                 return (B_FALSE);
1136
1137         if ((path = strdup(zonepath)) == NULL)
1138                 return (B_FALSE);
1139
1140         parent = dirname(path);
1141         res = statvfs(parent, &buf2);
1142         free(path);
1143
1144         if (res != 0)
1145                 return (B_FALSE);
1146
1147         if (buf1.f_fsid == buf2.f_fsid)
1148                 return (B_FALSE);
1149
1150         return (B_TRUE);
1151 }
1152
1153 /*
1154  * Implement the fast move of a ZFS file system by simply updating the
1155  * mountpoint.  Since it is file system already, we don't have the
1156  * issue of cross-file system copying.
1157  */
1158 int
1159 move_zfs(char *zonepath, char *new_zonepath)
1160 {
1161         int             ret = Z_ERR;
1162         zfs_handle_t    *zhp;
1163
1164         if ((zhp = mount2zhandle(zonepath)) == NULL)
1165                 return (Z_ERR);
1166
1167         if (zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1168             new_zonepath) == 0) {
1169                 /*
1170                  * Clean up the old mount point.  We ignore any failure since
1171                  * the zone is already successfully mounted on the new path.
1172                  */
1173                 (void) rmdir(zonepath);
1174                 ret = Z_OK;
1175         }
1176
1177         zfs_close(zhp);
1178
1179         return (ret);
1180 }
1181
1182 /*
1183  * Validate that the given dataset exists on the system, and that neither it nor
1184  * its children are zvols.
1185  *
1186  * Note that we don't do anything with the 'zoned' property here.  All
1187  * management is done in zoneadmd when the zone is actually rebooted.  This
1188  * allows us to automatically set the zoned property even when a zone is
1189  * rebooted by the administrator.
1190  */
1191 int
1192 verify_datasets(zone_dochandle_t handle)
1193 {
1194         int return_code = Z_OK;
1195         struct zone_dstab dstab;
1196         zfs_handle_t *zhp;
1197         char propbuf[ZFS_MAXPROPLEN];
1198         char source[ZFS_MAX_DATASET_NAME_LEN];
1199         zprop_source_t srctype;
1200
1201         if (zonecfg_setdsent(handle) != Z_OK) {
1202                 /*
1203                  * TRANSLATION_NOTE
1204                  * zfs and dataset are literals that should not be translated.
1205                  */
1206                 (void) fprintf(stderr, gettext("could not verify zfs datasets: "
1207                     "unable to enumerate datasets\n"));
1208                 return (Z_ERR);
1209         }
1210
1211         while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
1212
1213                 if ((zhp = zfs_open(g_zfs, dstab.zone_dataset_name,
1214                     ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
1215                         (void) fprintf(stderr, gettext("could not verify zfs "
1216                             "dataset %s: %s\n"), dstab.zone_dataset_name,
1217                             libzfs_error_description(g_zfs));
1218                         return_code = Z_ERR;
1219                         continue;
1220                 }
1221
1222                 if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf,
1223                     sizeof (propbuf), &srctype, source,
1224                     sizeof (source), 0) == 0 &&
1225                     (srctype == ZPROP_SRC_INHERITED)) {
1226                         (void) fprintf(stderr, gettext("could not verify zfs "
1227                             "dataset %s: mountpoint cannot be inherited\n"),
1228                             dstab.zone_dataset_name);
1229                         return_code = Z_ERR;
1230                         zfs_close(zhp);
1231                         continue;
1232                 }
1233
1234                 zfs_close(zhp);
1235         }
1236         (void) zonecfg_enddsent(handle);
1237
1238         return (return_code);
1239 }
1240
1241 /*
1242  * Verify that the ZFS dataset exists, and its mountpoint
1243  * property is set to "legacy".
1244  */
1245 int
1246 verify_fs_zfs(struct zone_fstab *fstab)
1247 {
1248         zfs_handle_t *zhp;
1249         char propbuf[ZFS_MAXPROPLEN];
1250
1251         if ((zhp = zfs_open(g_zfs, fstab->zone_fs_special,
1252             ZFS_TYPE_DATASET)) == NULL) {
1253                 (void) fprintf(stderr, gettext("could not verify fs %s: "
1254                     "could not access zfs dataset '%s'\n"),
1255                     fstab->zone_fs_dir, fstab->zone_fs_special);
1256                 return (Z_ERR);
1257         }
1258
1259         if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
1260                 (void) fprintf(stderr, gettext("cannot verify fs %s: "
1261                     "'%s' is not a file system\n"),
1262                     fstab->zone_fs_dir, fstab->zone_fs_special);
1263                 zfs_close(zhp);
1264                 return (Z_ERR);
1265         }
1266
1267         if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf, sizeof (propbuf),
1268             NULL, NULL, 0, 0) != 0 || strcmp(propbuf, "legacy") != 0) {
1269                 (void) fprintf(stderr, gettext("could not verify fs %s: "
1270                     "zfs '%s' mountpoint is not \"legacy\"\n"),
1271                     fstab->zone_fs_dir, fstab->zone_fs_special);
1272                 zfs_close(zhp);
1273                 return (Z_ERR);
1274         }
1275
1276         zfs_close(zhp);
1277         return (Z_OK);
1278 }
1279
1280 /*
1281  * Destroy the specified mnttab structure that was created by mnttab_dup().
1282  * NOTE: The structure's mnt_time field isn't freed.
1283  */
1284 static void
1285 mnttab_destroy(struct mnttab *tabp)
1286 {
1287         assert(tabp != NULL);
1288
1289         free(tabp->mnt_mountp);
1290         free(tabp->mnt_special);
1291         free(tabp->mnt_fstype);
1292         free(tabp->mnt_mntopts);
1293         free(tabp);
1294 }
1295
1296 /*
1297  * Duplicate the specified mnttab structure.  The mnt_mountp and mnt_time
1298  * fields aren't duplicated.  This function returns a pointer to the new mnttab
1299  * structure or NULL if an error occurred.  If an error occurs, then this
1300  * function sets errno to reflect the error.  mnttab structures created by
1301  * this function should be destroyed via mnttab_destroy().
1302  */
1303 static struct mnttab *
1304 mnttab_dup(const struct mnttab *srcp)
1305 {
1306         struct mnttab *retval;
1307
1308         assert(srcp != NULL);
1309
1310         retval = (struct mnttab *)calloc(1, sizeof (*retval));
1311         if (retval == NULL) {
1312                 errno = ENOMEM;
1313                 return (NULL);
1314         }
1315         if (srcp->mnt_special != NULL) {
1316                 retval->mnt_special = strdup(srcp->mnt_special);
1317                 if (retval->mnt_special == NULL)
1318                         goto err;
1319         }
1320         if (srcp->mnt_fstype != NULL) {
1321                 retval->mnt_fstype = strdup(srcp->mnt_fstype);
1322                 if (retval->mnt_fstype == NULL)
1323                         goto err;
1324         }
1325         retval->mnt_mntopts = (char *)malloc(MAX_MNTOPT_STR * sizeof (char));
1326         if (retval->mnt_mntopts == NULL)
1327                 goto err;
1328         if (srcp->mnt_mntopts != NULL) {
1329                 if (strlcpy(retval->mnt_mntopts, srcp->mnt_mntopts,
1330                     MAX_MNTOPT_STR * sizeof (char)) >= MAX_MNTOPT_STR *
1331                     sizeof (char)) {
1332                         mnttab_destroy(retval);
1333                         errno = EOVERFLOW; /* similar to mount(2) behavior */
1334                         return (NULL);
1335                 }
1336         } else {
1337                 retval->mnt_mntopts[0] = '\0';
1338         }
1339         return (retval);
1340
1341 err:
1342         mnttab_destroy(retval);
1343         errno = ENOMEM;
1344         return (NULL);
1345 }
1346
1347 /*
1348  * Determine whether the specified ZFS dataset's mountpoint property is set
1349  * to "legacy".  If the specified dataset does not have a legacy mountpoint,
1350  * then the string pointer to which the mountpoint argument points is assigned
1351  * a dynamically-allocated string containing the dataset's mountpoint
1352  * property.  If the dataset's mountpoint property is "legacy" or a libzfs
1353  * error occurs, then the string pointer to which the mountpoint argument
1354  * points isn't modified.
1355  *
1356  * This function returns B_TRUE if it doesn't encounter any fatal errors.
1357  * It returns B_FALSE if it encounters a fatal error and sets errno to the
1358  * appropriate error code.
1359  */
1360 static boolean_t
1361 get_zfs_non_legacy_mountpoint(const char *dataset_name, char **mountpoint)
1362 {
1363         zfs_handle_t *zhp;
1364         char propbuf[ZFS_MAXPROPLEN];
1365
1366         assert(dataset_name != NULL);
1367         assert(mountpoint != NULL);
1368
1369         if ((zhp = zfs_open(g_zfs, dataset_name, ZFS_TYPE_DATASET)) == NULL) {
1370                 errno = EINVAL;
1371                 return (B_FALSE);
1372         }
1373         if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf, sizeof (propbuf),
1374             NULL, NULL, 0, 0) != 0) {
1375                 zfs_close(zhp);
1376                 errno = EINVAL;
1377                 return (B_FALSE);
1378         }
1379         zfs_close(zhp);
1380         if (strcmp(propbuf, "legacy") != 0) {
1381                 if ((*mountpoint = strdup(propbuf)) == NULL) {
1382                         errno = ENOMEM;
1383                         return (B_FALSE);
1384                 }
1385         }
1386         return (B_TRUE);
1387 }
1388
1389
1390 /*
1391  * This zonecfg_find_mounts() callback records information about mounts of
1392  * interest in a zonepath.  It also tallies the number of zone
1393  * root overlay mounts and the number of unexpected mounts found.
1394  * This function outputs errors using zerror() if it finds unexpected
1395  * mounts.  cookiep should point to an initialized zone_mounts_t structure.
1396  *
1397  * This function returns zero on success and a nonzero value on failure.
1398  */
1399 static int
1400 zone_mounts_cb(const struct mnttab *mountp, void *cookiep)
1401 {
1402         zone_mounts_t *mounts;
1403         const char *zone_mount_dir;
1404
1405         assert(mountp != NULL);
1406         assert(cookiep != NULL);
1407
1408         mounts = (zone_mounts_t *)cookiep;
1409         zone_mount_dir = mountp->mnt_mountp + mounts->zonepath_len;
1410         if (strcmp(zone_mount_dir, "/root") == 0) {
1411                 /*
1412                  * Check for an overlay mount.  If we already detected a /root
1413                  * mount, then the current mount must be an overlay mount.
1414                  */
1415                 if (mounts->root_mnttab != NULL) {
1416                         mounts->num_root_overlay_mounts++;
1417                         return (0);
1418                 }
1419
1420                 /*
1421                  * Store the root mount's mnttab information in the
1422                  * zone_mounts_t structure for future use.
1423                  */
1424                 if ((mounts->root_mnttab = mnttab_dup(mountp)) == NULL) {
1425                         zperror(cmd_to_str(CMD_MOVE), B_FALSE);
1426                         return (-1);
1427                 }
1428
1429                 /*
1430                  * Determine if the filesystem is a ZFS filesystem with a
1431                  * non-legacy mountpoint.  If it is, then set the root
1432                  * filesystem's mnttab's mnt_mountp field to a non-NULL
1433                  * value, which will serve as a flag to indicate this special
1434                  * condition.
1435                  */
1436                 if (strcmp(mountp->mnt_fstype, MNTTYPE_ZFS) == 0 &&
1437                     get_zfs_non_legacy_mountpoint(mountp->mnt_special,
1438                     &mounts->root_mnttab->mnt_mountp) != B_TRUE) {
1439                         zperror(cmd_to_str(CMD_MOVE), B_FALSE);
1440                         return (-1);
1441                 }
1442         } else {
1443                 /*
1444                  * An unexpected mount was found.  Notify the user.
1445                  */
1446                 if (mounts->num_unexpected_mounts == 0)
1447                         zerror(gettext("These file systems are mounted on "
1448                             "subdirectories of %s.\n"), mounts->zonepath);
1449                 mounts->num_unexpected_mounts++;
1450                 (void) zfm_print(mountp, NULL);
1451         }
1452         return (0);
1453 }
1454
1455 /*
1456  * Initialize the specified zone_mounts_t structure for the given zonepath.
1457  * If this function succeeds, it returns zero and the specified zone_mounts_t
1458  * structure contains information about mounts in the specified zonepath.
1459  * The function returns a nonzero value if it fails.  The zone_mounts_t
1460  * structure doesn't need be destroyed via zone_mounts_destroy() if this
1461  * function fails.
1462  */
1463 int
1464 zone_mounts_init(zone_mounts_t *mounts, const char *zonepath)
1465 {
1466         assert(mounts != NULL);
1467         assert(zonepath != NULL);
1468
1469         bzero(mounts, sizeof (*mounts));
1470         if ((mounts->zonepath = strdup(zonepath)) == NULL) {
1471                 zerror(gettext("the process ran out of memory while checking "
1472                     "for mounts in zonepath %s."), zonepath);
1473                 return (-1);
1474         }
1475         mounts->zonepath_len = strlen(zonepath);
1476         if (zonecfg_find_mounts((char *)zonepath, zone_mounts_cb, mounts) ==
1477             -1) {
1478                 zerror(gettext("an error occurred while checking for mounts "
1479                     "in zonepath %s."), zonepath);
1480                 zone_mounts_destroy(mounts);
1481                 return (-1);
1482         }
1483         return (0);
1484 }
1485
1486 /*
1487  * Destroy the memory used by the specified zone_mounts_t structure's fields.
1488  * This function doesn't free the memory occupied by the structure itself
1489  * (i.e., it doesn't free the parameter).
1490  */
1491 void
1492 zone_mounts_destroy(zone_mounts_t *mounts)
1493 {
1494         assert(mounts != NULL);
1495
1496         free(mounts->zonepath);
1497         if (mounts->root_mnttab != NULL)
1498                 mnttab_destroy(mounts->root_mnttab);
1499 }
1500
1501 /*
1502  * Mount a moving zone's root filesystem (if it had a root filesystem mount
1503  * prior to the move) using the specified zonepath.  mounts should refer to
1504  * the zone_mounts_t structure describing the zone's mount information.
1505  *
1506  * This function returns zero if the mount succeeds and a nonzero value
1507  * if it doesn't.
1508  */
1509 int
1510 zone_mount_rootfs(zone_mounts_t *mounts, const char *zonepath)
1511 {
1512         char zoneroot[MAXPATHLEN];
1513         struct mnttab *mtab;
1514         int flags;
1515
1516         assert(mounts != NULL);
1517         assert(zonepath != NULL);
1518
1519         /*
1520          * If there isn't a root filesystem, then don't do anything.
1521          */
1522         mtab = mounts->root_mnttab;
1523         if (mtab == NULL)
1524                 return (0);
1525
1526         /*
1527          * Determine the root filesystem's new mountpoint.
1528          */
1529         if (snprintf(zoneroot, sizeof (zoneroot), "%s/root", zonepath) >=
1530             sizeof (zoneroot)) {
1531                 zerror(gettext("Zonepath %s is too long.\n"), zonepath);
1532                 return (-1);
1533         }
1534
1535         /*
1536          * If the root filesystem is a non-legacy ZFS filesystem (i.e., if it's
1537          * mnt_mountp field is non-NULL), then make the filesystem's new
1538          * mount point its mountpoint property and mount the filesystem.
1539          */
1540         if (mtab->mnt_mountp != NULL) {
1541                 zfs_handle_t *zhp;
1542
1543                 if ((zhp = zfs_open(g_zfs, mtab->mnt_special,
1544                     ZFS_TYPE_DATASET)) == NULL) {
1545                         zerror(gettext("could not get ZFS handle for the zone's"
1546                             " root filesystem"));
1547                         return (-1);
1548                 }
1549                 if (zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1550                     zoneroot) != 0) {
1551                         zerror(gettext("could not modify zone's root "
1552                             "filesystem's mountpoint property"));
1553                         zfs_close(zhp);
1554                         return (-1);
1555                 }
1556                 if (zfs_mount(zhp, mtab->mnt_mntopts, 0) != 0) {
1557                         zerror(gettext("unable to mount zone root %s: %s"),
1558                             zoneroot, libzfs_error_description(g_zfs));
1559                         if (zfs_prop_set(zhp,
1560                             zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1561                             mtab->mnt_mountp) != 0)
1562                                 zerror(gettext("unable to restore zone's root "
1563                                     "filesystem's mountpoint property"));
1564                         zfs_close(zhp);
1565                         return (-1);
1566                 }
1567                 zfs_close(zhp);
1568                 return (0);
1569         }
1570
1571         /*
1572          * The root filesystem is either a legacy-mounted ZFS filesystem or
1573          * a non-ZFS filesystem.  Use mount(2) to mount the root filesystem.
1574          */
1575         if (mtab->mnt_mntopts != NULL)
1576                 flags = MS_OPTIONSTR;
1577         else
1578                 flags = 0;
1579         if (mount(mtab->mnt_special, zoneroot, flags, mtab->mnt_fstype, NULL, 0,
1580             mtab->mnt_mntopts, MAX_MNTOPT_STR * sizeof (char)) != 0) {
1581                 flags = errno;
1582                 zerror(gettext("unable to mount zone root %s: %s"), zoneroot,
1583                     strerror(flags));
1584                 return (-1);
1585         }
1586         return (0);
1587 }
1588
1589 /*
1590  * Unmount a moving zone's root filesystem (if such a mount exists) using the
1591  * specified zonepath.  mounts should refer to the zone_mounts_t structure
1592  * describing the zone's mount information.  If force is B_TRUE, then if the
1593  * unmount fails, then the function will try to forcibly unmount the zone's root
1594  * filesystem.
1595  *
1596  * This function returns zero if the unmount (forced or otherwise) succeeds;
1597  * otherwise, it returns a nonzero value.
1598  */
1599 int
1600 zone_unmount_rootfs(zone_mounts_t *mounts, const char *zonepath,
1601     boolean_t force)
1602 {
1603         char zoneroot[MAXPATHLEN];
1604         struct mnttab *mtab;
1605         int err;
1606
1607         assert(mounts != NULL);
1608         assert(zonepath != NULL);
1609
1610         /*
1611          * If there isn't a root filesystem, then don't do anything.
1612          */
1613         mtab = mounts->root_mnttab;
1614         if (mtab == NULL)
1615                 return (0);
1616
1617         /*
1618          * Determine the root filesystem's mountpoint.
1619          */
1620         if (snprintf(zoneroot, sizeof (zoneroot), "%s/root", zonepath) >=
1621             sizeof (zoneroot)) {
1622                 zerror(gettext("Zonepath %s is too long.\n"), zonepath);
1623                 return (-1);
1624         }
1625
1626         /*
1627          * If the root filesystem is a non-legacy ZFS fileystem, then unmount
1628          * the filesystem via libzfs.
1629          */
1630         if (mtab->mnt_mountp != NULL) {
1631                 zfs_handle_t *zhp;
1632
1633                 if ((zhp = zfs_open(g_zfs, mtab->mnt_special,
1634                     ZFS_TYPE_DATASET)) == NULL) {
1635                         zerror(gettext("could not get ZFS handle for the zone's"
1636                             " root filesystem"));
1637                         return (-1);
1638                 }
1639                 if (zfs_unmount(zhp, zoneroot, 0) != 0) {
1640                         if (force && zfs_unmount(zhp, zoneroot, MS_FORCE) ==
1641                             0) {
1642                                 zfs_close(zhp);
1643                                 return (0);
1644                         }
1645                         zerror(gettext("unable to unmount zone root %s: %s"),
1646                             zoneroot, libzfs_error_description(g_zfs));
1647                         zfs_close(zhp);
1648                         return (-1);
1649                 }
1650                 zfs_close(zhp);
1651                 return (0);
1652         }
1653
1654         /*
1655          * Use umount(2) to unmount the root filesystem.  If this fails, then
1656          * forcibly unmount it if the force flag is set.
1657          */
1658         if (umount(zoneroot) != 0) {
1659                 if (force && umount2(zoneroot, MS_FORCE) == 0)
1660                         return (0);
1661                 err = errno;
1662                 zerror(gettext("unable to unmount zone root %s: %s"), zoneroot,
1663                     strerror(err));
1664                 return (-1);
1665         }
1666         return (0);
1667 }
1668
1669 int
1670 init_zfs(void)
1671 {
1672         if ((g_zfs = libzfs_init()) == NULL) {
1673                 (void) fprintf(stderr, gettext("failed to initialize ZFS "
1674                     "library\n"));
1675                 return (Z_ERR);
1676         }
1677
1678         return (Z_OK);
1679 }