4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * Support for ephemeral mounts, e.g. mirror-mounts. These mounts are
29 * triggered from a "stub" rnode via a special set of vnodeops.
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
37 #include <sys/vnode.h>
40 #include <sys/filio.h>
44 #include <sys/pathname.h>
45 #include <sys/dirent.h>
46 #include <sys/debug.h>
47 #include <sys/vmsystm.h>
48 #include <sys/fcntl.h>
49 #include <sys/flock.h>
51 #include <sys/errno.h>
52 #include <sys/strsubr.h>
53 #include <sys/sysmacros.h>
55 #include <sys/mount.h>
56 #include <sys/cmn_err.h>
57 #include <sys/pathconf.h>
58 #include <sys/utsname.h>
61 #include <sys/systeminfo.h>
62 #include <sys/policy.h>
66 #include <sys/mntent.h>
69 #include <rpc/types.h>
74 #include <nfs/nfs_clnt.h>
75 #include <nfs/nfs_acl.h>
78 #include <nfs/nfs4_kprot.h>
79 #include <nfs/rnode4.h>
80 #include <nfs/nfs4_clnt.h>
81 #include <nfs/nfsid_map.h>
82 #include <nfs/nfs4_idmap_impl.h>
89 #include <vm/seg_map.h>
90 #include <vm/seg_kpm.h>
91 #include <vm/seg_vn.h>
93 #include <sys/fs_subr.h>
96 #include <sys/int_fmtio.h>
98 #include <sys/sunddi.h>
100 #include <sys/priv_names.h>
102 extern zone_key_t nfs4clnt_zone_key
;
103 extern zone_key_t nfsidmap_zone_key
;
106 * The automatic unmounter thread stuff!
108 static int nfs4_trigger_thread_timer
= 20; /* in seconds */
113 static uint_t nfs4_trigger_mount_to
= 240;
115 typedef struct nfs4_trigger_globals
{
116 kmutex_t ntg_forest_lock
;
118 int ntg_thread_started
;
119 nfs4_ephemeral_tree_t
*ntg_forest
;
120 } nfs4_trigger_globals_t
;
122 kmutex_t nfs4_ephemeral_thread_lock
;
124 zone_key_t nfs4_ephemeral_key
= ZONE_KEY_UNINITIALIZED
;
126 static void nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t
*);
129 * Used for ephemeral mounts; contains data either duplicated from
130 * servinfo4_t, or hand-crafted, depending on type of ephemeral mount.
132 * It's intended that this structure is used solely for ephemeral
133 * mount-type specific data, for passing this data to
134 * nfs4_trigger_nargs_create().
136 typedef struct ephemeral_servinfo
{
142 struct netbuf
*esi_addr
;
143 struct netbuf
*esi_syncaddr
;
144 struct knetconfig
*esi_knconf
;
145 } ephemeral_servinfo_t
;
148 * Collect together the mount-type specific and generic data args.
150 typedef struct domount_args
{
151 ephemeral_servinfo_t
*dma_esi
;
152 char *dma_hostlist
; /* comma-sep. for RO failover */
153 struct nfs_args
*dma_nargs
;
158 * The vnode ops functions for a trigger stub vnode
160 static int nfs4_trigger_open(vnode_t
**, int, cred_t
*, caller_context_t
*);
161 static int nfs4_trigger_getattr(vnode_t
*, struct vattr
*, int, cred_t
*,
163 static int nfs4_trigger_setattr(vnode_t
*, struct vattr
*, int, cred_t
*,
165 static int nfs4_trigger_access(vnode_t
*, int, int, cred_t
*,
167 static int nfs4_trigger_readlink(vnode_t
*, struct uio
*, cred_t
*,
169 static int nfs4_trigger_lookup(vnode_t
*, char *, vnode_t
**,
170 struct pathname
*, int, vnode_t
*, cred_t
*, caller_context_t
*,
171 int *, pathname_t
*);
172 static int nfs4_trigger_create(vnode_t
*, char *, struct vattr
*,
173 enum vcexcl
, int, vnode_t
**, cred_t
*, int, caller_context_t
*,
175 static int nfs4_trigger_remove(vnode_t
*, char *, cred_t
*, caller_context_t
*,
177 static int nfs4_trigger_link(vnode_t
*, vnode_t
*, char *, cred_t
*,
178 caller_context_t
*, int);
179 static int nfs4_trigger_rename(vnode_t
*, char *, vnode_t
*, char *,
180 cred_t
*, caller_context_t
*, int);
181 static int nfs4_trigger_mkdir(vnode_t
*, char *, struct vattr
*,
182 vnode_t
**, cred_t
*, caller_context_t
*, int, vsecattr_t
*vsecp
);
183 static int nfs4_trigger_rmdir(vnode_t
*, char *, vnode_t
*, cred_t
*,
184 caller_context_t
*, int);
185 static int nfs4_trigger_symlink(vnode_t
*, char *, struct vattr
*, char *,
186 cred_t
*, caller_context_t
*, int);
187 static int nfs4_trigger_cmp(vnode_t
*, vnode_t
*, caller_context_t
*);
190 * Regular NFSv4 vnodeops that we need to reference directly
192 extern int nfs4_getattr(vnode_t
*, struct vattr
*, int, cred_t
*,
194 extern void nfs4_inactive(vnode_t
*, cred_t
*, caller_context_t
*);
195 extern int nfs4_rwlock(vnode_t
*, int, caller_context_t
*);
196 extern void nfs4_rwunlock(vnode_t
*, int, caller_context_t
*);
197 extern int nfs4_lookup(vnode_t
*, char *, vnode_t
**,
198 struct pathname
*, int, vnode_t
*, cred_t
*,
199 caller_context_t
*, int *, pathname_t
*);
200 extern int nfs4_pathconf(vnode_t
*, int, ulong_t
*, cred_t
*,
202 extern int nfs4_getsecattr(vnode_t
*, vsecattr_t
*, int, cred_t
*,
204 extern int nfs4_fid(vnode_t
*, fid_t
*, caller_context_t
*);
205 extern int nfs4_realvp(vnode_t
*, vnode_t
**, caller_context_t
*);
207 static int nfs4_trigger_mount(vnode_t
*, cred_t
*, vnode_t
**);
208 static int nfs4_trigger_domount(vnode_t
*, domount_args_t
*, vfs_t
**,
209 cred_t
*, vnode_t
**);
210 static int nfs4_trigger_domount_args_create(vnode_t
*, cred_t
*,
211 domount_args_t
**dmap
);
212 static void nfs4_trigger_domount_args_destroy(domount_args_t
*dma
,
214 static ephemeral_servinfo_t
*nfs4_trigger_esi_create(vnode_t
*, servinfo4_t
*,
216 static void nfs4_trigger_esi_destroy(ephemeral_servinfo_t
*, vnode_t
*);
217 static ephemeral_servinfo_t
*nfs4_trigger_esi_create_mirrormount(vnode_t
*,
219 static ephemeral_servinfo_t
*nfs4_trigger_esi_create_referral(vnode_t
*,
221 static struct nfs_args
*nfs4_trigger_nargs_create(mntinfo4_t
*, servinfo4_t
*,
222 ephemeral_servinfo_t
*);
223 static void nfs4_trigger_nargs_destroy(struct nfs_args
*);
224 static char *nfs4_trigger_create_mntopts(vfs_t
*);
225 static void nfs4_trigger_destroy_mntopts(char *);
226 static int nfs4_trigger_add_mntopt(char *, char *, vfs_t
*);
227 static enum clnt_stat
nfs4_trigger_ping_server(servinfo4_t
*, int);
228 static enum clnt_stat
nfs4_ping_server_common(struct knetconfig
*,
229 struct netbuf
*, int);
231 extern int umount2_engine(vfs_t
*, int, cred_t
*, int);
234 * These are the vnodeops that we must define for stub vnodes.
237 * Many of the VOPs defined for NFSv4 do not need to be defined here,
238 * for various reasons. This will result in the VFS default function being
241 * - These VOPs require a previous fop_open to have occurred. That will have
242 * lost the reference to the stub vnode, meaning these should not be called:
243 * close, read, write, ioctl, readdir, seek.
245 * - These VOPs are meaningless for vnodes without data pages. Since the
246 * stub vnode is of type VDIR, these should not be called:
247 * space, getpage, putpage, map, addmap, delmap, pageio, fsync.
249 * - These VOPs are otherwise not applicable, and should not be called:
253 * These VOPs we do not want to define, but nor do we want the VFS default
254 * action. Instead, we specify an error function.
256 * - frlock, dispose, shrlock.
259 * These VOPs we define to use the corresponding regular NFSv4 vnodeop.
260 * NOTE: if any of these ops involve an OTW call with the stub FH, then
261 * that call must be wrapped with save_mnt_secinfo()/check_mnt_secinfo()
262 * to protect the security data in the servinfo4_t for the "parent"
263 * filesystem that contains the stub.
265 * - These VOPs should not trigger a mount, so that "ls -l" does not:
266 * pathconf, getsecattr.
268 * - These VOPs would not make sense to trigger:
269 * inactive, rwlock, rwunlock, fid, realvp.
271 const struct vnodeops nfs4_trigger_vnodeops
= {
272 .vnop_name
= "nfs4_trigger",
273 .vop_open
= nfs4_trigger_open
,
274 .vop_getattr
= nfs4_trigger_getattr
,
275 .vop_setattr
= nfs4_trigger_setattr
,
276 .vop_access
= nfs4_trigger_access
,
277 .vop_lookup
= nfs4_trigger_lookup
,
278 .vop_create
= nfs4_trigger_create
,
279 .vop_remove
= nfs4_trigger_remove
,
280 .vop_link
= nfs4_trigger_link
,
281 .vop_rename
= nfs4_trigger_rename
,
282 .vop_mkdir
= nfs4_trigger_mkdir
,
283 .vop_rmdir
= nfs4_trigger_rmdir
,
284 .vop_symlink
= nfs4_trigger_symlink
,
285 .vop_readlink
= nfs4_trigger_readlink
,
286 .vop_inactive
= nfs4_inactive
,
288 .vop_rwlock
= nfs4_rwlock
,
289 .vop_rwunlock
= nfs4_rwunlock
,
290 .vop_realvp
= nfs4_realvp
,
291 .vop_getsecattr
= nfs4_getsecattr
,
292 .vop_pathconf
= nfs4_pathconf
,
293 .vop_frlock
= fs_nosys
,
294 .vop_dispose
= fs_nodispose
,
295 .vop_shrlock
= fs_nosys
,
296 .vop_vnevent
= fs_vnevent_support
,
300 nfs4_ephemeral_tree_incr(nfs4_ephemeral_tree_t
*net
)
302 ASSERT(mutex_owned(&net
->net_cnt_lock
));
304 ASSERT(net
->net_refcnt
!= 0);
308 nfs4_ephemeral_tree_hold(nfs4_ephemeral_tree_t
*net
)
310 mutex_enter(&net
->net_cnt_lock
);
311 nfs4_ephemeral_tree_incr(net
);
312 mutex_exit(&net
->net_cnt_lock
);
316 * We need a safe way to decrement the refcnt whilst the
317 * lock is being held.
320 nfs4_ephemeral_tree_decr(nfs4_ephemeral_tree_t
*net
)
322 ASSERT(mutex_owned(&net
->net_cnt_lock
));
323 ASSERT(net
->net_refcnt
!= 0);
328 nfs4_ephemeral_tree_rele(nfs4_ephemeral_tree_t
*net
)
330 mutex_enter(&net
->net_cnt_lock
);
331 nfs4_ephemeral_tree_decr(net
);
332 mutex_exit(&net
->net_cnt_lock
);
336 * Trigger ops for stub vnodes; for mirror mounts, etc.
338 * The general idea is that a "triggering" op will first call
339 * nfs4_trigger_mount(), which will find out whether a mount has already
342 * If it has, then nfs4_trigger_mount() sets newvp to the root vnode
343 * of the covering vfs.
345 * If a mount has not yet been triggered, nfs4_trigger_mount() will do so,
346 * and again set newvp, as above.
348 * The triggering op may then re-issue the VOP by calling it on newvp.
350 * Note that some ops may perform custom action, and may or may not need
351 * to trigger a mount.
353 * Some ops need to call the regular NFSv4 vnodeop for a stub vnode. We
354 * obviously can't do this with VOP_<whatever>, since it's a stub vnode
355 * and that would just recurse. Instead, we call the v4 op directly,
356 * by name. This is OK, since we know that the vnode is for NFSv4,
357 * otherwise it couldn't be a stub.
362 nfs4_trigger_open(vnode_t
**vpp
, int flag
, cred_t
*cr
, caller_context_t
*ct
)
367 error
= nfs4_trigger_mount(*vpp
, cr
, &newvp
);
371 /* Release the stub vnode, as we're losing the reference to it */
374 /* Give the caller the root vnode of the newly-mounted fs */
377 /* return with VN_HELD(newvp) */
378 return (fop_open(vpp
, flag
, cr
, ct
));
382 nfs4_fake_attrs(vnode_t
*vp
, struct vattr
*vap
)
388 * Set some attributes here for referrals.
391 bzero(vap
, sizeof (struct vattr
));
403 vap
->va_fsid
= vp
->v_vfsp
->vfs_dev
;
405 vap
->va_blksize
= MAXBSIZE
;
411 * For the majority of cases, nfs4_trigger_getattr() will not trigger
412 * a mount. However, if ATTR_TRIGGER is set, we are being informed
413 * that we need to force the mount before we attempt to determine
414 * the attributes. The intent is an atomic operation for security
417 * If we're not triggering a mount, we can still inquire about the
418 * actual attributes from the server in the mirror mount case,
419 * and will return manufactured attributes for a referral (see
420 * the 'create' branch of find_referral_stubvp()).
423 nfs4_trigger_getattr(vnode_t
*vp
, struct vattr
*vap
, int flags
, cred_t
*cr
,
424 caller_context_t
*ct
)
428 if (flags
& ATTR_TRIGGER
) {
431 error
= nfs4_trigger_mount(vp
, cr
, &newvp
);
435 error
= fop_getattr(newvp
, vap
, flags
, cr
, ct
);
438 } else if (RP_ISSTUB_MIRRORMOUNT(VTOR4(vp
))) {
440 error
= nfs4_getattr(vp
, vap
, flags
, cr
, ct
);
442 } else if (RP_ISSTUB_REFERRAL(VTOR4(vp
))) {
444 nfs4_fake_attrs(vp
, vap
);
452 nfs4_trigger_setattr(vnode_t
*vp
, struct vattr
*vap
, int flags
, cred_t
*cr
,
453 caller_context_t
*ct
)
458 error
= nfs4_trigger_mount(vp
, cr
, &newvp
);
462 error
= fop_setattr(newvp
, vap
, flags
, cr
, ct
);
469 nfs4_trigger_access(vnode_t
*vp
, int mode
, int flags
, cred_t
*cr
,
470 caller_context_t
*ct
)
475 error
= nfs4_trigger_mount(vp
, cr
, &newvp
);
479 error
= fop_access(newvp
, mode
, flags
, cr
, ct
);
486 nfs4_trigger_lookup(vnode_t
*dvp
, char *nm
, vnode_t
**vpp
,
487 struct pathname
*pnp
, int flags
, vnode_t
*rdir
, cred_t
*cr
,
488 caller_context_t
*ct
, int *deflags
, pathname_t
*rpnp
)
492 rnode4_t
*drp
= VTOR4(dvp
);
494 ASSERT(RP_ISSTUB(drp
));
497 * It's not legal to lookup ".." for an fs root, so we mustn't pass
498 * that up. Instead, pass onto the regular op, regardless of whether
499 * we've triggered a mount.
501 if (strcmp(nm
, "..") == 0)
502 if (RP_ISSTUB_MIRRORMOUNT(drp
)) {
503 return (nfs4_lookup(dvp
, nm
, vpp
, pnp
, flags
, rdir
, cr
,
505 } else if (RP_ISSTUB_REFERRAL(drp
)) {
506 /* Return the parent vnode */
507 return (vtodv(dvp
, vpp
, cr
, TRUE
));
510 error
= nfs4_trigger_mount(dvp
, cr
, &newdvp
);
514 error
= fop_lookup(newdvp
, nm
, vpp
, pnp
, flags
, rdir
, cr
, ct
,
522 nfs4_trigger_create(vnode_t
*dvp
, char *nm
, struct vattr
*va
,
523 enum vcexcl exclusive
, int mode
, vnode_t
**vpp
, cred_t
*cr
,
524 int flags
, caller_context_t
*ct
, vsecattr_t
*vsecp
)
529 error
= nfs4_trigger_mount(dvp
, cr
, &newdvp
);
533 error
= fop_create(newdvp
, nm
, va
, exclusive
, mode
, vpp
, cr
,
541 nfs4_trigger_remove(vnode_t
*dvp
, char *nm
, cred_t
*cr
, caller_context_t
*ct
,
547 error
= nfs4_trigger_mount(dvp
, cr
, &newdvp
);
551 error
= fop_remove(newdvp
, nm
, cr
, ct
, flags
);
558 nfs4_trigger_link(vnode_t
*tdvp
, vnode_t
*svp
, char *tnm
, cred_t
*cr
,
559 caller_context_t
*ct
, int flags
)
564 error
= nfs4_trigger_mount(tdvp
, cr
, &newtdvp
);
569 * We don't check whether svp is a stub. Let the NFSv4 code
570 * detect that error, and return accordingly.
572 error
= fop_link(newtdvp
, svp
, tnm
, cr
, ct
, flags
);
579 nfs4_trigger_rename(vnode_t
*sdvp
, char *snm
, vnode_t
*tdvp
, char *tnm
,
580 cred_t
*cr
, caller_context_t
*ct
, int flags
)
584 rnode4_t
*tdrp
= VTOR4(tdvp
);
587 * We know that sdvp is a stub, otherwise we would not be here.
589 * If tdvp is also a stub, there are two possibilities: it
590 * is either the same stub as sdvp [i.e. VN_CMP(sdvp, tdvp)]
591 * or it is a different stub [!VN_CMP(sdvp, tdvp)].
593 * In the former case, just trigger sdvp, and treat tdvp as
594 * though it were not a stub.
596 * In the latter case, it might be a different stub for the
597 * same server fs as sdvp, or for a different server fs.
598 * Regardless, from the client perspective this would still
599 * be a cross-filesystem rename, and should not be allowed,
600 * so return EXDEV, without triggering either mount.
602 if (RP_ISSTUB(tdrp
) && !VN_CMP(sdvp
, tdvp
))
605 error
= nfs4_trigger_mount(sdvp
, cr
, &newsdvp
);
609 error
= fop_rename(newsdvp
, snm
, tdvp
, tnm
, cr
, ct
, flags
);
618 nfs4_trigger_mkdir(vnode_t
*dvp
, char *nm
, struct vattr
*va
, vnode_t
**vpp
,
619 cred_t
*cr
, caller_context_t
*ct
, int flags
, vsecattr_t
*vsecp
)
624 error
= nfs4_trigger_mount(dvp
, cr
, &newdvp
);
628 error
= fop_mkdir(newdvp
, nm
, va
, vpp
, cr
, ct
, flags
, vsecp
);
635 nfs4_trigger_rmdir(vnode_t
*dvp
, char *nm
, vnode_t
*cdir
, cred_t
*cr
,
636 caller_context_t
*ct
, int flags
)
641 error
= nfs4_trigger_mount(dvp
, cr
, &newdvp
);
645 error
= fop_rmdir(newdvp
, nm
, cdir
, cr
, ct
, flags
);
652 nfs4_trigger_symlink(vnode_t
*dvp
, char *lnm
, struct vattr
*tva
, char *tnm
,
653 cred_t
*cr
, caller_context_t
*ct
, int flags
)
658 error
= nfs4_trigger_mount(dvp
, cr
, &newdvp
);
662 error
= fop_symlink(newdvp
, lnm
, tva
, tnm
, cr
, ct
, flags
);
669 nfs4_trigger_readlink(vnode_t
*vp
, struct uio
*uiop
, cred_t
*cr
,
670 caller_context_t
*ct
)
675 error
= nfs4_trigger_mount(vp
, cr
, &newvp
);
679 error
= fop_readlink(newvp
, uiop
, cr
, ct
);
685 /* end of trigger vnode ops */
688 * See if the mount has already been done by another caller.
691 nfs4_trigger_mounted_already(vnode_t
*vp
, vnode_t
**newvpp
,
692 bool_t
*was_mounted
, vfs_t
**vfsp
)
695 mntinfo4_t
*mi
= VTOMI4(vp
);
697 *was_mounted
= FALSE
;
699 error
= vn_vfsrlock_wait(vp
);
703 *vfsp
= vn_mountedvfs(vp
);
705 /* the mount has already occurred */
706 error
= VFS_ROOT(*vfsp
, newvpp
);
708 /* need to update the reference time */
709 mutex_enter(&mi
->mi_lock
);
710 if (mi
->mi_ephemeral
)
711 mi
->mi_ephemeral
->ne_ref_time
=
713 mutex_exit(&mi
->mi_lock
);
724 * Mount upon a trigger vnode; for mirror-mounts, referrals, etc.
726 * The mount may have already occurred, via another thread. If not,
727 * assemble the location information - which may require fetching - and
730 * Sets newvp to be the root of the fs that is now covering vp. Note
731 * that we return with VN_HELD(*newvp).
733 * The caller is responsible for passing the VOP onto the covering fs.
736 nfs4_trigger_mount(vnode_t
*vp
, cred_t
*cr
, vnode_t
**newvpp
)
740 rnode4_t
*rp
= VTOR4(vp
);
741 mntinfo4_t
*mi
= VTOMI4(vp
);
744 nfs4_ephemeral_tree_t
*net
;
746 bool_t must_unlock
= FALSE
;
747 bool_t is_building
= FALSE
;
748 bool_t was_mounted
= FALSE
;
750 cred_t
*mcred
= NULL
;
752 nfs4_trigger_globals_t
*ntg
;
754 zone_t
*zone
= curproc
->p_zone
;
756 ASSERT(RP_ISSTUB(rp
));
761 * Has the mount already occurred?
763 error
= nfs4_trigger_mounted_already(vp
, newvpp
,
764 &was_mounted
, &vfsp
);
765 if (error
|| was_mounted
)
768 ntg
= zone_getspecific(nfs4_ephemeral_key
, zone
);
771 mutex_enter(&mi
->mi_lock
);
774 * We need to lock down the ephemeral tree.
776 if (mi
->mi_ephemeral_tree
== NULL
) {
777 net
= kmem_zalloc(sizeof (*net
), KM_SLEEP
);
778 mutex_init(&net
->net_tree_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
779 mutex_init(&net
->net_cnt_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
781 net
->net_status
= NFS4_EPHEMERAL_TREE_BUILDING
;
785 * We need to add it to the zone specific list for
786 * automatic unmounting and harvesting of deadwood.
788 mutex_enter(&ntg
->ntg_forest_lock
);
789 if (ntg
->ntg_forest
!= NULL
)
790 net
->net_next
= ntg
->ntg_forest
;
791 ntg
->ntg_forest
= net
;
792 mutex_exit(&ntg
->ntg_forest_lock
);
795 * No lock order confusion with mi_lock because no
796 * other node could have grabbed net_tree_lock.
798 mutex_enter(&net
->net_tree_lock
);
799 mi
->mi_ephemeral_tree
= net
;
801 mutex_exit(&mi
->mi_lock
);
804 VFS_HOLD(mi
->mi_vfsp
);
806 net
= mi
->mi_ephemeral_tree
;
807 nfs4_ephemeral_tree_hold(net
);
809 mutex_exit(&mi
->mi_lock
);
811 mutex_enter(&net
->net_tree_lock
);
814 * We can only proceed if the tree is neither locked
815 * nor being torn down.
817 mutex_enter(&net
->net_cnt_lock
);
818 if (net
->net_status
& NFS4_EPHEMERAL_TREE_PROCESSING
) {
819 nfs4_ephemeral_tree_decr(net
);
820 mutex_exit(&net
->net_cnt_lock
);
821 mutex_exit(&net
->net_tree_lock
);
825 mutex_exit(&net
->net_cnt_lock
);
828 mutex_enter(&net
->net_cnt_lock
);
829 net
->net_status
|= NFS4_EPHEMERAL_TREE_MOUNTING
;
830 mutex_exit(&net
->net_cnt_lock
);
834 error
= nfs4_trigger_domount_args_create(vp
, cr
, &dma
);
839 * Note that since we define mirror mounts to work
840 * for any user, we simply extend the privileges of
841 * the user's credentials to allow the mount to
847 nfs4_trigger_domount_args_destroy(dma
, vp
);
851 crset_zone_privall(mcred
);
853 error
= nfs4_trigger_domount(vp
, dma
, &vfsp
, mcred
, newvpp
);
854 nfs4_trigger_domount_args_destroy(dma
, vp
);
856 DTRACE_PROBE2(nfs4clnt__func__referral__mount
,
857 vnode_t
*, vp
, int, error
);
864 mutex_enter(&net
->net_cnt_lock
);
865 net
->net_status
&= ~NFS4_EPHEMERAL_TREE_MOUNTING
;
868 * REFCNT: If we are the root of the tree, then we need
869 * to keep a reference because we malloced the tree and
870 * this is where we tied it to our mntinfo.
872 * If we are not the root of the tree, then our tie to
873 * the mntinfo occurred elsewhere and we need to
874 * decrement the reference to the tree.
877 net
->net_status
&= ~NFS4_EPHEMERAL_TREE_BUILDING
;
879 nfs4_ephemeral_tree_decr(net
);
880 mutex_exit(&net
->net_cnt_lock
);
882 mutex_exit(&net
->net_tree_lock
);
885 if (!error
&& (newvpp
== NULL
|| *newvpp
== NULL
))
892 * Collect together both the generic & mount-type specific args.
895 nfs4_trigger_domount_args_create(vnode_t
*vp
, cred_t
*cr
, domount_args_t
**dmap
)
900 struct nfs_args
*nargs
, *nargs_head
;
901 enum clnt_stat status
;
902 ephemeral_servinfo_t
*esi
, *esi_first
;
904 mntinfo4_t
*mi
= VTOMI4(vp
);
906 nointr
= !(mi
->mi_flags
& MI4_INT
);
907 hostlist
= kmem_zalloc(MAXPATHLEN
, KM_SLEEP
);
909 svp
= mi
->mi_curr_serv
;
910 /* check if the current server is responding */
911 status
= nfs4_trigger_ping_server(svp
, nointr
);
912 if (status
== RPC_SUCCESS
) {
913 esi_first
= nfs4_trigger_esi_create(vp
, svp
, cr
);
914 if (esi_first
== NULL
) {
915 kmem_free(hostlist
, MAXPATHLEN
);
919 (void) strlcpy(hostlist
, esi_first
->esi_hostname
, MAXPATHLEN
);
921 nargs_head
= nfs4_trigger_nargs_create(mi
, svp
, esi_first
);
923 /* current server did not respond */
932 * If we have multiple servinfo4 structures, linked via sv_next,
933 * we must create one nfs_args for each, linking the nfs_args via
934 * nfs_ext_u.nfs_extB.next.
936 * We need to build a corresponding esi for each, too, but that is
937 * used solely for building nfs_args, and may be immediately
938 * discarded, as domount() requires the info from just one esi,
939 * but all the nfs_args.
941 * Currently, the NFS mount code will hang if not all servers
942 * requested are available. To avoid that, we need to ping each
943 * server, here, and remove it from the list if it is not
944 * responding. This has the side-effect of that server then
945 * being permanently unavailable for this failover mount, even if
946 * it recovers. That's unfortunate, but the best we can do until
947 * the mount code path is fixed.
951 * If the current server was down, loop indefinitely until we find
952 * at least one responsive server.
955 /* no locking needed for sv_next; it is only set at fs mount */
956 for (svp
= mi
->mi_servers
; svp
!= NULL
; svp
= svp
->sv_next
) {
957 struct nfs_args
*next
;
960 * nargs_head: the head of the nfs_args list
961 * nargs: the current tail of the list
962 * next: the newly-created element to be added
966 * We've already tried the current server, above;
967 * if it was responding, we have already included it
968 * and it may now be ignored.
970 * Otherwise, try it again, since it may now have
973 if (svp
== mi
->mi_curr_serv
&& esi_first
!= NULL
)
976 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
977 if (svp
->sv_flags
& SV4_NOTINUSE
) {
978 nfs_rw_exit(&svp
->sv_lock
);
981 nfs_rw_exit(&svp
->sv_lock
);
983 /* check if the server is responding */
984 status
= nfs4_trigger_ping_server(svp
, nointr
);
985 if (status
== RPC_INTR
) {
986 kmem_free(hostlist
, MAXPATHLEN
);
987 nfs4_trigger_esi_destroy(esi_first
, vp
);
989 while (nargs
!= NULL
) {
990 next
= nargs
->nfs_ext_u
.nfs_extB
.next
;
991 nfs4_trigger_nargs_destroy(nargs
);
995 } else if (status
!= RPC_SUCCESS
) {
996 /* if the server did not respond, ignore it */
1000 esi
= nfs4_trigger_esi_create(vp
, svp
, cr
);
1005 * If the original current server (mi_curr_serv)
1006 * was down when we first tried it,
1007 * (i.e. esi_first == NULL),
1008 * we select this new server (svp) to be the server
1009 * that we will actually contact (esi_first).
1011 * Note that it's possible that mi_curr_serv == svp,
1012 * if that mi_curr_serv was down but has now recovered.
1014 next
= nfs4_trigger_nargs_create(mi
, svp
, esi
);
1015 if (esi_first
== NULL
) {
1016 ASSERT(nargs
== NULL
);
1017 ASSERT(nargs_head
== NULL
);
1020 (void) strlcpy(hostlist
,
1021 esi_first
->esi_hostname
, MAXPATHLEN
);
1023 ASSERT(nargs_head
!= NULL
);
1024 nargs
->nfs_ext_u
.nfs_extB
.next
= next
;
1025 (void) strlcat(hostlist
, ",", MAXPATHLEN
);
1026 (void) strlcat(hostlist
, esi
->esi_hostname
,
1028 /* esi was only needed for hostname & nargs */
1029 nfs4_trigger_esi_destroy(esi
, vp
);
1035 /* if we've had no response at all, wait a second */
1036 if (esi_first
== NULL
)
1039 } while (esi_first
== NULL
);
1040 ASSERT(nargs_head
!= NULL
);
1042 dma
= kmem_zalloc(sizeof (domount_args_t
), KM_SLEEP
);
1043 dma
->dma_esi
= esi_first
;
1044 dma
->dma_hostlist
= hostlist
;
1045 dma
->dma_nargs
= nargs_head
;
1052 nfs4_trigger_domount_args_destroy(domount_args_t
*dma
, vnode_t
*vp
)
1055 if (dma
->dma_esi
!= NULL
&& vp
!= NULL
)
1056 nfs4_trigger_esi_destroy(dma
->dma_esi
, vp
);
1058 if (dma
->dma_hostlist
!= NULL
)
1059 kmem_free(dma
->dma_hostlist
, MAXPATHLEN
);
1061 if (dma
->dma_nargs
!= NULL
) {
1062 struct nfs_args
*nargs
= dma
->dma_nargs
;
1065 struct nfs_args
*next
=
1066 nargs
->nfs_ext_u
.nfs_extB
.next
;
1068 nfs4_trigger_nargs_destroy(nargs
);
1070 } while (nargs
!= NULL
);
1073 kmem_free(dma
, sizeof (domount_args_t
));
1078 * The ephemeral_servinfo_t struct contains basic information we will need to
1079 * perform the mount. Whilst the structure is generic across different
1080 * types of ephemeral mount, the way we gather its contents differs.
1082 static ephemeral_servinfo_t
*
1083 nfs4_trigger_esi_create(vnode_t
*vp
, servinfo4_t
*svp
, cred_t
*cr
)
1085 ephemeral_servinfo_t
*esi
;
1086 rnode4_t
*rp
= VTOR4(vp
);
1088 ASSERT(RP_ISSTUB(rp
));
1090 /* Call the ephemeral type-specific routine */
1091 if (RP_ISSTUB_MIRRORMOUNT(rp
))
1092 esi
= nfs4_trigger_esi_create_mirrormount(vp
, svp
);
1093 else if (RP_ISSTUB_REFERRAL(rp
))
1094 esi
= nfs4_trigger_esi_create_referral(vp
, cr
);
1101 nfs4_trigger_esi_destroy(ephemeral_servinfo_t
*esi
, vnode_t
*vp
)
1103 rnode4_t
*rp
= VTOR4(vp
);
1105 ASSERT(RP_ISSTUB(rp
));
1107 /* Currently, no need for an ephemeral type-specific routine */
1110 * The contents of ephemeral_servinfo_t goes into nfs_args,
1111 * and will be handled by nfs4_trigger_nargs_destroy().
1112 * We need only free the structure itself.
1115 kmem_free(esi
, sizeof (ephemeral_servinfo_t
));
1119 * Some of this may turn out to be common with other ephemeral types,
1120 * in which case it should be moved to nfs4_trigger_esi_create(), or a
1121 * common function called.
1125 * Mirror mounts case - should have all data available
1127 static ephemeral_servinfo_t
*
1128 nfs4_trigger_esi_create_mirrormount(vnode_t
*vp
, servinfo4_t
*svp
)
1131 struct knetconfig
*sikncp
, *svkncp
;
1132 struct netbuf
*bufp
;
1133 ephemeral_servinfo_t
*esi
;
1135 esi
= kmem_zalloc(sizeof (ephemeral_servinfo_t
), KM_SLEEP
);
1137 /* initially set to be our type of ephemeral mount; may be added to */
1138 esi
->esi_mount_flags
= NFSMNT_MIRRORMOUNT
;
1141 * We're copying info from the stub rnode's servinfo4, but
1142 * we must create new copies, not pointers, since this information
1143 * is to be associated with the new mount, which will be
1144 * unmounted (and its structures freed) separately
1148 * Sizes passed to kmem_[z]alloc here must match those freed
1149 * in nfs4_free_args()
1153 * We hold sv_lock across kmem_zalloc() calls that may sleep, but this
1154 * is difficult to avoid, as we need to read svp to calculate the
1155 * sizes to be allocated.
1157 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
1159 esi
->esi_hostname
= kmem_zalloc(strlen(svp
->sv_hostname
) + 1, KM_SLEEP
);
1160 (void) strcat(esi
->esi_hostname
, svp
->sv_hostname
);
1162 esi
->esi_addr
= kmem_zalloc(sizeof (struct netbuf
), KM_SLEEP
);
1163 bufp
= esi
->esi_addr
;
1164 bufp
->len
= svp
->sv_addr
.len
;
1165 bufp
->maxlen
= svp
->sv_addr
.maxlen
;
1166 bufp
->buf
= kmem_zalloc(bufp
->len
, KM_SLEEP
);
1167 bcopy(svp
->sv_addr
.buf
, bufp
->buf
, bufp
->len
);
1169 esi
->esi_knconf
= kmem_zalloc(sizeof (*esi
->esi_knconf
), KM_SLEEP
);
1170 sikncp
= esi
->esi_knconf
;
1171 svkncp
= svp
->sv_knconf
;
1172 sikncp
->knc_semantics
= svkncp
->knc_semantics
;
1173 sikncp
->knc_protofmly
= kmem_zalloc(KNC_STRSIZE
, KM_SLEEP
);
1174 (void) strcat((char *)sikncp
->knc_protofmly
,
1175 (char *)svkncp
->knc_protofmly
);
1176 sikncp
->knc_proto
= kmem_zalloc(KNC_STRSIZE
, KM_SLEEP
);
1177 (void) strcat((char *)sikncp
->knc_proto
, (char *)svkncp
->knc_proto
);
1178 sikncp
->knc_rdev
= svkncp
->knc_rdev
;
1181 * Used when AUTH_DH is negotiated.
1183 * This is ephemeral mount-type specific, since it contains the
1184 * server's time-sync syncaddr.
1186 if (svp
->sv_dhsec
) {
1187 struct netbuf
*bufp
;
1189 dh_k4_clntdata_t
*data
;
1191 sdata
= svp
->sv_dhsec
;
1192 data
= (dh_k4_clntdata_t
*)sdata
->data
;
1193 ASSERT(sdata
->rpcflavor
== AUTH_DH
);
1195 bufp
= kmem_zalloc(sizeof (struct netbuf
), KM_SLEEP
);
1196 bufp
->len
= data
->syncaddr
.len
;
1197 bufp
->maxlen
= data
->syncaddr
.maxlen
;
1198 bufp
->buf
= kmem_zalloc(bufp
->len
, KM_SLEEP
);
1199 bcopy(data
->syncaddr
.buf
, bufp
->buf
, bufp
->len
);
1200 esi
->esi_syncaddr
= bufp
;
1202 if (data
->netname
!= NULL
) {
1203 int nmlen
= data
->netnamelen
;
1206 * We need to copy from a dh_k4_clntdata_t
1207 * netname/netnamelen pair to a NUL-terminated
1208 * netname string suitable for putting in nfs_args,
1209 * where the latter has no netnamelen field.
1211 esi
->esi_netname
= kmem_zalloc(nmlen
+ 1, KM_SLEEP
);
1212 bcopy(data
->netname
, esi
->esi_netname
, nmlen
);
1215 esi
->esi_syncaddr
= NULL
;
1216 esi
->esi_netname
= NULL
;
1219 stubpath
= fn_path(VTOSV(vp
)->sv_name
);
1220 /* step over initial '.', to avoid e.g. sv_path: "/tank./ws" */
1221 ASSERT(*stubpath
== '.');
1224 /* for nfs_args->fh */
1225 esi
->esi_path_len
= strlen(stubpath
) + 1;
1226 if (strcmp(svp
->sv_path
, "/") != 0)
1227 esi
->esi_path_len
+= strlen(svp
->sv_path
);
1228 esi
->esi_path
= kmem_zalloc(esi
->esi_path_len
, KM_SLEEP
);
1229 if (strcmp(svp
->sv_path
, "/") != 0)
1230 (void) strcat(esi
->esi_path
, svp
->sv_path
);
1231 (void) strcat(esi
->esi_path
, stubpath
);
1234 /* stubpath allocated by fn_path() */
1235 kmem_free(stubpath
, strlen(stubpath
) + 1);
1237 nfs_rw_exit(&svp
->sv_lock
);
1243 * Makes an upcall to NFSMAPID daemon to resolve hostname of NFS server to
1244 * get network information required to do the mount call.
1247 nfs4_callmapid(utf8string
*server
, struct nfs_fsl_info
*resp
)
1249 door_arg_t door_args
;
1252 refd_door_args_t
*xdr_argsp
;
1253 refd_door_res_t
*orig_resp
;
1256 int res_len
= 16; /* length of an IP address */
1257 int orig_reslen
= res_len
;
1259 struct nfsidmap_globals
*nig
;
1261 if (zone_status_get(curproc
->p_zone
) >= ZONE_IS_SHUTTING_DOWN
)
1262 return (ECONNREFUSED
);
1264 nig
= zone_getspecific(nfsidmap_zone_key
, nfs_zone());
1265 ASSERT(nig
!= NULL
);
1267 mutex_enter(&nig
->nfsidmap_daemon_lock
);
1268 dh
= nig
->nfsidmap_daemon_dh
;
1270 mutex_exit(&nig
->nfsidmap_daemon_lock
);
1272 "nfs4_callmapid: nfsmapid daemon not " \
1273 "running unable to resolve host name\n");
1277 mutex_exit(&nig
->nfsidmap_daemon_lock
);
1279 xdr_len
= xdr_sizeof(&(xdr_utf8string
), server
);
1281 xdr_argsp
= kmem_zalloc(xdr_len
+ sizeof (*xdr_argsp
), KM_SLEEP
);
1282 xdr_argsp
->xdr_len
= xdr_len
;
1283 xdr_argsp
->cmd
= NFSMAPID_SRV_NETINFO
;
1285 xdrmem_create(&xdr
, (char *)&xdr_argsp
->xdr_arg
,
1286 xdr_len
, XDR_ENCODE
);
1288 if (!xdr_utf8string(&xdr
, server
)) {
1289 kmem_free(xdr_argsp
, xdr_len
+ sizeof (*xdr_argsp
));
1295 orig_resp
= kmem_alloc(orig_reslen
, KM_SLEEP
);
1297 door_args
.data_ptr
= (char *)xdr_argsp
;
1298 door_args
.data_size
= sizeof (*xdr_argsp
) + xdr_argsp
->xdr_len
;
1299 door_args
.desc_ptr
= NULL
;
1300 door_args
.desc_num
= 0;
1301 door_args
.rbuf
= orig_resp
? (char *)orig_resp
: NULL
;
1302 door_args
.rsize
= res_len
;
1305 error
= door_ki_upcall(dh
, &door_args
);
1310 kmem_free(xdr_argsp
, xdr_len
+ sizeof (*xdr_argsp
));
1312 kmem_free(orig_resp
, orig_reslen
);
1314 * There is no door to connect to. The referral daemon
1315 * must not be running yet.
1318 "nfsmapid not running cannot resolve host name");
1323 * If the results buffer passed back is not the same as
1324 * what was sent, free the old buffer and use the new one.
1326 if (orig_resp
&& orig_reslen
) {
1327 refd_door_res_t
*door_resp
;
1329 door_resp
= (refd_door_res_t
*)door_args
.rbuf
;
1330 if ((void *)door_args
.rbuf
!= orig_resp
)
1331 kmem_free(orig_resp
, orig_reslen
);
1332 if (door_resp
->res_status
== 0) {
1333 xdrmem_create(&xdr
, (char *)&door_resp
->xdr_res
,
1334 door_resp
->xdr_len
, XDR_DECODE
);
1335 bzero(resp
, sizeof (struct nfs_fsl_info
));
1336 if (!xdr_nfs_fsl_info(&xdr
, resp
)) {
1338 nfs4clnt__debug__referral__upcall__xdrfail
,
1339 struct nfs_fsl_info
*, resp
,
1340 char *, "nfs4_callmapid");
1345 nfs4clnt__debug__referral__upcall__badstatus
,
1346 int, door_resp
->res_status
,
1347 char *, "nfs4_callmapid");
1348 error
= door_resp
->res_status
;
1350 kmem_free(door_args
.rbuf
, door_args
.rsize
);
1353 DTRACE_PROBE2(nfs4clnt__func__referral__upcall
,
1354 char *, server
, int, error
);
1359 * Fetches the fs_locations attribute. Typically called
1360 * from a Replication/Migration/Referrals/Mirror-mount context
1362 * Fills in the attributes in garp. The caller is assumed
1363 * to have allocated memory for garp.
1365 * lock: if set do not lock s_recovlock and mi_recovlock mutex,
1366 * it's already done by caller. Otherwise lock these mutexes
1367 * before doing the rfs4call().
1374 nfs4_fetch_locations(mntinfo4_t
*mi
, nfs4_sharedfh_t
*sfh
, char *nm
,
1375 cred_t
*cr
, nfs4_ga_res_t
*garp
, COMPOUND4res_clnt
*callres
, bool_t lock
)
1377 COMPOUND4args_clnt args
;
1378 COMPOUND4res_clnt res
;
1380 int argoplist_size
= 3 * sizeof (nfs_argop4
);
1381 nfs4_server_t
*sp
= NULL
;
1383 nfs4_error_t e
= { 0, NFS4_OK
, RPC_SUCCESS
};
1385 struct nfs4_clnt
*nfscl
;
1388 (void) nfs_rw_enter_sig(&mi
->mi_recovlock
, RW_READER
, 0);
1390 ASSERT(nfs_rw_lock_held(&mi
->mi_recovlock
, RW_READER
) ||
1391 nfs_rw_lock_held(&mi
->mi_recovlock
, RW_WRITER
));
1393 sp
= find_nfs4_server(mi
);
1395 nfs_rw_exit(&mi
->mi_recovlock
);
1398 mutex_exit(&sp
->s_lock
);
1402 (void) nfs_rw_enter_sig(&sp
->s_recovlock
,
1404 (void) nfs_rw_enter_sig(&mi
->mi_recovlock
, RW_WRITER
, 0);
1407 ASSERT(nfs_rw_lock_held(&sp
->s_recovlock
, RW_READER
) ||
1408 nfs_rw_lock_held(&sp
->s_recovlock
, RW_WRITER
));
1413 * Do we want to do the setup for recovery here?
1415 * We know that the server responded to a null ping a very
1416 * short time ago, and we know that we intend to do a
1417 * single stateless operation - we want to fetch attributes,
1418 * so we know we can't encounter errors about state. If
1419 * something goes wrong with the GETATTR, like not being
1420 * able to get a response from the server or getting any
1421 * kind of FH error, we should fail the mount.
1423 * We may want to revisit this at a later time.
1425 argop
= kmem_alloc(argoplist_size
, KM_SLEEP
);
1427 args
.ctag
= TAG_GETATTR_FSLOCATION
;
1428 /* PUTFH LOOKUP GETATTR */
1433 argop
[0].argop
= OP_CPUTFH
;
1434 argop
[0].nfs_argop4_u
.opcputfh
.sfh
= sfh
;
1436 /* 1. lookup name, can't be dotdot */
1437 argop
[1].argop
= OP_CLOOKUP
;
1438 argop
[1].nfs_argop4_u
.opclookup
.cname
= nm
;
1441 argop
[2].argop
= OP_GETATTR
;
1442 argop
[2].nfs_argop4_u
.opgetattr
.attr_request
=
1443 FATTR4_FSID_MASK
| FATTR4_FS_LOCATIONS_MASK
|
1444 FATTR4_MOUNTED_ON_FILEID_MASK
;
1445 argop
[2].nfs_argop4_u
.opgetattr
.mi
= mi
;
1447 rfs4call(mi
, &args
, &res
, cr
, &doqueue
, 0, &e
);
1450 nfs_rw_exit(&mi
->mi_recovlock
);
1452 nfs_rw_exit(&sp
->s_recovlock
);
1455 nfscl
= zone_getspecific(nfs4clnt_zone_key
, nfs_zone());
1456 nfscl
->nfscl_stat
.referrals
.value
.ui64
++;
1457 DTRACE_PROBE3(nfs4clnt__func__referral__fsloc
,
1458 nfs4_sharedfh_t
*, sfh
, char *, nm
, nfs4_error_t
*, &e
);
1462 nfs4_server_rele(sp
);
1463 kmem_free(argop
, argoplist_size
);
1468 * Check for all possible error conditions.
1469 * For valid replies without an ops array or for illegal
1470 * replies, return a failure.
1472 if (res
.status
!= NFS4_OK
|| res
.array_len
< 3 ||
1473 res
.array
[2].nfs_resop4_u
.opgetattr
.status
!= NFS4_OK
) {
1479 * There isn't much value in putting the attributes
1480 * in the attr cache since fs_locations4 aren't
1481 * encountered very frequently, so just make them
1482 * available to the caller.
1484 *garp
= res
.array
[2].nfs_resop4_u
.opgetattr
.ga_res
;
1486 DTRACE_PROBE2(nfs4clnt__debug__referral__fsloc
,
1487 nfs4_ga_res_t
*, garp
, char *, "nfs4_fetch_locations");
1489 /* No fs_locations? -- return a failure */
1490 if (garp
->n4g_ext_res
== NULL
||
1491 garp
->n4g_ext_res
->n4g_fslocations
.locations_val
== NULL
) {
1496 if (!garp
->n4g_fsid_valid
)
1501 /* the call was ok but failed validating the call results */
1502 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
1504 ASSERT(callres
!= NULL
);
1509 nfs4_server_rele(sp
);
1510 kmem_free(argop
, argoplist_size
);
1514 /* tunable to disable referral mounts */
1515 int nfs4_no_referrals
= 0;
1518 * Returns NULL if the vnode cannot be created or found.
1521 find_referral_stubvp(vnode_t
*dvp
, char *nm
, cred_t
*cr
)
1523 nfs_fh4
*stub_fh
, *dfh
;
1524 nfs4_sharedfh_t
*sfhp
;
1527 fattr4_mounted_on_fileid mnt_on_fileid
;
1530 COMPOUND4res_clnt callres
;
1533 if (nfs4_no_referrals
)
1537 * Get the mounted_on_fileid, unique on that server::fsid
1540 if (nfs4_fetch_locations(mi
, VTOR4(dvp
)->r_fh
, nm
, cr
,
1541 &garp
, &callres
, FALSE
) == 0)
1543 mnt_on_fileid
= garp
.n4g_mon_fid
;
1544 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&callres
);
1547 * Build a fake filehandle from the dir FH and the mounted_on_fileid
1549 dfh
= &VTOR4(dvp
)->r_fh
->sfh_fh
;
1550 stub_fh
= kmem_alloc(sizeof (nfs_fh4
), KM_SLEEP
);
1551 stub_fh
->nfs_fh4_val
= kmem_alloc(dfh
->nfs_fh4_len
+
1552 sizeof (fattr4_mounted_on_fileid
), KM_SLEEP
);
1553 newfhval
= stub_fh
->nfs_fh4_val
;
1555 /* copy directory's file handle */
1556 bcopy(dfh
->nfs_fh4_val
, newfhval
, dfh
->nfs_fh4_len
);
1557 stub_fh
->nfs_fh4_len
= dfh
->nfs_fh4_len
;
1558 newfhval
= newfhval
+ dfh
->nfs_fh4_len
;
1560 /* Add mounted_on_fileid. Use bcopy to avoid alignment problem */
1561 bcopy((char *)&mnt_on_fileid
, newfhval
,
1562 sizeof (fattr4_mounted_on_fileid
));
1563 stub_fh
->nfs_fh4_len
+= sizeof (fattr4_mounted_on_fileid
);
1565 sfhp
= sfh4_put(stub_fh
, VTOMI4(dvp
), NULL
);
1566 kmem_free(stub_fh
->nfs_fh4_val
, dfh
->nfs_fh4_len
+
1567 sizeof (fattr4_mounted_on_fileid
));
1568 kmem_free(stub_fh
, sizeof (nfs_fh4
));
1573 garp
.n4g_va
.va_type
= VDIR
;
1574 vp
= makenfs4node(sfhp
, NULL
, dvp
->v_vfsp
, t
,
1575 cr
, dvp
, fn_get(VTOSV(dvp
)->sv_name
, nm
, sfhp
));
1585 nfs4_setup_referral(vnode_t
*dvp
, char *nm
, vnode_t
**vpp
, cred_t
*cr
)
1590 if ((nvp
= find_referral_stubvp(dvp
, nm
, cr
)) == NULL
)
1594 mutex_enter(&rp
->r_statelock
);
1595 r4_stub_referral(rp
);
1596 mutex_exit(&rp
->r_statelock
);
1597 dnlc_enter(dvp
, nm
, nvp
);
1600 VN_RELE(*vpp
); /* no longer need this vnode */
1608 * Fetch the location information and resolve the new server.
1609 * Caller needs to free up the XDR data which is returned.
1610 * Input: mount info, shared filehandle, nodename
1611 * Return: Index to the result or Error(-1)
1612 * Output: FsLocations Info, Resolved Server Info.
1615 nfs4_process_referral(mntinfo4_t
*mi
, nfs4_sharedfh_t
*sfh
,
1616 char *nm
, cred_t
*cr
, nfs4_ga_res_t
*grp
, COMPOUND4res_clnt
*res
,
1617 struct nfs_fsl_info
*fsloc
)
1620 struct nfs_fsl_info nfsfsloc
;
1623 COMPOUND4res_clnt callres
;
1624 struct knetconfig
*knc
;
1626 ret
= nfs4_fetch_locations(mi
, sfh
, nm
, cr
, &garp
, &callres
, TRUE
);
1631 * As a lame attempt at figuring out if we're
1632 * handling a migration event or a referral,
1633 * look for rnodes with this fsid in the rnode
1636 * If we can find one or more such rnodes, it
1637 * means we're handling a migration event and
1638 * we want to bail out in that case.
1640 if (r4find_by_fsid(mi
, &garp
.n4g_fsid
)) {
1641 DTRACE_PROBE3(nfs4clnt__debug__referral__migration
,
1642 mntinfo4_t
*, mi
, nfs4_ga_res_t
*, &garp
,
1643 char *, "nfs4_process_referral");
1644 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&callres
);
1649 * Find the first responsive server to mount. When we find
1650 * one, fsp will point to it.
1652 for (i
= 0; i
< garp
.n4g_ext_res
->n4g_fslocations
.locations_len
; i
++) {
1654 fsp
= &garp
.n4g_ext_res
->n4g_fslocations
.locations_val
[i
];
1655 if (fsp
->server_len
== 0 || fsp
->server_val
== NULL
)
1658 error
= nfs4_callmapid(fsp
->server_val
, &nfsfsloc
);
1662 error
= nfs4_ping_server_common(nfsfsloc
.knconf
,
1663 nfsfsloc
.addr
, !(mi
->mi_flags
& MI4_INT
));
1664 if (error
== RPC_SUCCESS
)
1667 DTRACE_PROBE2(nfs4clnt__debug__referral__srvaddr
,
1668 sockaddr_in
*, (struct sockaddr_in
*)nfsfsloc
.addr
->buf
,
1669 char *, "nfs4_process_referral");
1671 xdr_free(xdr_nfs_fsl_info
, (char *)&nfsfsloc
);
1673 knc
= nfsfsloc
.knconf
;
1674 if ((i
>= garp
.n4g_ext_res
->n4g_fslocations
.locations_len
) ||
1675 (knc
->knc_protofmly
== NULL
) || (knc
->knc_proto
== NULL
)) {
1676 DTRACE_PROBE2(nfs4clnt__debug__referral__nofsloc
,
1677 nfs4_ga_res_t
*, &garp
, char *, "nfs4_process_referral");
1678 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&callres
);
1682 /* Send the results back */
1690 * Referrals case - need to fetch referral data and then upcall to
1691 * user-level to get complete mount data.
1693 static ephemeral_servinfo_t
*
1694 nfs4_trigger_esi_create_referral(vnode_t
*vp
, cred_t
*cr
)
1696 struct knetconfig
*sikncp
, *svkncp
;
1697 struct netbuf
*bufp
;
1698 ephemeral_servinfo_t
*esi
;
1702 struct nfs_fsl_info nfsfsloc
;
1705 char fn
[MAXNAMELEN
];
1708 COMPOUND4res_clnt callres
;
1711 * If we're passed in a stub vnode that
1712 * isn't a "referral" stub, bail out
1713 * and return a failure
1715 if (!RP_ISSTUB_REFERRAL(VTOR4(vp
)))
1718 if (vtodv(vp
, &dvp
, CRED(), TRUE
) != 0)
1722 if (nfs_rw_enter_sig(&drp
->r_rwlock
, RW_READER
, INTR4(dvp
))) {
1727 if (vtoname(vp
, fn
, MAXNAMELEN
) != 0) {
1728 nfs_rw_exit(&drp
->r_rwlock
);
1734 index
= nfs4_process_referral(mi
, drp
->r_fh
, fn
, cr
,
1735 &garp
, &callres
, &nfsfsloc
);
1736 nfs_rw_exit(&drp
->r_rwlock
);
1741 fsp
= &garp
.n4g_ext_res
->n4g_fslocations
.locations_val
[index
];
1742 esi
= kmem_zalloc(sizeof (ephemeral_servinfo_t
), KM_SLEEP
);
1744 /* initially set to be our type of ephemeral mount; may be added to */
1745 esi
->esi_mount_flags
= NFSMNT_REFERRAL
;
1748 kmem_zalloc(fsp
->server_val
->utf8string_len
+ 1, KM_SLEEP
);
1749 bcopy(fsp
->server_val
->utf8string_val
, esi
->esi_hostname
,
1750 fsp
->server_val
->utf8string_len
);
1751 esi
->esi_hostname
[fsp
->server_val
->utf8string_len
] = '\0';
1753 bufp
= kmem_alloc(sizeof (struct netbuf
), KM_SLEEP
);
1754 bufp
->len
= nfsfsloc
.addr
->len
;
1755 bufp
->maxlen
= nfsfsloc
.addr
->maxlen
;
1756 bufp
->buf
= kmem_zalloc(bufp
->len
, KM_SLEEP
);
1757 bcopy(nfsfsloc
.addr
->buf
, bufp
->buf
, bufp
->len
);
1758 esi
->esi_addr
= bufp
;
1760 esi
->esi_knconf
= kmem_zalloc(sizeof (*esi
->esi_knconf
), KM_SLEEP
);
1761 sikncp
= esi
->esi_knconf
;
1763 DTRACE_PROBE2(nfs4clnt__debug__referral__nfsfsloc
,
1764 struct nfs_fsl_info
*, &nfsfsloc
,
1765 char *, "nfs4_trigger_esi_create_referral");
1767 svkncp
= nfsfsloc
.knconf
;
1768 sikncp
->knc_semantics
= svkncp
->knc_semantics
;
1769 sikncp
->knc_protofmly
= kmem_zalloc(KNC_STRSIZE
, KM_SLEEP
);
1770 (void) strlcat((char *)sikncp
->knc_protofmly
,
1771 (char *)svkncp
->knc_protofmly
, KNC_STRSIZE
);
1772 sikncp
->knc_proto
= kmem_zalloc(KNC_STRSIZE
, KM_SLEEP
);
1773 (void) strlcat((char *)sikncp
->knc_proto
, (char *)svkncp
->knc_proto
,
1775 sikncp
->knc_rdev
= svkncp
->knc_rdev
;
1777 DTRACE_PROBE2(nfs4clnt__debug__referral__knetconf
,
1778 struct knetconfig
*, sikncp
,
1779 char *, "nfs4_trigger_esi_create_referral");
1781 esi
->esi_netname
= kmem_zalloc(nfsfsloc
.netnm_len
, KM_SLEEP
);
1782 bcopy(nfsfsloc
.netname
, esi
->esi_netname
, nfsfsloc
.netnm_len
);
1783 esi
->esi_syncaddr
= NULL
;
1785 esi
->esi_path
= p
= kmem_zalloc(MAXPATHLEN
, KM_SLEEP
);
1786 esi
->esi_path_len
= MAXPATHLEN
;
1788 for (i
= 0; i
< fsp
->rootpath
.pathname4_len
; i
++) {
1791 comp
= &fsp
->rootpath
.pathname4_val
[i
];
1792 /* If no space, null the string and bail */
1793 if ((p
- esi
->esi_path
) + comp
->utf8string_len
+ 1 > MAXPATHLEN
)
1795 bcopy(comp
->utf8string_val
, p
, comp
->utf8string_len
);
1796 p
+= comp
->utf8string_len
;
1799 if (fsp
->rootpath
.pathname4_len
!= 0)
1804 esi
->esi_path
= strdup(p
);
1805 esi
->esi_path_len
= strlen(p
) + 1;
1806 kmem_free(p
, MAXPATHLEN
);
1808 /* Allocated in nfs4_process_referral() */
1809 xdr_free(xdr_nfs_fsl_info
, (char *)&nfsfsloc
);
1810 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&callres
);
1814 kmem_free(esi
->esi_path
, esi
->esi_path_len
);
1815 kmem_free(esi
->esi_hostname
, fsp
->server_val
->utf8string_len
+ 1);
1816 kmem_free(esi
->esi_addr
->buf
, esi
->esi_addr
->len
);
1817 kmem_free(esi
->esi_addr
, sizeof (struct netbuf
));
1818 kmem_free(esi
->esi_knconf
->knc_protofmly
, KNC_STRSIZE
);
1819 kmem_free(esi
->esi_knconf
->knc_proto
, KNC_STRSIZE
);
1820 kmem_free(esi
->esi_knconf
, sizeof (*esi
->esi_knconf
));
1821 kmem_free(esi
->esi_netname
, nfsfsloc
.netnm_len
);
1822 kmem_free(esi
, sizeof (ephemeral_servinfo_t
));
1823 xdr_free(xdr_nfs_fsl_info
, (char *)&nfsfsloc
);
1824 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&callres
);
1829 * Assemble the args, and call the generic VFS mount function to
1830 * finally perform the ephemeral mount.
1833 nfs4_trigger_domount(vnode_t
*stubvp
, domount_args_t
*dma
, vfs_t
**vfsp
,
1834 cred_t
*cr
, vnode_t
**newvpp
)
1837 char *mntpt
, *orig_path
, *path
;
1838 const char *orig_mntpt
;
1842 zone_t
*zone
= curproc
->p_zone
;
1843 bool_t has_leading_slash
;
1846 vfs_t
*stubvfsp
= stubvp
->v_vfsp
;
1847 ephemeral_servinfo_t
*esi
= dma
->dma_esi
;
1848 struct nfs_args
*nargs
= dma
->dma_nargs
;
1850 /* first, construct the mount point for the ephemeral mount */
1851 orig_path
= path
= fn_path(VTOSV(stubvp
)->sv_name
);
1852 orig_mntpt
= (char *)refstr_value(stubvfsp
->vfs_mntpt
);
1854 if (*orig_path
== '.')
1858 * Get rid of zone's root path
1860 if (zone
!= global_zone
) {
1862 * -1 for trailing '/' and -1 for EOS.
1864 if (strncmp(zone
->zone_rootpath
, orig_mntpt
,
1865 zone
->zone_rootpathlen
- 1) == 0) {
1866 orig_mntpt
+= (zone
->zone_rootpathlen
- 2);
1870 mntpt_len
= strlen(orig_mntpt
) + strlen(orig_path
);
1871 mntpt
= kmem_zalloc(mntpt_len
+ 1, KM_SLEEP
);
1872 (void) strcat(mntpt
, orig_mntpt
);
1873 (void) strcat(mntpt
, orig_path
);
1875 kmem_free(path
, strlen(path
) + 1);
1876 path
= esi
->esi_path
;
1879 if (path
[0] == '/' && path
[1] == '/')
1881 has_leading_slash
= (*path
== '/');
1883 spec_len
= strlen(dma
->dma_hostlist
);
1884 spec_len
+= strlen(path
);
1886 /* We are going to have to add this in */
1887 if (!has_leading_slash
)
1890 /* We need to get the ':' for dma_hostlist:esi_path */
1893 uap
= kmem_zalloc(sizeof (struct mounta
), KM_SLEEP
);
1894 uap
->spec
= kmem_zalloc(spec_len
+ 1, KM_SLEEP
);
1895 (void) snprintf(uap
->spec
, spec_len
+ 1, "%s:%s%s", dma
->dma_hostlist
,
1896 has_leading_slash
? "" : "/", path
);
1900 uap
->flags
= MS_SYSSPACE
| MS_DATA
;
1901 /* fstype-independent mount options not covered elsewhere */
1902 /* copy parent's mount(8) "-m" flag */
1903 if (stubvfsp
->vfs_flag
& VFS_NOMNTTAB
)
1904 uap
->flags
|= MS_NOMNTTAB
;
1906 uap
->fstype
= MNTTYPE_NFS4
;
1907 uap
->dataptr
= (char *)nargs
;
1908 /* not needed for MS_SYSSPACE */
1911 /* use optptr to pass in extra mount options */
1912 uap
->flags
|= MS_OPTIONSTR
;
1913 uap
->optptr
= nfs4_trigger_create_mntopts(stubvfsp
);
1914 if (uap
->optptr
== NULL
) {
1919 /* domount() expects us to count the trailing NUL */
1920 uap
->optlen
= strlen(uap
->optptr
) + 1;
1923 * If we get EBUSY, we try again once to see if we can perform
1924 * the mount. We do this because of a spurious race condition.
1926 for (i
= 0; i
< 2; i
++) {
1930 retval
= domount(NULL
, uap
, stubvp
, cr
, vfsp
);
1932 retval
= VFS_ROOT(*vfsp
, newvpp
);
1935 } else if (retval
!= EBUSY
) {
1940 * We might find it mounted by the other racer...
1942 error
= nfs4_trigger_mounted_already(stubvp
,
1943 newvpp
, &was_mounted
, vfsp
);
1946 } else if (was_mounted
) {
1954 nfs4_trigger_destroy_mntopts(uap
->optptr
);
1956 kmem_free(uap
->spec
, spec_len
+ 1);
1957 kmem_free(uap
, sizeof (struct mounta
));
1958 kmem_free(mntpt
, mntpt_len
+ 1);
1964 * Build an nfs_args structure for passing to domount().
1966 * Ephemeral mount-type specific data comes from the ephemeral_servinfo_t;
1967 * generic data - common to all ephemeral mount types - is read directly
1968 * from the parent mount's servinfo4_t and mntinfo4_t, via the stub vnode.
1970 static struct nfs_args
*
1971 nfs4_trigger_nargs_create(mntinfo4_t
*mi
, servinfo4_t
*svp
,
1972 ephemeral_servinfo_t
*esi
)
1974 sec_data_t
*secdata
;
1975 struct nfs_args
*nargs
;
1977 /* setup the nfs args */
1978 nargs
= kmem_zalloc(sizeof (struct nfs_args
), KM_SLEEP
);
1980 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
1982 nargs
->addr
= esi
->esi_addr
;
1984 /* for AUTH_DH by negotiation */
1985 if (esi
->esi_syncaddr
|| esi
->esi_netname
) {
1986 nargs
->flags
|= NFSMNT_SECURE
;
1987 nargs
->syncaddr
= esi
->esi_syncaddr
;
1988 nargs
->netname
= esi
->esi_netname
;
1991 nargs
->flags
|= NFSMNT_KNCONF
;
1992 nargs
->knconf
= esi
->esi_knconf
;
1993 nargs
->flags
|= NFSMNT_HOSTNAME
;
1994 nargs
->hostname
= esi
->esi_hostname
;
1995 nargs
->fh
= esi
->esi_path
;
1997 /* general mount settings, all copied from parent mount */
1998 mutex_enter(&mi
->mi_lock
);
2000 if (!(mi
->mi_flags
& MI4_HARD
))
2001 nargs
->flags
|= NFSMNT_SOFT
;
2003 nargs
->flags
|= NFSMNT_WSIZE
| NFSMNT_RSIZE
| NFSMNT_TIMEO
|
2005 nargs
->wsize
= mi
->mi_stsize
;
2006 nargs
->rsize
= mi
->mi_tsize
;
2007 nargs
->timeo
= mi
->mi_timeo
;
2008 nargs
->retrans
= mi
->mi_retrans
;
2010 if (mi
->mi_flags
& MI4_INT
)
2011 nargs
->flags
|= NFSMNT_INT
;
2012 if (mi
->mi_flags
& MI4_NOAC
)
2013 nargs
->flags
|= NFSMNT_NOAC
;
2015 nargs
->flags
|= NFSMNT_ACREGMIN
| NFSMNT_ACREGMAX
| NFSMNT_ACDIRMIN
|
2017 nargs
->acregmin
= HR2SEC(mi
->mi_acregmin
);
2018 nargs
->acregmax
= HR2SEC(mi
->mi_acregmax
);
2019 nargs
->acdirmin
= HR2SEC(mi
->mi_acdirmin
);
2020 nargs
->acdirmax
= HR2SEC(mi
->mi_acdirmax
);
2022 /* add any specific flags for this type of ephemeral mount */
2023 nargs
->flags
|= esi
->esi_mount_flags
;
2025 if (mi
->mi_flags
& MI4_NOCTO
)
2026 nargs
->flags
|= NFSMNT_NOCTO
;
2027 if (mi
->mi_flags
& MI4_GRPID
)
2028 nargs
->flags
|= NFSMNT_GRPID
;
2029 if (mi
->mi_flags
& MI4_LLOCK
)
2030 nargs
->flags
|= NFSMNT_LLOCK
;
2031 if (mi
->mi_flags
& MI4_NOPRINT
)
2032 nargs
->flags
|= NFSMNT_NOPRINT
;
2033 if (mi
->mi_flags
& MI4_DIRECTIO
)
2034 nargs
->flags
|= NFSMNT_DIRECTIO
;
2035 if (mi
->mi_flags
& MI4_PUBLIC
&& nargs
->flags
& NFSMNT_MIRRORMOUNT
)
2036 nargs
->flags
|= NFSMNT_PUBLIC
;
2038 /* Do some referral-specific option tweaking */
2039 if (nargs
->flags
& NFSMNT_REFERRAL
) {
2040 nargs
->flags
&= ~NFSMNT_DORDMA
;
2041 nargs
->flags
|= NFSMNT_TRYRDMA
;
2044 mutex_exit(&mi
->mi_lock
);
2047 * Security data & negotiation policy.
2049 * For mirror mounts, we need to preserve the parent mount's
2050 * preference for security negotiation, translating SV4_TRYSECDEFAULT
2051 * to NFSMNT_SECDEFAULT if present.
2053 * For referrals, we always want security negotiation and will
2054 * set NFSMNT_SECDEFAULT and we will not copy current secdata.
2055 * The reason is that we can't negotiate down from a parent's
2056 * Kerberos flavor to AUTH_SYS.
2058 * If SV4_TRYSECDEFAULT is not set, that indicates that a specific
2059 * security flavour was requested, with data in sv_secdata, and that
2060 * no negotiation should occur. If this specified flavour fails, that's
2061 * it. We will copy sv_secdata, and not set NFSMNT_SECDEFAULT.
2063 * If SV4_TRYSECDEFAULT is set, then we start with a passed-in
2064 * default flavour, in sv_secdata, but then negotiate a new flavour.
2065 * Possible flavours are recorded in an array in sv_secinfo, with
2066 * currently in-use flavour pointed to by sv_currsec.
2068 * If sv_currsec is set, i.e. if negotiation has already occurred,
2069 * we will copy sv_currsec. Otherwise, copy sv_secdata. Regardless,
2070 * we will set NFSMNT_SECDEFAULT, to enable negotiation.
2072 if (nargs
->flags
& NFSMNT_REFERRAL
) {
2073 /* enable negotiation for referral mount */
2074 nargs
->flags
|= NFSMNT_SECDEFAULT
;
2075 secdata
= kmem_alloc(sizeof (sec_data_t
), KM_SLEEP
);
2076 secdata
->secmod
= secdata
->rpcflavor
= AUTH_SYS
;
2077 secdata
->data
= NULL
;
2078 } else if (svp
->sv_flags
& SV4_TRYSECDEFAULT
) {
2079 /* enable negotiation for mirror mount */
2080 nargs
->flags
|= NFSMNT_SECDEFAULT
;
2083 * As a starting point for negotiation, copy parent
2084 * mount's negotiated flavour (sv_currsec) if available,
2085 * or its passed-in flavour (sv_secdata) if not.
2087 if (svp
->sv_currsec
!= NULL
)
2088 secdata
= copy_sec_data(svp
->sv_currsec
);
2089 else if (svp
->sv_secdata
!= NULL
)
2090 secdata
= copy_sec_data(svp
->sv_secdata
);
2094 /* do not enable negotiation; copy parent's passed-in flavour */
2095 if (svp
->sv_secdata
!= NULL
)
2096 secdata
= copy_sec_data(svp
->sv_secdata
);
2101 nfs_rw_exit(&svp
->sv_lock
);
2103 nargs
->flags
|= NFSMNT_NEWARGS
;
2104 nargs
->nfs_args_ext
= NFS_ARGS_EXTB
;
2105 nargs
->nfs_ext_u
.nfs_extB
.secdata
= secdata
;
2107 /* for NFS RO failover; caller will set if necessary */
2108 nargs
->nfs_ext_u
.nfs_extB
.next
= NULL
;
2114 nfs4_trigger_nargs_destroy(struct nfs_args
*nargs
)
2117 * Either the mount failed, in which case the data is not needed, or
2118 * nfs4_mount() has either taken copies of what it needs or,
2119 * where it has merely copied the ptr, it has set *our* ptr to NULL,
2120 * whereby nfs4_free_args() will ignore it.
2122 nfs4_free_args(nargs
);
2123 kmem_free(nargs
, sizeof (struct nfs_args
));
2127 * When we finally get into the mounting, we need to add this
2128 * node to the ephemeral tree.
2130 * This is called from nfs4_mount().
2133 nfs4_record_ephemeral_mount(mntinfo4_t
*mi
, vnode_t
*mvp
)
2135 mntinfo4_t
*mi_parent
;
2136 nfs4_ephemeral_t
*eph
;
2137 nfs4_ephemeral_tree_t
*net
;
2139 nfs4_ephemeral_t
*prior
;
2140 nfs4_ephemeral_t
*child
;
2142 nfs4_ephemeral_t
*peer
;
2144 nfs4_trigger_globals_t
*ntg
;
2145 zone_t
*zone
= curproc
->p_zone
;
2149 mi_parent
= VTOMI4(mvp
);
2152 * Get this before grabbing anything else!
2154 ntg
= zone_getspecific(nfs4_ephemeral_key
, zone
);
2155 if (!ntg
->ntg_thread_started
) {
2156 nfs4_ephemeral_start_harvester(ntg
);
2159 mutex_enter(&mi_parent
->mi_lock
);
2160 mutex_enter(&mi
->mi_lock
);
2162 net
= mi
->mi_ephemeral_tree
=
2163 mi_parent
->mi_ephemeral_tree
;
2166 * If the mi_ephemeral_tree is NULL, then it
2167 * means that either the harvester or a manual
2168 * umount has cleared the tree out right before
2171 * There is nothing we can do here, so return
2172 * to the caller and let them decide whether they
2176 mutex_exit(&mi
->mi_lock
);
2177 mutex_exit(&mi_parent
->mi_lock
);
2183 * We've just tied the mntinfo to the tree, so
2184 * now we bump the refcnt and hold it there until
2185 * this mntinfo is removed from the tree.
2187 nfs4_ephemeral_tree_hold(net
);
2190 * We need to tack together the ephemeral mount
2191 * with this new mntinfo.
2193 eph
= kmem_zalloc(sizeof (*eph
), KM_SLEEP
);
2196 VFS_HOLD(mi
->mi_vfsp
);
2197 eph
->ne_ref_time
= gethrestime_sec();
2200 * We need to tell the ephemeral mount when
2203 eph
->ne_mount_to
= ntg
->ntg_mount_to
;
2205 mi
->mi_ephemeral
= eph
;
2208 * If the enclosing mntinfo4 is also ephemeral,
2209 * then we need to point to its enclosing parent.
2210 * Else the enclosing mntinfo4 is the enclosing parent.
2212 * We also need to weave this ephemeral node
2215 if (mi_parent
->mi_flags
& MI4_EPHEMERAL
) {
2217 * We need to decide if we are
2218 * the root node of this branch
2219 * or if we are a sibling of this
2222 prior
= mi_parent
->mi_ephemeral
;
2223 if (prior
== NULL
) {
2225 * Race condition, clean up, and
2226 * let caller handle mntinfo.
2228 mi
->mi_flags
&= ~MI4_EPHEMERAL
;
2229 mi
->mi_ephemeral
= NULL
;
2230 kmem_free(eph
, sizeof (*eph
));
2231 VFS_RELE(mi
->mi_vfsp
);
2233 nfs4_ephemeral_tree_rele(net
);
2236 if (prior
->ne_child
== NULL
) {
2237 prior
->ne_child
= eph
;
2239 child
= prior
->ne_child
;
2241 prior
->ne_child
= eph
;
2242 eph
->ne_peer
= child
;
2244 child
->ne_prior
= eph
;
2247 eph
->ne_prior
= prior
;
2251 * The parent mntinfo4 is the non-ephemeral
2252 * root of the ephemeral tree. We
2253 * need to decide if we are the root
2254 * node of that tree or if we are a
2255 * sibling of the root node.
2257 * We are the root if there is no
2260 if (net
->net_root
== NULL
) {
2261 net
->net_root
= eph
;
2263 eph
->ne_peer
= peer
= net
->net_root
;
2264 ASSERT(peer
!= NULL
);
2265 net
->net_root
= eph
;
2267 peer
->ne_prior
= eph
;
2270 eph
->ne_prior
= NULL
;
2273 mutex_exit(&mi
->mi_lock
);
2274 mutex_exit(&mi_parent
->mi_lock
);
2280 * Commit the changes to the ephemeral tree for removing this node.
2283 nfs4_ephemeral_umount_cleanup(nfs4_ephemeral_t
*eph
)
2285 nfs4_ephemeral_t
*e
= eph
;
2286 nfs4_ephemeral_t
*peer
;
2287 nfs4_ephemeral_t
*prior
;
2289 peer
= eph
->ne_peer
;
2290 prior
= e
->ne_prior
;
2293 * If this branch root was not the
2294 * tree root, then we need to fix back pointers.
2297 if (prior
->ne_child
== e
) {
2298 prior
->ne_child
= peer
;
2300 prior
->ne_peer
= peer
;
2304 peer
->ne_prior
= prior
;
2306 peer
->ne_mount
->mi_ephemeral_tree
->net_root
= peer
;
2307 peer
->ne_prior
= NULL
;
2309 e
->ne_mount
->mi_ephemeral_tree
->net_root
= NULL
;
2314 * We want to avoid recursion at all costs. So we need to
2315 * unroll the tree. We do this by a depth first traversal to
2316 * leaf nodes. We blast away the leaf and work our way back
2317 * up and down the tree.
2320 nfs4_ephemeral_unmount_engine(nfs4_ephemeral_t
*eph
,
2321 int isTreeRoot
, int flag
, cred_t
*cr
)
2323 nfs4_ephemeral_t
*e
= eph
;
2324 nfs4_ephemeral_t
*prior
;
2330 * We use the loop while unrolling the ephemeral tree.
2334 * First we walk down the child.
2343 * If we are the root of the branch we are removing,
2344 * we end it here. But if the branch is the root of
2345 * the tree, we have to forge on. We do not consider
2346 * the peer list for the root because while it may
2347 * be okay to remove, it is both extra work and a
2348 * potential for a false-positive error to stall the
2351 if (e
== eph
&& isTreeRoot
== FALSE
)
2355 * Next we walk down the peer list.
2364 * We can only remove the node passed in by the
2365 * caller if it is the root of the ephemeral tree.
2366 * Otherwise, the caller will remove it.
2368 if (e
== eph
&& isTreeRoot
== FALSE
)
2372 * Okay, we have a leaf node, time
2375 * Note that prior can be NULL if
2376 * and only if it is the root of the
2379 prior
= e
->ne_prior
;
2382 mutex_enter(&mi
->mi_lock
);
2384 ASSERT(vfsp
!= NULL
);
2387 * Cleared by umount2_engine.
2392 * Inform nfs4_unmount to not recursively
2393 * descend into this node's children when it
2396 mi
->mi_flags
|= MI4_EPHEMERAL_RECURSED
;
2397 mutex_exit(&mi
->mi_lock
);
2399 error
= umount2_engine(vfsp
, flag
, cr
, FALSE
);
2402 * We need to reenable nfs4_unmount's ability
2403 * to recursively descend on this node.
2405 mutex_enter(&mi
->mi_lock
);
2406 mi
->mi_flags
&= ~MI4_EPHEMERAL_RECURSED
;
2407 mutex_exit(&mi
->mi_lock
);
2413 * If we are the current node, we do not want to
2414 * touch anything else. At this point, the only
2415 * way the current node can have survived to here
2416 * is if it is the root of the ephemeral tree and
2417 * we are unmounting the enclosing mntinfo4.
2420 ASSERT(prior
== NULL
);
2425 * Stitch up the prior node. Note that since
2426 * we have handled the root of the tree, prior
2429 ASSERT(prior
!= NULL
);
2430 if (prior
->ne_child
== e
) {
2431 prior
->ne_child
= NULL
;
2433 ASSERT(prior
->ne_peer
== e
);
2435 prior
->ne_peer
= NULL
;
2445 * Common code to safely release net_cnt_lock and net_tree_lock
2448 nfs4_ephemeral_umount_unlock(bool_t
*pmust_unlock
,
2449 nfs4_ephemeral_tree_t
**pnet
)
2451 nfs4_ephemeral_tree_t
*net
= *pnet
;
2453 if (*pmust_unlock
) {
2454 mutex_enter(&net
->net_cnt_lock
);
2455 net
->net_status
&= ~NFS4_EPHEMERAL_TREE_UMOUNTING
;
2456 mutex_exit(&net
->net_cnt_lock
);
2458 mutex_exit(&net
->net_tree_lock
);
2460 *pmust_unlock
= FALSE
;
2465 * While we may have removed any child or sibling nodes of this
2466 * ephemeral node, we can not nuke it until we know that there
2467 * were no active vnodes on it. This will do that final
2468 * work once we know it is not busy.
2471 nfs4_ephemeral_umount_activate(mntinfo4_t
*mi
, bool_t
*pmust_unlock
,
2472 nfs4_ephemeral_tree_t
**pnet
)
2475 * Now we need to get rid of the ephemeral data if it exists.
2477 mutex_enter(&mi
->mi_lock
);
2478 if (mi
->mi_ephemeral
) {
2480 * If we are the root node of an ephemeral branch
2481 * which is being removed, then we need to fixup
2482 * pointers into and out of the node.
2484 if (!(mi
->mi_flags
& MI4_EPHEMERAL_RECURSED
))
2485 nfs4_ephemeral_umount_cleanup(mi
->mi_ephemeral
);
2487 nfs4_ephemeral_tree_rele(*pnet
);
2488 ASSERT(mi
->mi_ephemeral
!= NULL
);
2490 kmem_free(mi
->mi_ephemeral
, sizeof (*mi
->mi_ephemeral
));
2491 mi
->mi_ephemeral
= NULL
;
2492 VFS_RELE(mi
->mi_vfsp
);
2495 mutex_exit(&mi
->mi_lock
);
2497 nfs4_ephemeral_umount_unlock(pmust_unlock
, pnet
);
2501 * Unmount an ephemeral node.
2503 * Note that if this code fails, then it must unlock.
2505 * If it succeeds, then the caller must be prepared to do so.
/*
 * nfs4_ephemeral_umount: prepare the ephemeral tree attached to mi for an
 * unmount and run the ephemeral unmount engine on the node(s) involved.
 *
 *   mi            mntinfo4 being unmounted; mi_ephemeral_tree, mi_ephemeral
 *                 and mi_flags are read from it.
 *   flag          umount flags; MS_SYSSPACE is tested when the tree is
 *                 already marked NFS4_EPHEMERAL_TREE_LOCKED.
 *   cr            caller credentials (not referenced on any line visible
 *                 in this excerpt).
 *   pmust_unlock  out: cleared to FALSE on entry, set to TRUE once this
 *                 call has taken net_tree_lock, and cleared again after
 *                 the tree has been derooted and marked invalid.
 *   pnet          out: receives mi->mi_ephemeral_tree.
 *
 * NOTE(review): this excerpt is missing lines (the return type, braces,
 * return/goto statements and parts of several expressions), so the exact
 * branch structure and the returned values cannot be confirmed from here.
 */
2508 nfs4_ephemeral_umount(mntinfo4_t
*mi
, int flag
, cred_t
*cr
,
2509 bool_t
*pmust_unlock
, nfs4_ephemeral_tree_t
**pnet
)
2512 nfs4_ephemeral_t
*eph
;
2513 nfs4_ephemeral_tree_t
*net
;
2514 int is_derooting
= FALSE
;
2515 int is_recursed
= FALSE
;
2516 int was_locked
= FALSE
;
2519 * Make sure to set the default state for cleaning
2520 * up the tree in the caller (and on the way out).
2522 *pmust_unlock
= FALSE
;
2525 * The active vnodes on this file system may be ephemeral
2526 * children. We need to check for and try to unmount them
2527 * here. If any can not be unmounted, we are going
2530 mutex_enter(&mi
->mi_lock
);
2533 * If an ephemeral tree, we need to check to see if
2534 * the lock is already held. If it is, then we need
2535 * to see if we are being called as a result of
2536 * the recursive removal of some node of the tree or
2537 * if we are another attempt to remove the tree.
2539 * mi_flags & MI4_EPHEMERAL indicates an ephemeral
2540 * node. mi_ephemeral being non-NULL also does this.
2542 * mi_ephemeral_tree being non-NULL is sufficient
2543 * to also indicate either it is an ephemeral node
2544 * or the enclosing mntinfo4.
2546 * Do we need MI4_EPHEMERAL? Yes, it is useful for
2547 * when we delete the ephemeral node and need to
2548 * differentiate from an ephemeral node and the
2549 * enclosing root node.
2551 *pnet
= net
= mi
->mi_ephemeral_tree
;
2553 mutex_exit(&mi
->mi_lock
);
/*
 * Snapshot the per-mount ephemeral state. A NULL mi_ephemeral is what
 * sets is_derooting, i.e. the request is treated as removal of the root.
 */
2557 eph
= mi
->mi_ephemeral
;
2558 is_recursed
= mi
->mi_flags
& MI4_EPHEMERAL_RECURSED
;
2559 is_derooting
= (eph
== NULL
);
2561 mutex_enter(&net
->net_cnt_lock
);
2564 * If this is not recursion, then we need to
2565 * check to see if a harvester thread has
2566 * already grabbed the lock.
2568 * After we exit this branch, we may not
2569 * blindly return, we need to jump to
2573 if (net
->net_status
&
2574 NFS4_EPHEMERAL_TREE_LOCKED
) {
2576 * If the tree is locked, we need
2577 * to decide whether we are the
2578 * harvester or some explicit call
2579 * for a umount. The only way that
2580 * we are the harvester is if
2581 * MS_SYSSPACE is set.
2583 * We only let the harvester through
2586 * We return EBUSY so that the
2587 * caller knows something is
2588 * going on. Note that by that
2589 * time, the umount in the other
2590 * thread may have already occured.
2592 if (!(flag
& MS_SYSSPACE
)) {
2593 mutex_exit(&net
->net_cnt_lock
);
2594 mutex_exit(&mi
->mi_lock
);
2603 mutex_exit(&net
->net_cnt_lock
);
2604 mutex_exit(&mi
->mi_lock
);
2607 * If we are not the harvester, we need to check
2608 * to see if we need to grab the tree lock.
2610 if (was_locked
== FALSE
) {
2612 * If we grab the lock, it means that no other
2613 * operation is working on the tree. If we don't
2614 * grab it, we need to decide if this is because
2615 * we are a recursive call or a new operation.
2617 if (mutex_tryenter(&net
->net_tree_lock
)) {
2618 *pmust_unlock
= TRUE
;
2621 * If we are a recursive call, we can
2622 * proceed without the lock.
2623 * Otherwise we have to wait until
2624 * the lock becomes free.
2627 mutex_enter(&net
->net_cnt_lock
);
2628 if (net
->net_status
&
2629 (NFS4_EPHEMERAL_TREE_DEROOTING
2630 | NFS4_EPHEMERAL_TREE_INVALID
)) {
2631 mutex_exit(&net
->net_cnt_lock
);
2634 mutex_exit(&net
->net_cnt_lock
);
2637 * We can't hold any other locks whilst
2638 * we wait on this to free up.
2640 mutex_enter(&net
->net_tree_lock
);
2643 * Note that while mi->mi_ephemeral
2644 * may change and thus we have to
2645 * update eph, it is the case that
2646 * we have tied down net and
2647 * do not care if mi->mi_ephemeral_tree
2650 mutex_enter(&mi
->mi_lock
);
2651 eph
= mi
->mi_ephemeral
;
2652 mutex_exit(&mi
->mi_lock
);
2655 * Okay, we need to see if either the
2656 * tree got nuked or the current node
2657 * got nuked. Both of which will cause
2660 * Note that a subsequent retry of the
2661 * umount shall work.
2663 mutex_enter(&net
->net_cnt_lock
);
2664 if (net
->net_status
&
2665 NFS4_EPHEMERAL_TREE_INVALID
||
2666 (!is_derooting
&& eph
== NULL
)) {
2667 mutex_exit(&net
->net_cnt_lock
);
2668 mutex_exit(&net
->net_tree_lock
);
2671 mutex_exit(&net
->net_cnt_lock
);
2672 *pmust_unlock
= TRUE
;
2678 * Only once we have grabbed the lock can we mark what we
2679 * are planning on doing to the ephemeral tree.
2681 if (*pmust_unlock
) {
2682 mutex_enter(&net
->net_cnt_lock
);
2683 net
->net_status
|= NFS4_EPHEMERAL_TREE_UMOUNTING
;
2686 * Check to see if we are nuking the root.
2690 NFS4_EPHEMERAL_TREE_DEROOTING
;
2691 mutex_exit(&net
->net_cnt_lock
);
/*
 * Not removing the root: hand this node (eph) to the unmount engine.
 * The root-removal path below starts from net->net_root instead.
 */
2694 if (!is_derooting
) {
2696 * Only work on children if the caller has not already
2700 ASSERT(eph
!= NULL
);
2702 error
= nfs4_ephemeral_unmount_engine(eph
,
2708 eph
= net
->net_root
;
2711 * Only work if there is something there.
2714 error
= nfs4_ephemeral_unmount_engine(eph
, TRUE
,
2717 mutex_enter(&net
->net_cnt_lock
);
2719 ~NFS4_EPHEMERAL_TREE_DEROOTING
;
2720 mutex_exit(&net
->net_cnt_lock
);
2725 * Nothing else which goes wrong will
2726 * invalidate the blowing away of the
2729 net
->net_root
= NULL
;
2733 * We have derooted and we have caused the tree to be
2736 mutex_enter(&net
->net_cnt_lock
);
2737 net
->net_status
&= ~NFS4_EPHEMERAL_TREE_DEROOTING
;
2738 net
->net_status
|= NFS4_EPHEMERAL_TREE_INVALID
;
2739 DTRACE_NFSV4_1(nfs4clnt__dbg__ephemeral__tree__derooting
,
2740 uint_t
, net
->net_refcnt
);
2743 * We will not finalize this node, so safe to
2746 nfs4_ephemeral_tree_decr(net
);
2747 mutex_exit(&net
->net_cnt_lock
);
2749 if (was_locked
== FALSE
)
2750 mutex_exit(&net
->net_tree_lock
);
2753 * We have just blown away any notation of this
2754 * tree being locked or having a refcnt.
2755 * We can't let the caller try to clean things up.
2757 *pmust_unlock
= FALSE
;
2760 * At this point, the tree should no longer be
2761 * associated with the mntinfo4. We need to pull
2762 * it off there and let the harvester take
2763 * care of it once the refcnt drops.
2765 mutex_enter(&mi
->mi_lock
);
2766 mi
->mi_ephemeral_tree
= NULL
;
2767 mutex_exit(&mi
->mi_lock
);
2774 nfs4_ephemeral_umount_unlock(pmust_unlock
, pnet
);
2780 * Do the umount and record any error in the parent.
/*
 * nfs4_ephemeral_record_umount: unmount vfsp through umount2_engine() with
 * kcred and flag, then mark the node before e in the walk (prior) with
 * NFS4_EPHEMERAL_CHILD_ERROR when e is prior's child, or with
 * NFS4_EPHEMERAL_PEER_ERROR in the other visible branch.
 *
 * NOTE(review): the lines holding the condition under which prior is
 * marked, and the left-hand side of the two assignments, are not visible
 * in this excerpt; presumably prior is only marked when the unmount
 * returned an error -- confirm against the full source.
 */
2783 nfs4_ephemeral_record_umount(vfs_t
*vfsp
, int flag
,
2784 nfs4_ephemeral_t
*e
, nfs4_ephemeral_t
*prior
)
2789 * Only act on if the fs is still mounted.
2794 error
= umount2_engine(vfsp
, flag
, kcred
, FALSE
);
2797 if (prior
->ne_child
== e
)
2799 NFS4_EPHEMERAL_CHILD_ERROR
;
2802 NFS4_EPHEMERAL_PEER_ERROR
;
2808 * For each tree in the forest (where the forest is in
2809 * effect all of the ephemeral trees for this zone),
2810 * scan to see if a node can be unmounted. Note that
2811 * unlike nfs4_ephemeral_unmount_engine(), we do
2812 * not process the current node before children or
2813 * siblings. I.e., if a node can be unmounted, we
2814 * do not recursively check to see if the nodes
2815 * hanging off of it can also be unmounted.
2817 * Instead, we delve down deep to try and remove the
2818 * children first. Then, because we share code with
2819 * nfs4_ephemeral_unmount_engine(), we will try
2820 * them again. This could be a performance issue in
2823 * Also note that unlike nfs4_ephemeral_unmount_engine(),
2824 * we do not halt on an error. We will not remove the
2825 * current node, but we will keep on trying to remove
2828 * force indicates that we want the unmount to occur
2829 * even if there is something blocking it.
2831 * time_check indicates that we want to see if the
2832 * mount has expired past mount_to or not. Typically
2833 * we want to do this and only on a shutdown of the
2834 * zone would we want to ignore the check.
/*
 * nfs4_ephemeral_harvest_forest: walk every tree on ntg->ntg_forest while
 * holding ntg_forest_lock and try to unmount its ephemeral nodes.
 *
 * For each tree the code takes a hold, takes net_tree_lock and sets
 * NFS4_EPHEMERAL_TREE_LOCKED so that the unmount code knows the tree lock
 * is already held. Nodes are then visited using the ne_state values
 * (VISIT_CHILD, VISIT_SIBLING, PROCESS_ME, CHILD_ERROR, PEER_ERROR), and
 * nfs4_ephemeral_record_umount() is called for the nodes to be unmounted.
 *
 * When a tree has been processed its hold is dropped. A tree left with a
 * zero refcnt and NFS4_EPHEMERAL_TREE_INVALID set is unlinked from the
 * forest and pushed on a local list; those trees are destroyed and freed
 * only after ntg_forest_lock has been released.
 *
 *   ntg         per-zone trigger globals owning the forest.
 *   force       request an unmount even if something is blocking it.
 *   time_check  honour the per-node mount timeout (ne_mount_to).
 *
 * NOTE(review): many lines of this function (several declarations, the
 * inner traversal loop header, the tests of force and time_check, braces
 * and else branches) are missing from this excerpt; the summary above is
 * limited to what the visible lines and their comments show.
 */
2837 nfs4_ephemeral_harvest_forest(nfs4_trigger_globals_t
*ntg
,
2838 bool_t force
, bool_t time_check
)
2840 nfs4_ephemeral_tree_t
*net
;
2841 nfs4_ephemeral_tree_t
*prev
= NULL
;
2842 nfs4_ephemeral_tree_t
*next
;
2843 nfs4_ephemeral_t
*e
;
2844 nfs4_ephemeral_t
*prior
;
2845 time_t now
= gethrestime_sec();
2847 nfs4_ephemeral_tree_t
*harvest
= NULL
;
/* NOTE(review): presumably guarded by a test of force on a line not shown. */
2855 flag
= MS_FORCE
| MS_SYSSPACE
;
2859 mutex_enter(&ntg
->ntg_forest_lock
);
2860 for (net
= ntg
->ntg_forest
; net
!= NULL
; net
= next
) {
/* Fetch the successor first: net may be unlinked from the forest below. */
2861 next
= net
->net_next
;
2863 nfs4_ephemeral_tree_hold(net
);
2865 mutex_enter(&net
->net_tree_lock
);
2868 * Let the unmount code know that the
2869 * tree is already locked!
2871 mutex_enter(&net
->net_cnt_lock
);
2872 net
->net_status
|= NFS4_EPHEMERAL_TREE_LOCKED
;
2873 mutex_exit(&net
->net_cnt_lock
);
2876 * If the intent is force all ephemeral nodes to
2877 * be unmounted in this zone, we can short circuit a
2878 * lot of tree traversal and simply zap the root node.
2881 if (net
->net_root
) {
2882 mi
= net
->net_root
->ne_mount
;
2885 ASSERT(vfsp
!= NULL
);
2888 * Cleared by umount2_engine.
2892 (void) umount2_engine(vfsp
, flag
,
2901 e
->ne_state
= NFS4_EPHEMERAL_VISIT_CHILD
;
2904 if (e
->ne_state
== NFS4_EPHEMERAL_VISIT_CHILD
) {
2905 e
->ne_state
= NFS4_EPHEMERAL_VISIT_SIBLING
;
2909 NFS4_EPHEMERAL_VISIT_CHILD
;
2913 } else if (e
->ne_state
==
2914 NFS4_EPHEMERAL_VISIT_SIBLING
) {
2915 e
->ne_state
= NFS4_EPHEMERAL_PROCESS_ME
;
2919 NFS4_EPHEMERAL_VISIT_CHILD
;
2923 } else if (e
->ne_state
==
2924 NFS4_EPHEMERAL_CHILD_ERROR
) {
2925 prior
= e
->ne_prior
;
2928 * If a child reported an error, do
2929 * not bother trying to unmount.
2931 * If your prior node is a parent,
2932 * pass the error up such that they
2933 * also do not try to unmount.
2935 * However, if your prior is a sibling,
2936 * let them try to unmount if they can.
2939 if (prior
->ne_child
== e
)
2941 NFS4_EPHEMERAL_CHILD_ERROR
;
2944 NFS4_EPHEMERAL_PEER_ERROR
;
2948 * Clear the error and if needed, process peers.
2950 * Once we mask out the error, we know whether
2951 * or we have to process another node.
2953 e
->ne_state
&= ~NFS4_EPHEMERAL_CHILD_ERROR
;
2954 if (e
->ne_state
== NFS4_EPHEMERAL_PROCESS_ME
)
2958 } else if (e
->ne_state
==
2959 NFS4_EPHEMERAL_PEER_ERROR
) {
2960 prior
= e
->ne_prior
;
2963 if (prior
->ne_child
== e
)
2965 NFS4_EPHEMERAL_CHILD_ERROR
;
2968 NFS4_EPHEMERAL_PEER_ERROR
;
2972 * Clear the error from this node and do the
2973 * correct processing.
2975 e
->ne_state
&= ~NFS4_EPHEMERAL_PEER_ERROR
;
2979 prior
= e
->ne_prior
;
2980 e
->ne_state
= NFS4_EPHEMERAL_OK
;
2983 * It must be the case that we need to process
2987 now
- e
->ne_ref_time
> e
->ne_mount_to
) {
2992 * Cleared by umount2_engine.
2998 * Note that we effectively work down to the
2999 * leaf nodes first, try to unmount them,
3000 * then work our way back up into the leaf
3003 * Also note that we deal with a lot of
3004 * complexity by sharing the work with
3005 * the manual unmount code.
3007 nfs4_ephemeral_record_umount(vfsp
, flag
,
3017 * At this point we are done processing this tree.
3019 * If the tree is invalid and we were the only reference
3020 * to it, then we push it on the local linked list
3021 * to remove it at the end. We avoid that action now
3022 * to keep the tree processing going along at a fair clip.
3024 * Else, even if we were the only reference, we
3025 * allow it to be reused as needed.
3027 mutex_enter(&net
->net_cnt_lock
);
3028 nfs4_ephemeral_tree_decr(net
);
3029 if (net
->net_refcnt
== 0 &&
3030 net
->net_status
& NFS4_EPHEMERAL_TREE_INVALID
) {
3031 net
->net_status
&= ~NFS4_EPHEMERAL_TREE_LOCKED
;
3032 mutex_exit(&net
->net_cnt_lock
);
3033 mutex_exit(&net
->net_tree_lock
);
/*
 * Unlink net from the forest: through prev, or by moving the forest
 * head (the test choosing between the two is on a line not shown here).
 */
3036 prev
->net_next
= net
->net_next
;
3038 ntg
->ntg_forest
= net
->net_next
;
/* Chain net onto the local harvest list; it is freed after the walk. */
3040 net
->net_next
= harvest
;
3043 VFS_RELE(net
->net_mount
->mi_vfsp
);
3044 MI4_RELE(net
->net_mount
);
3049 net
->net_status
&= ~NFS4_EPHEMERAL_TREE_LOCKED
;
3050 mutex_exit(&net
->net_cnt_lock
);
3051 mutex_exit(&net
->net_tree_lock
);
3055 mutex_exit(&ntg
->ntg_forest_lock
);
/* Forest lock dropped: destroy and free every tree collected above. */
3057 for (net
= harvest
; net
!= NULL
; net
= next
) {
3058 next
= net
->net_next
;
3060 mutex_destroy(&net
->net_tree_lock
);
3061 mutex_destroy(&net
->net_cnt_lock
);
3062 kmem_free(net
, sizeof (*net
));
3067 * This is the thread which decides when the harvesting
3068 * can proceed and when to kill it off for this zone.
/*
 * nfs4_ephemeral_harvester: thread body for the per-zone harvester; it is
 * the function passed to zthread_create() by
 * nfs4_ephemeral_start_harvester().
 *
 * It waits in zone_status_timedwait() until either the zone reaches
 * ZONE_IS_SHUTTING_DOWN or nfs4_trigger_thread_timer * hz ticks have
 * passed. A result other than -1 is treated as the zone exiting. Otherwise
 * the forest is scanned with nfs4_ephemeral_harvest_forest(ntg, FALSE,
 * TRUE), i.e. without force and with the timeout check enabled.
 *
 * NOTE(review): the loop construct, the exit path and the statement that
 * follows the ntg_forest == NULL test are not visible in this excerpt;
 * presumably an empty forest just skips the scan -- confirm.
 */
3071 nfs4_ephemeral_harvester(nfs4_trigger_globals_t
*ntg
)
3074 zone_t
*zone
= curproc
->p_zone
;
3077 timeleft
= zone_status_timedwait(zone
, ddi_get_lbolt() +
3078 nfs4_trigger_thread_timer
* hz
, ZONE_IS_SHUTTING_DOWN
);
3081 * zone is exiting...
3083 if (timeleft
!= -1) {
3084 ASSERT(zone_status_get(zone
) >= ZONE_IS_SHUTTING_DOWN
);
3090 * Only bother scanning if there is potential
3093 if (ntg
->ntg_forest
== NULL
)
3097 * Now scan the list and get rid of everything which
3100 nfs4_ephemeral_harvest_forest(ntg
, FALSE
, TRUE
);
3107 * The zone specific glue needed to start the unmount harvester.
3109 * Note that we want to avoid holding the mutex as long as possible,
3110 * hence the multiple checks.
3112 * The caller should avoid us getting down here in the first
/*
 * nfs4_ephemeral_start_harvester: start the harvester thread for the zone
 * described by ntg unless it has already been started.
 *
 * ntg_thread_started is tested once without any lock and once more after
 * taking nfs4_ephemeral_thread_lock. Only when both tests find it clear
 * is the thread created (zthread_create() running
 * nfs4_ephemeral_harvester at minclsyspri) and the flag set to TRUE,
 * still under the lock.
 *
 * NOTE(review): the statements following each "already started" test are
 * not visible in this excerpt; presumably both return early.
 */
3116 nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t
*ntg
)
3119 * It got started before we got here...
3121 if (ntg
->ntg_thread_started
)
3124 mutex_enter(&nfs4_ephemeral_thread_lock
);
3126 if (ntg
->ntg_thread_started
) {
3127 mutex_exit(&nfs4_ephemeral_thread_lock
);
3132 * Start the unmounter harvester thread for this zone.
3134 (void) zthread_create(NULL
, 0, nfs4_ephemeral_harvester
,
3135 ntg
, 0, minclsyspri
);
3137 ntg
->ntg_thread_started
= TRUE
;
3138 mutex_exit(&nfs4_ephemeral_thread_lock
);
/*
 * nfs4_ephemeral_zsd_create: zone-specific-data create callback (it is
 * registered through zone_key_create() in nfs4_ephemeral_init()).
 *
 * Allocates a zeroed nfs4_trigger_globals_t with KM_SLEEP, marks the
 * harvester thread as not started, seeds ntg_mount_to from the global
 * default nfs4_trigger_mount_to and initialises ntg_forest_lock.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably the new ntg is returned as the zone's data -- confirm.
 */
3143 nfs4_ephemeral_zsd_create(zoneid_t zoneid
)
3145 nfs4_trigger_globals_t
*ntg
;
3147 ntg
= kmem_zalloc(sizeof (*ntg
), KM_SLEEP
);
3148 ntg
->ntg_thread_started
= FALSE
;
3151 * This is the default....
3153 ntg
->ntg_mount_to
= nfs4_trigger_mount_to
;
3155 mutex_init(&ntg
->ntg_forest_lock
, NULL
,
3156 MUTEX_DEFAULT
, NULL
);
3162 * Try a nice gentle walk down the forest and convince
3163 * all of the trees to gracefully give it up.
/*
 * nfs4_ephemeral_zsd_shutdown: zone shutdown callback. arg is the zone's
 * nfs4_trigger_globals_t; the forest is harvested with force == FALSE and
 * time_check == FALSE, i.e. a non-forced unmount attempt that ignores the
 * mount timeout.
 *
 * NOTE(review): lines between the declaration and the harvest call are
 * missing from this excerpt (possibly a NULL check of ntg) -- confirm.
 */
3167 nfs4_ephemeral_zsd_shutdown(zoneid_t zoneid
, void *arg
)
3169 nfs4_trigger_globals_t
*ntg
= arg
;
3174 nfs4_ephemeral_harvest_forest(ntg
, FALSE
, FALSE
);
3178 * Race along the forest and rip all of the trees out by
/*
 * nfs4_ephemeral_zsd_destroy: zone destroy callback. arg is the zone's
 * nfs4_trigger_globals_t. The forest is harvested with force == TRUE and
 * time_check == FALSE, after which ntg_forest_lock is destroyed and the
 * ntg structure itself is freed.
 *
 * NOTE(review): lines between the declaration and the harvest call are
 * missing from this excerpt (possibly a NULL check of ntg) -- confirm.
 */
3183 nfs4_ephemeral_zsd_destroy(zoneid_t zoneid
, void *arg
)
3185 nfs4_trigger_globals_t
*ntg
= arg
;
3190 nfs4_ephemeral_harvest_forest(ntg
, TRUE
, FALSE
);
3192 mutex_destroy(&ntg
->ntg_forest_lock
);
3193 kmem_free(ntg
, sizeof (*ntg
));
3197 * This is the zone independent cleanup needed for
3198 * ephemeral mount processing.
/*
 * nfs4_ephemeral_fini: undo nfs4_ephemeral_init() -- delete the zone key
 * nfs4_ephemeral_key (its return value is deliberately discarded) and
 * destroy nfs4_ephemeral_thread_lock.
 */
3201 nfs4_ephemeral_fini(void)
3203 (void) zone_key_delete(nfs4_ephemeral_key
);
3204 mutex_destroy(&nfs4_ephemeral_thread_lock
);
3208 * This is the zone independent initialization needed for
3209 * ephemeral mount processing.
/*
 * nfs4_ephemeral_init: initialise nfs4_ephemeral_thread_lock and register
 * the zone key nfs4_ephemeral_key with its create, shutdown and destroy
 * callbacks (nfs4_ephemeral_zsd_create / _zsd_shutdown / _zsd_destroy).
 *
 * NOTE(review): the line carrying the final mutex_init() argument is
 * missing from this excerpt.
 */
3212 nfs4_ephemeral_init(void)
3214 mutex_init(&nfs4_ephemeral_thread_lock
, NULL
, MUTEX_DEFAULT
,
3217 zone_key_create(&nfs4_ephemeral_key
, nfs4_ephemeral_zsd_create
,
3218 nfs4_ephemeral_zsd_shutdown
, nfs4_ephemeral_zsd_destroy
);
3222 * nfssys() calls this function to set the per-zone
3223 * value of mount_to to drive when an ephemeral mount is
3224 * timed out. Each mount will grab a copy of this value
/*
 * nfs4_ephemeral_set_mount_to: store mount_to in the ntg_mount_to field of
 * the trigger globals belonging to the calling process's zone
 * (curproc->p_zone), looked up with zone_getspecific().
 *
 * NOTE(review): no lock is taken on the visible lines, and the line
 * between the lookup and the store is missing from this excerpt
 * (possibly an assertion that ntg is non-NULL) -- confirm.
 */
3228 nfs4_ephemeral_set_mount_to(uint_t mount_to
)
3230 nfs4_trigger_globals_t
*ntg
;
3231 zone_t
*zone
= curproc
->p_zone
;
3233 ntg
= zone_getspecific(nfs4_ephemeral_key
, zone
);
3235 ntg
->ntg_mount_to
= mount_to
;
3239 * Walk the list of v4 mount options; if they are currently set in vfsp,
3240 * append them to a new comma-separated mount option string, and return it.
3242 * Caller should free by calling nfs4_trigger_destroy_mntopts().
/*
 * nfs4_trigger_create_mntopts: build a comma-separated string of the mount
 * options currently set on vfsp.
 *
 * A zeroed buffer of MAX_MNTOPT_STR bytes is allocated with KM_SLEEP. Each
 * entry of the NFSv4 option prototype list that is not flagged MO_EMPTY is
 * offered to nfs4_trigger_add_mntopt(); MNTOPT_XATTR and MNTOPT_NOXATTR
 * are offered separately afterwards because they are not in that list.
 * If any add fails the buffer is freed (and, inside the loop, the vfssw
 * reference is dropped as well).
 *
 * NOTE(review): the declarations of mntopts, vswp and i and every return
 * statement are missing from this excerpt; presumably NULL is returned on
 * failure and the buffer on success -- confirm.
 */
3245 nfs4_trigger_create_mntopts(vfs_t
*vfsp
)
3250 mntopts_t
*optproto
;
3252 mntopts
= kmem_zalloc(MAX_MNTOPT_STR
, KM_SLEEP
);
3254 /* get the list of applicable mount options for v4; locks *vswp */
3255 vswp
= vfs_getvfssw(MNTTYPE_NFS4
);
3256 optproto
= &vswp
->vsw_optproto
;
3258 for (i
= 0; i
< optproto
->mo_count
; i
++) {
3259 struct mntopt
*mop
= &optproto
->mo_list
[i
];
3261 if (mop
->mo_flags
& MO_EMPTY
)
3264 if (nfs4_trigger_add_mntopt(mntopts
, mop
->mo_name
, vfsp
)) {
3265 kmem_free(mntopts
, MAX_MNTOPT_STR
);
3266 vfs_unrefvfssw(vswp
);
3271 vfs_unrefvfssw(vswp
);
3274 * MNTOPT_XATTR is not in the v4 mount opt proto list,
3275 * and it may only be passed via MS_OPTIONSTR, so we
3276 * must handle it here.
3278 * Ideally, it would be in the list, but NFS does not specify its
3279 * own opt proto list, it uses instead the default one. Since
3280 * not all filesystems support extended attrs, it would not be
3281 * appropriate to add it there.
3283 if (nfs4_trigger_add_mntopt(mntopts
, MNTOPT_XATTR
, vfsp
) ||
3284 nfs4_trigger_add_mntopt(mntopts
, MNTOPT_NOXATTR
, vfsp
)) {
3285 kmem_free(mntopts
, MAX_MNTOPT_STR
);
/*
 * nfs4_trigger_destroy_mntopts: release an option string obtained from
 * nfs4_trigger_create_mntopts(). The buffer is freed with the fixed size
 * MAX_MNTOPT_STR, matching the size it was allocated with there.
 *
 * NOTE(review): one line before the kmem_free() call is missing from this
 * excerpt (possibly a NULL guard on mntopts) -- confirm.
 */
3293 nfs4_trigger_destroy_mntopts(char *mntopts
)
3296 kmem_free(mntopts
, MAX_MNTOPT_STR
);
3300 * Check a single mount option (optname). Add to mntopts if it is set in VFS.
/*
 * nfs4_trigger_add_mntopt: append optname to the comma-separated string in
 * mntopts when vfs_optionisset() reports the option as set on vfsp.
 *
 *   mntopts  NUL-terminated buffer being built; the bound check below
 *            treats its capacity as MAX_MNTOPT_STR bytes.
 *   optname  option name to test and append.
 *   vfsp     file system whose options are consulted.
 *
 * Any NULL argument is rejected up front. Before appending, the current
 * length plus the option length plus two bytes (separator and terminating
 * NUL) is compared against MAX_MNTOPT_STR. A "," is written first only
 * when the string is not empty.
 *
 * NOTE(review): the return statements are missing from this excerpt.
 * nfs4_trigger_create_mntopts() treats a non-zero result as failure, so
 * presumably 0 means success and an errno value is returned otherwise.
 */
3303 nfs4_trigger_add_mntopt(char *mntopts
, char *optname
, vfs_t
*vfsp
)
3305 if (mntopts
== NULL
|| optname
== NULL
|| vfsp
== NULL
)
3308 if (vfs_optionisset(vfsp
, optname
, NULL
)) {
3309 size_t mntoptslen
= strlen(mntopts
);
3310 size_t optnamelen
= strlen(optname
);
3312 /* +1 for ',', +1 for NUL */
3313 if (mntoptslen
+ optnamelen
+ 2 > MAX_MNTOPT_STR
)
3316 /* first or subsequent mount option? */
3317 if (*mntopts
!= '\0')
3318 (void) strcat(mntopts
, ",");
3320 (void) strcat(mntopts
, optname
);
/*
 * nfs4_ping_server_common: probe an NFSv4 server by issuing the RFS_NULL
 * procedure (void arguments, void results) over a freshly created client
 * handle.
 *
 *   knc     transport configuration passed to clnt_tli_kcreate().
 *   addr    server address passed to clnt_tli_kcreate().
 *   nointr  NOTE(review): presumably selects whether cl_nosignal is set
 *           around the call; the lines testing it are not visible here.
 *
 * Returns RPC_FAILED when the client handle cannot be created. After the
 * call the handle's auth is released with AUTH_DESTROY().
 *
 * NOTE(review): the declarations of cl, retries and max_msgsize, the
 * timeout seconds, the remaining CLNT_CALL arguments, the handle teardown
 * and the final return are missing from this excerpt; presumably the
 * CLNT_CALL status is what gets returned -- confirm.
 */
3326 static enum clnt_stat
3327 nfs4_ping_server_common(struct knetconfig
*knc
, struct netbuf
*addr
, int nointr
)
3331 enum clnt_stat status
;
3333 struct timeval timeout
;
3335 /* as per recov_newserver() */
3339 timeout
.tv_usec
= 0;
3341 if (clnt_tli_kcreate(knc
, addr
, NFS_PROGRAM
, NFS_V4
,
3342 max_msgsize
, retries
, CRED(), &cl
) != 0)
3343 return (RPC_FAILED
);
3346 cl
->cl_nosignal
= TRUE
;
3347 status
= CLNT_CALL(cl
, RFS_NULL
, xdr_void
, NULL
, xdr_void
, NULL
,
3350 cl
->cl_nosignal
= FALSE
;
3352 AUTH_DESTROY(cl
->cl_auth
);
/*
 * nfs4_trigger_ping_server: convenience wrapper that pings the server
 * described by svp, forwarding svp->sv_knconf, the address of svp->sv_addr
 * and nointr to nfs4_ping_server_common() and returning its result.
 */
3358 static enum clnt_stat
3359 nfs4_trigger_ping_server(servinfo4_t
*svp
, int nointr
)
3361 return (nfs4_ping_server_common(svp
->sv_knconf
, &svp
->sv_addr
, nointr
));