4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
24 * Copyright 2013 Joyent, Inc. All rights reserved.
28 * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
29 * All rights reserved.
32 #include <sys/errno.h>
33 #include <sys/param.h>
34 #include <sys/types.h>
38 #include <sys/utsname.h>
40 #include <sys/vnode.h>
41 #include <sys/pathname.h>
42 #include <sys/bootconf.h>
43 #include <sys/fs_subr.h>
44 #include <rpc/types.h>
47 #include <nfs/nfs_clnt.h>
48 #include <nfs/rnode.h>
49 #include <nfs/mount.h>
50 #include <nfs/nfssys.h>
51 #include <sys/debug.h>
52 #include <sys/cmn_err.h>
54 #include <sys/fcntl.h>
58 * This is the loadable module wrapper.
60 #include <sys/systm.h>
61 #include <sys/modctl.h>
62 #include <sys/syscall.h>
65 #include <rpc/types.h>
71 * The pseudo NFS filesystem that allows diskless booting to dynamically
72 * mount an NFS V2, NFS V3, or NFS V4 filesystem. This only implements
73 * the VFS_MOUNTROOT op and is only intended to be used by the
74 * diskless booting code until the real root filesystem is mounted.
75 * Nothing else should ever call this!
77 * The strategy is that if the initial rootfs type is set to "nfsdyn"
78 * by loadrootmodules() this filesystem is called to mount the
79 * root filesystem. It first attempts to mount a V4 filesystem, and if that
80 * fails due to an RPC version mismatch it tries V3 and finally V2.
81 * Once the real mount succeeds the vfsops and rootfs name are changed
82 * to reflect the real filesystem type.
84 static int nfsdyninit(int, char *);
85 static int nfsdyn_mountroot(vfs_t
*, whymountroot_t
);
88 * The following data structures are used to configure the NFS
89 * system call, the NFS Version 2 client VFS, and the NFS Version
90 * 3 client VFS into the system. The NFS Version 4 structures are defined elsewhere.
95 * The NFS system call.
97 static struct sysent nfssysent
= {
99 SE_32RVAL1
| SE_ARGC
| SE_NOUNLOAD
,
103 static struct modlsys modlsys
= {
105 "NFS syscall, client, and common",
109 #ifdef _SYSCALL32_IMPL
110 static struct modlsys modlsys32
= {
112 "NFS syscall, client, and common (32-bit)",
115 #endif /* _SYSCALL32_IMPL */
118 * The NFS Dynamic client VFS.
120 static vfsdef_t vfw
= {
128 static struct modlfs modlfs
= {
130 "network filesystem",
135 * The NFS Version 2 client VFS.
137 static vfsdef_t vfw2
= {
141 VSW_CANREMOUNT
|VSW_NOTZONESAFE
|VSW_STATS
,
145 static struct modlfs modlfs2
= {
147 "network filesystem version 2",
152 * The NFS Version 3 client VFS.
154 static vfsdef_t vfw3
= {
158 VSW_CANREMOUNT
|VSW_NOTZONESAFE
|VSW_STATS
,
162 static struct modlfs modlfs3
= {
164 "network filesystem version 3",
168 extern struct modlfs modlfs4
;
171 * We have too many linkage structures so we define our own XXX
173 struct modlinkage_big
{
174 int ml_rev
; /* rev of loadable modules system */
175 void *ml_linkage
[7]; /* NULL terminated list of */
176 /* linkage structures */
180 * All of the module configuration linkages required to configure
181 * the system call and client VFS's into the system.
183 static struct modlinkage_big modlinkage
= {
186 #ifdef _SYSCALL32_IMPL
197 * This routine is invoked automatically when the kernel module
198 * containing this routine is loaded. This allows module specific
199 * initialization to be done when the module is loaded.
206 if ((status
= nfs_clntinit()) != 0) {
207 cmn_err(CE_WARN
, "_init: nfs_clntinit failed");
212 * Create the version specific kstats.
214 * PSARC 2001/697 Contract Private Interface
215 * All nfs kstats are under SunMC contract
216 * Please refer to the PSARC listed above and contact
217 * SunMC before making any changes!
219 * Changes must be reviewed by Solaris File Sharing
220 * Changes must be communicated to contract-2001-697@sun.com
224 zone_key_create(&nfsstat_zone_key
, nfsstat_zone_init
, NULL
,
226 status
= mod_install((struct modlinkage
*)&modlinkage
);
229 (void) zone_key_delete(nfsstat_zone_key
);
232 * Failed to install module, cleanup previous
233 * initialization work.
238 * Clean up work performed indirectly by mod_installfs()
239 * as a result of our call to mod_install().
251 /* Don't allow module to be unloaded */
256 _info(struct modinfo
*modinfop
)
258 return (mod_info((struct modlinkage
*)&modlinkage
, modinfop
));
266 * Returns the preferred transfer size in bytes based on
267 * what network interfaces are available.
273 * For the moment, just return NFS_MAXDATA until we can query the
274 * appropriate transport.
276 return (NFS_MAXDATA
);
280 * Returns the preferred transfer size in bytes based on
281 * what network interfaces are available.
284 /* this should reflect the largest transfer size possible */
285 static int nfs3_max_transfer_size
= 1024 * 1024;
291 * For the moment, just return nfs3_max_transfer_size until we
292 * can query the appropriate transport.
294 return (nfs3_max_transfer_size
);
/*
 * Transfer-size limits handed back by nfs3_tsize() and rfs3_tsize(),
 * chosen according to the transport in use:
 *   _cots - connection-oriented transports (NC_TPI_COTS, NC_TPI_COTS_ORD,
 *           T_COTS, T_COTS_ORD)
 *   _rdma - RDMA transports (NC_TPI_RDMA, T_RDMA)
 *   _clts - every other transport (the fall-through case)
 */
297 static uint_t nfs3_max_transfer_size_clts
= 32 * 1024;
298 static uint_t nfs3_max_transfer_size_cots
= 1024 * 1024;
299 static uint_t nfs3_max_transfer_size_rdma
= 1024 * 1024;
302 nfs3_tsize(struct knetconfig
*knp
)
305 if (knp
->knc_semantics
== NC_TPI_COTS_ORD
||
306 knp
->knc_semantics
== NC_TPI_COTS
)
307 return (nfs3_max_transfer_size_cots
);
308 if (knp
->knc_semantics
== NC_TPI_RDMA
)
309 return (nfs3_max_transfer_size_rdma
);
310 return (nfs3_max_transfer_size_clts
);
314 rfs3_tsize(struct svc_req
*req
)
317 if (req
->rq_xprt
->xp_type
== T_COTS_ORD
||
318 req
->rq_xprt
->xp_type
== T_COTS
)
319 return (nfs3_max_transfer_size_cots
);
320 if (req
->rq_xprt
->xp_type
== T_RDMA
)
321 return (nfs3_max_transfer_size_rdma
);
322 return (nfs3_max_transfer_size_clts
);
325 static const struct vfsops nfsdyn_vfsops
= {
326 .vfs_mountroot
= nfsdyn_mountroot
,
331 nfsdyninit(int fstyp
, char *name
)
335 error
= vfs_setfsops(fstyp
, &nfsdyn_vfsops
);
344 nfsdyn_mountroot(vfs_t
*vfsp
, whymountroot_t why
)
346 char root_hostname
[SYS_NMLN
+1];
347 struct servinfo
*svp
;
353 static char token
[10];
354 struct nfs_args args
; /* nfs mount arguments */
356 bzero(&args
, sizeof (args
));
358 /* do this BEFORE getfile which causes xid stamps to be initialized */
359 clkset(-1L); /* hack for now - until we get time svc? */
361 if (why
== ROOT_REMOUNT
) {
365 panic("nfs3_mountroot: why == ROOT_REMOUNT\n");
368 if (why
== ROOT_UNMOUNT
) {
370 * Nothing to do for NFS.
381 getfsname("root", name
, sizeof (token
));
384 root_path
= pn
.pn_path
;
386 svp
= kmem_zalloc(sizeof (*svp
), KM_SLEEP
);
387 mutex_init(&svp
->sv_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
388 svp
->sv_knconf
= kmem_zalloc(sizeof (*svp
->sv_knconf
), KM_SLEEP
);
389 svp
->sv_knconf
->knc_protofmly
= kmem_alloc(KNC_STRSIZE
, KM_SLEEP
);
390 svp
->sv_knconf
->knc_proto
= kmem_alloc(KNC_STRSIZE
, KM_SLEEP
);
393 * First try version 4
395 vfs_setops(vfsp
, &nfs4_vfsops
);
396 args
.addr
= &svp
->sv_addr
;
397 args
.fh
= (char *)&svp
->sv_fhandle
;
398 args
.knconf
= svp
->sv_knconf
;
399 args
.hostname
= root_hostname
;
402 if (error
= mount_root(*name
? name
: "root", root_path
, NFS_V4
,
404 if (error
!= EPROTONOSUPPORT
) {
405 nfs_cmn_err(error
, CE_WARN
,
406 "Unable to mount NFS root filesystem: %m");
409 vfs_setops(vfsp
, &nfsdyn_vfsops
);
416 bzero(&args
, sizeof (args
));
417 vfs_setops(vfsp
, &nfs3_vfsops
);
418 args
.addr
= &svp
->sv_addr
;
419 args
.fh
= (char *)&svp
->sv_fhandle
;
420 args
.knconf
= svp
->sv_knconf
;
421 args
.hostname
= root_hostname
;
424 if (error
= mount_root(*name
? name
: "root", root_path
,
425 NFS_V3
, &args
, &vfsflags
)) {
426 if (error
!= EPROTONOSUPPORT
) {
427 nfs_cmn_err(error
, CE_WARN
,
428 "Unable to mount NFS root filesystem: %m");
431 vfs_setops(vfsp
, &nfsdyn_vfsops
);
436 * Finally, try version 2
438 bzero(&args
, sizeof (args
));
439 args
.addr
= &svp
->sv_addr
;
440 args
.fh
= (char *)&svp
->sv_fhandle
.fh_buf
;
441 args
.knconf
= svp
->sv_knconf
;
442 args
.hostname
= root_hostname
;
445 vfs_setops(vfsp
, &nfs_vfsops
);
447 if (error
= mount_root(*name
? name
: "root",
448 root_path
, NFS_VERSION
, &args
, &vfsflags
)) {
449 nfs_cmn_err(error
, CE_WARN
,
450 "Unable to mount NFS root filesystem: %m");
453 vfs_setops(vfsp
, &nfsdyn_vfsops
);
461 return (VFS_MOUNTROOT(vfsp
, why
));
465 nfs_setopts(vnode_t
*vp
, model_t model
, struct nfs_args
*buf
)
467 mntinfo_t
*mi
; /* mount info, pointed at by vfs */
468 STRUCT_HANDLE(nfs_args
, args
);
472 STRUCT_SET_HANDLE(args
, model
, buf
);
474 flags
= STRUCT_FGET(args
, flags
);
477 * Set option fields in mount info record
481 if (flags
& NFSMNT_NOAC
) {
482 mi
->mi_flags
|= MI_NOAC
;
485 if (flags
& NFSMNT_NOCTO
)
486 mi
->mi_flags
|= MI_NOCTO
;
487 if (flags
& NFSMNT_LLOCK
)
488 mi
->mi_flags
|= MI_LLOCK
;
489 if (flags
& NFSMNT_GRPID
)
490 mi
->mi_flags
|= MI_GRPID
;
491 if (flags
& NFSMNT_RETRANS
) {
492 if (STRUCT_FGET(args
, retrans
) < 0)
494 mi
->mi_retrans
= STRUCT_FGET(args
, retrans
);
496 if (flags
& NFSMNT_TIMEO
) {
497 if (STRUCT_FGET(args
, timeo
) <= 0)
499 mi
->mi_timeo
= STRUCT_FGET(args
, timeo
);
501 * The following scales the standard deviation and
502 * current retransmission timer to match the
503 * initial value for the timeout specified.
505 mi
->mi_timers
[NFS_CALLTYPES
].rt_deviate
=
506 (mi
->mi_timeo
* hz
* 2) / 5;
507 mi
->mi_timers
[NFS_CALLTYPES
].rt_rtxcur
=
508 mi
->mi_timeo
* hz
/ 10;
510 if (flags
& NFSMNT_RSIZE
) {
511 if (STRUCT_FGET(args
, rsize
) <= 0)
513 mi
->mi_tsize
= MIN(mi
->mi_tsize
, STRUCT_FGET(args
, rsize
));
514 mi
->mi_curread
= MIN(mi
->mi_curread
, mi
->mi_tsize
);
516 if (flags
& NFSMNT_WSIZE
) {
517 if (STRUCT_FGET(args
, wsize
) <= 0)
519 mi
->mi_stsize
= MIN(mi
->mi_stsize
, STRUCT_FGET(args
, wsize
));
520 mi
->mi_curwrite
= MIN(mi
->mi_curwrite
, mi
->mi_stsize
);
522 if (flags
& NFSMNT_ACREGMIN
) {
523 if (STRUCT_FGET(args
, acregmin
) < 0)
524 mi
->mi_acregmin
= ACMINMAX
;
526 mi
->mi_acregmin
= MIN(STRUCT_FGET(args
, acregmin
),
528 mi
->mi_acregmin
= SEC2HR(mi
->mi_acregmin
);
530 if (flags
& NFSMNT_ACREGMAX
) {
531 if (STRUCT_FGET(args
, acregmax
) < 0)
532 mi
->mi_acregmax
= ACMAXMAX
;
534 mi
->mi_acregmax
= MIN(STRUCT_FGET(args
, acregmax
),
536 mi
->mi_acregmax
= SEC2HR(mi
->mi_acregmax
);
538 if (flags
& NFSMNT_ACDIRMIN
) {
539 if (STRUCT_FGET(args
, acdirmin
) < 0)
540 mi
->mi_acdirmin
= ACMINMAX
;
542 mi
->mi_acdirmin
= MIN(STRUCT_FGET(args
, acdirmin
),
544 mi
->mi_acdirmin
= SEC2HR(mi
->mi_acdirmin
);
546 if (flags
& NFSMNT_ACDIRMAX
) {
547 if (STRUCT_FGET(args
, acdirmax
) < 0)
548 mi
->mi_acdirmax
= ACMAXMAX
;
550 mi
->mi_acdirmax
= MIN(STRUCT_FGET(args
, acdirmax
),
552 mi
->mi_acdirmax
= SEC2HR(mi
->mi_acdirmax
);
555 if (flags
& NFSMNT_LOOPBACK
)
556 mi
->mi_flags
|= MI_LOOPBACK
;
562 * Set or clear the direct I/O flag.
563 * fop_rwlock() is held for write access to prevent a race condition
564 * which would occur if a process is in the middle of a write when
565 * the directio flag gets set. It is possible that all pages may not get flushed.
570 nfs_directio(vnode_t
*vp
, int cmd
, cred_t
*cr
)
577 if (cmd
== DIRECTIO_ON
) {
579 if (rp
->r_flags
& RDIRECTIO
)
583 * Flush the page cache.
586 (void) fop_rwlock(vp
, V_WRITELOCK_TRUE
, NULL
);
588 if (rp
->r_flags
& RDIRECTIO
) {
589 fop_rwunlock(vp
, V_WRITELOCK_TRUE
, NULL
);
593 if (vn_has_cached_data(vp
) &&
594 ((rp
->r_flags
& RDIRTY
) || rp
->r_awcount
> 0)) {
595 error
= fop_putpage(vp
, (offset_t
)0, (uint_t
)0,
598 if (error
== ENOSPC
|| error
== EDQUOT
) {
599 mutex_enter(&rp
->r_statelock
);
602 mutex_exit(&rp
->r_statelock
);
604 fop_rwunlock(vp
, V_WRITELOCK_TRUE
, NULL
);
609 mutex_enter(&rp
->r_statelock
);
610 rp
->r_flags
|= RDIRECTIO
;
611 mutex_exit(&rp
->r_statelock
);
612 fop_rwunlock(vp
, V_WRITELOCK_TRUE
, NULL
);
616 if (cmd
== DIRECTIO_OFF
) {
617 mutex_enter(&rp
->r_statelock
);
618 rp
->r_flags
&= ~RDIRECTIO
; /* disable direct mode */
619 mutex_exit(&rp
->r_statelock
);