1 /* $NetBSD: nfs_vfsops.c,v 1.209 2009/03/14 21:04:25 dsl Exp $ */
4 * Copyright (c) 1989, 1993, 1995
5 * The Regents of the University of California. All rights reserved.
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: nfs_vfsops.c,v 1.209 2009/03/14 21:04:25 dsl Exp $");
40 #if defined(_KERNEL_OPT)
44 #include <sys/param.h>
45 #include <sys/ioctl.h>
46 #include <sys/signal.h>
48 #include <sys/namei.h>
49 #include <sys/device.h>
50 #include <sys/vnode.h>
51 #include <sys/kernel.h>
52 #include <sys/mount.h>
55 #include <sys/dirent.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sysctl.h>
59 #include <sys/systm.h>
60 #include <sys/timetc.h>
61 #include <sys/kauth.h>
62 #include <sys/module.h>
65 #include <net/route.h>
66 #include <netinet/in.h>
68 #include <nfs/rpcv2.h>
69 #include <nfs/nfsproto.h>
70 #include <nfs/nfsnode.h>
72 #include <nfs/nfsmount.h>
73 #include <nfs/xdr_subs.h>
74 #include <nfs/nfsm_subs.h>
75 #include <nfs/nfsdiskless.h>
76 #include <nfs/nfs_var.h>
78 MODULE(MODULE_CLASS_VFS
, nfs
, NULL
);
80 extern struct nfsstats nfsstats
;
84 * keep a count of the nfs mounts to generate fictitious drive names
85 * for the per drive stats.
87 unsigned int nfs_mount_count
= 0;
93 extern const struct vnodeopv_desc nfsv2_vnodeop_opv_desc
;
94 extern const struct vnodeopv_desc spec_nfsv2nodeop_opv_desc
;
95 extern const struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc
;
97 const struct vnodeopv_desc
* const nfs_vnodeopv_descs
[] = {
98 &nfsv2_vnodeop_opv_desc
,
99 &spec_nfsv2nodeop_opv_desc
,
100 &fifo_nfsv2nodeop_opv_desc
,
104 struct vfsops nfs_vfsops
= {
106 sizeof (struct nfs_args
),
111 (void *)eopnotsupp
, /* vfs_quotactl */
121 (int (*)(struct mount
*, struct vnode
*, struct timespec
*)) eopnotsupp
,
123 (void *)eopnotsupp
, /* vfs_suspendctl */
124 genfs_renamelock_enter
,
125 genfs_renamelock_exit
,
132 extern u_int32_t nfs_procids
[NFS_NPROCS
];
133 extern u_int32_t nfs_prog
, nfs_vers
;
134 static struct sysctllog
*nfs_clog
;
136 static int nfs_mount_diskless(struct nfs_dlmount
*, const char *,
137 struct mount
**, struct vnode
**, struct lwp
*);
138 static void nfs_sysctl_init(void);
139 static void nfs_sysctl_fini(void);
142 nfs_modcmd(modcmd_t cmd
, void *arg
)
147 case MODULE_CMD_INIT
:
148 error
= vfs_attach(&nfs_vfsops
);
153 case MODULE_CMD_FINI
:
154 error
= vfs_detach(&nfs_vfsops
);
168 nfs_statvfs(struct mount
*mp
, struct statvfs
*sbp
)
170 struct lwp
*l
= curlwp
;
172 struct nfs_statfs
*sfp
;
176 char *bpos
, *dpos
, *cp2
;
177 struct nfsmount
*nmp
= VFSTONFS(mp
);
178 int error
= 0, retattr
;
182 int v3
= (nmp
->nm_flag
& NFSMNT_NFSV3
);
184 struct mbuf
*mreq
, *mrep
= NULL
, *md
, *mb
;
190 sfp
= (struct nfs_statfs
*)0;
194 cred
= kauth_cred_alloc();
196 if (v3
&& (nmp
->nm_iflag
& NFSMNT_GOTFSINFO
) == 0)
197 (void)nfs_fsinfo(nmp
, vp
, cred
, l
);
199 nfsstats
.rpccnt
[NFSPROC_FSSTAT
]++;
200 nfsm_reqhead(np
, NFSPROC_FSSTAT
, NFSX_FH(v3
));
202 nfsm_request(np
, NFSPROC_FSSTAT
, l
, cred
);
204 nfsm_postop_attr(vp
, retattr
, 0);
207 if (mrep
->m_next
!= NULL
)
208 printf("nfs_vfsops: nfs_statvfs would lose buffers\n");
213 nfsm_dissect(sfp
, struct nfs_statfs
*, NFSX_STATFS(v3
));
214 sbp
->f_flag
= nmp
->nm_flag
;
215 sbp
->f_iosize
= min(nmp
->nm_rsize
, nmp
->nm_wsize
);
217 sbp
->f_frsize
= sbp
->f_bsize
= NFS_FABLKSIZE
;
218 tquad
= fxdr_hyper(&sfp
->sf_tbytes
);
219 sbp
->f_blocks
= ((quad_t
)tquad
/ (quad_t
)NFS_FABLKSIZE
);
220 tquad
= fxdr_hyper(&sfp
->sf_fbytes
);
221 sbp
->f_bfree
= ((quad_t
)tquad
/ (quad_t
)NFS_FABLKSIZE
);
222 tquad
= fxdr_hyper(&sfp
->sf_abytes
);
223 tquad
= ((quad_t
)tquad
/ (quad_t
)NFS_FABLKSIZE
);
224 sbp
->f_bresvd
= sbp
->f_bfree
- tquad
;
225 sbp
->f_bavail
= tquad
;
226 /* Handle older NFS servers returning negative values */
227 if ((quad_t
)sbp
->f_bavail
< 0)
229 tquad
= fxdr_hyper(&sfp
->sf_tfiles
);
230 sbp
->f_files
= tquad
;
231 tquad
= fxdr_hyper(&sfp
->sf_ffiles
);
232 sbp
->f_ffree
= tquad
;
233 sbp
->f_favail
= tquad
;
235 sbp
->f_namemax
= MAXNAMLEN
;
237 sbp
->f_bsize
= NFS_FABLKSIZE
;
238 sbp
->f_frsize
= fxdr_unsigned(int32_t, sfp
->sf_bsize
);
239 sbp
->f_blocks
= fxdr_unsigned(int32_t, sfp
->sf_blocks
);
240 sbp
->f_bfree
= fxdr_unsigned(int32_t, sfp
->sf_bfree
);
241 sbp
->f_bavail
= fxdr_unsigned(int32_t, sfp
->sf_bavail
);
247 sbp
->f_namemax
= MAXNAMLEN
;
249 copy_statvfs_info(sbp
, mp
);
251 kauth_cred_free(cred
);
257 * nfs version 3 fsinfo rpc call
260 nfs_fsinfo(struct nfsmount
*nmp
, struct vnode
*vp
, kauth_cred_t cred
, struct lwp
*l
)
262 struct nfsv3_fsinfo
*fsp
;
265 u_int32_t
*tl
, pref
, xmax
;
266 char *bpos
, *dpos
, *cp2
;
267 int error
= 0, retattr
;
268 struct mbuf
*mreq
, *mrep
, *md
, *mb
;
270 struct nfsnode
*np
= VTONFS(vp
);
272 nfsstats
.rpccnt
[NFSPROC_FSINFO
]++;
273 nfsm_reqhead(np
, NFSPROC_FSINFO
, NFSX_FH(1));
275 nfsm_request(np
, NFSPROC_FSINFO
, l
, cred
);
276 nfsm_postop_attr(vp
, retattr
, 0);
278 nfsm_dissect(fsp
, struct nfsv3_fsinfo
*, NFSX_V3FSINFO
);
279 pref
= fxdr_unsigned(u_int32_t
, fsp
->fs_wtpref
);
280 if ((nmp
->nm_flag
& NFSMNT_WSIZE
) == 0 &&
281 pref
< nmp
->nm_wsize
&& pref
>= NFS_FABLKSIZE
)
282 nmp
->nm_wsize
= (pref
+ NFS_FABLKSIZE
- 1) &
283 ~(NFS_FABLKSIZE
- 1);
284 xmax
= fxdr_unsigned(u_int32_t
, fsp
->fs_wtmax
);
285 if (xmax
< nmp
->nm_wsize
&& xmax
> 0) {
286 nmp
->nm_wsize
= xmax
& ~(NFS_FABLKSIZE
- 1);
287 if (nmp
->nm_wsize
== 0)
288 nmp
->nm_wsize
= xmax
;
290 pref
= fxdr_unsigned(u_int32_t
, fsp
->fs_rtpref
);
291 if ((nmp
->nm_flag
& NFSMNT_RSIZE
) == 0 &&
292 pref
< nmp
->nm_rsize
&& pref
>= NFS_FABLKSIZE
)
293 nmp
->nm_rsize
= (pref
+ NFS_FABLKSIZE
- 1) &
294 ~(NFS_FABLKSIZE
- 1);
295 xmax
= fxdr_unsigned(u_int32_t
, fsp
->fs_rtmax
);
296 if (xmax
< nmp
->nm_rsize
&& xmax
> 0) {
297 nmp
->nm_rsize
= xmax
& ~(NFS_FABLKSIZE
- 1);
298 if (nmp
->nm_rsize
== 0)
299 nmp
->nm_rsize
= xmax
;
301 pref
= fxdr_unsigned(u_int32_t
, fsp
->fs_dtpref
);
302 if (pref
< nmp
->nm_readdirsize
&& pref
>= NFS_DIRFRAGSIZ
)
303 nmp
->nm_readdirsize
= (pref
+ NFS_DIRFRAGSIZ
- 1) &
304 ~(NFS_DIRFRAGSIZ
- 1);
305 if (xmax
< nmp
->nm_readdirsize
&& xmax
> 0) {
306 nmp
->nm_readdirsize
= xmax
& ~(NFS_DIRFRAGSIZ
- 1);
307 if (nmp
->nm_readdirsize
== 0)
308 nmp
->nm_readdirsize
= xmax
;
311 nmp
->nm_maxfilesize
= (u_int64_t
)0x80000000 * DEV_BSIZE
- 1;
312 maxfsize
= fxdr_hyper(&fsp
->fs_maxfilesize
);
313 if (maxfsize
> 0 && maxfsize
< nmp
->nm_maxfilesize
)
314 nmp
->nm_maxfilesize
= maxfsize
;
315 nmp
->nm_mountp
->mnt_fs_bshift
=
316 ffs(MIN(nmp
->nm_rsize
, nmp
->nm_wsize
)) - 1;
317 nmp
->nm_iflag
|= NFSMNT_GOTFSINFO
;
325 * Mount a remote root fs via NFS. It goes like this:
326 * - Call nfs_boot_init() to fill in the nfs_diskless struct
327 * - build the rootfs mount point and call mountnfs() to do the rest.
333 struct nfs_diskless
*nd
;
341 l
= curlwp
; /* XXX */
343 if (device_class(root_device
) != DV_IFNET
)
347 * XXX time must be non-zero when we init the interface or else
348 * the arp code will wedge. [Fixed now in if_ether.c]
349 * However, the NFS attribute cache gives false "hits" when the
350 * current time < nfs_attrtimeo(nmp, np) so keep this in for now.
352 if (time_second
< NFS_MAXATTRTIMO
) {
353 ts
.tv_sec
= NFS_MAXATTRTIMO
;
359 * Call nfs_boot_init() to fill in the nfs_diskless struct.
360 * Side effect: Finds and configures a network interface.
362 nd
= kmem_zalloc(sizeof(*nd
), KM_SLEEP
);
363 error
= nfs_boot_init(nd
, l
);
365 kmem_free(nd
, sizeof(*nd
));
370 * Create the root mount point.
372 error
= nfs_mount_diskless(&nd
->nd_root
, "/", &mp
, &vp
, l
);
375 printf("root on %s\n", nd
->nd_root
.ndm_host
);
378 * Link it into the mount list.
380 mutex_enter(&mountlist_lock
);
381 CIRCLEQ_INSERT_TAIL(&mountlist
, mp
, mnt_list
);
382 mutex_exit(&mountlist_lock
);
384 mp
->mnt_vnodecovered
= NULLVP
;
385 vfs_unbusy(mp
, false, NULL
);
387 /* Get root attributes (for the time). */
388 error
= VOP_GETATTR(vp
, &attr
, l
->l_cred
);
390 panic("nfs_mountroot: getattr for root");
391 n
= attr
.va_atime
.tv_sec
;
393 printf("root time: 0x%lx\n", n
);
399 nfs_boot_cleanup(nd
, l
);
400 kmem_free(nd
, sizeof(*nd
));
405 * Internal version of mount system call for diskless setup.
406 * Separate function because we used to call it twice.
407 * (once for root and once for swap)
410 nfs_mount_diskless(struct nfs_dlmount
*ndmntp
, const char *mntname
, struct mount
**mpp
, struct vnode
**vpp
, struct lwp
*l
)
411 /* mntname: mount point name */
417 vfs_rootmountalloc(MOUNT_NFS
, mntname
, &mp
);
419 mp
->mnt_op
= &nfs_vfsops
;
422 * Historical practice expects NFS root file systems to
423 * be initially mounted r/w.
425 mp
->mnt_flag
&= ~MNT_RDONLY
;
427 /* Get mbuf for server sockaddr. */
428 m
= m_get(M_WAIT
, MT_SONAME
);
430 panic("nfs_mountroot: mget soname for %s", mntname
);
431 MCLAIM(m
, &nfs_mowner
);
432 memcpy(mtod(m
, void *), (void *)ndmntp
->ndm_args
.addr
,
433 (m
->m_len
= ndmntp
->ndm_args
.addr
->sa_len
));
435 error
= mountnfs(&ndmntp
->ndm_args
, mp
, m
, mntname
,
436 ndmntp
->ndm_args
.hostname
, vpp
, l
);
438 vfs_unbusy(mp
, false, NULL
);
440 printf("nfs_mountroot: mount %s failed: %d\n",
449 nfs_decode_args(struct nfsmount
*nmp
, struct nfs_args
*argp
, struct lwp
*l
)
458 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
459 * no sense in that context.
461 if (argp
->sotype
== SOCK_STREAM
)
462 argp
->flags
&= ~NFSMNT_NOCONN
;
465 * Cookie translation is not needed for v2, silently ignore it.
467 if ((argp
->flags
& (NFSMNT_XLATECOOKIE
|NFSMNT_NFSV3
)) ==
469 argp
->flags
&= ~NFSMNT_XLATECOOKIE
;
471 /* Re-bind if a reserved port was requested and we weren't on one */
472 adjsock
= !(nmp
->nm_flag
& NFSMNT_RESVPORT
)
473 && (argp
->flags
& NFSMNT_RESVPORT
);
474 /* Also re-bind if we're switching to/from a connected UDP socket */
475 adjsock
|= ((nmp
->nm_flag
& NFSMNT_NOCONN
) !=
476 (argp
->flags
& NFSMNT_NOCONN
));
479 nmp
->nm_flag
= argp
->flags
;
482 if ((argp
->flags
& NFSMNT_TIMEO
) && argp
->timeo
> 0) {
483 nmp
->nm_timeo
= (argp
->timeo
* NFS_HZ
+ 5) / 10;
484 if (nmp
->nm_timeo
< NFS_MINTIMEO
)
485 nmp
->nm_timeo
= NFS_MINTIMEO
;
486 else if (nmp
->nm_timeo
> NFS_MAXTIMEO
)
487 nmp
->nm_timeo
= NFS_MAXTIMEO
;
490 if ((argp
->flags
& NFSMNT_RETRANS
) && argp
->retrans
> 1) {
491 nmp
->nm_retry
= argp
->retrans
;
492 if (nmp
->nm_retry
> NFS_MAXREXMIT
)
493 nmp
->nm_retry
= NFS_MAXREXMIT
;
497 if (argp
->flags
& NFSMNT_NFSV3
) {
498 if (argp
->sotype
== SOCK_DGRAM
)
499 maxio
= NFS_MAXDGRAMDATA
;
504 maxio
= NFS_V2MAXDATA
;
506 if ((argp
->flags
& NFSMNT_WSIZE
) && argp
->wsize
> 0) {
507 int osize
= nmp
->nm_wsize
;
508 nmp
->nm_wsize
= argp
->wsize
;
509 /* Round down to multiple of blocksize */
510 nmp
->nm_wsize
&= ~(NFS_FABLKSIZE
- 1);
511 if (nmp
->nm_wsize
<= 0)
512 nmp
->nm_wsize
= NFS_FABLKSIZE
;
513 adjsock
|= (nmp
->nm_wsize
!= osize
);
515 if (nmp
->nm_wsize
> maxio
)
516 nmp
->nm_wsize
= maxio
;
517 if (nmp
->nm_wsize
> MAXBSIZE
)
518 nmp
->nm_wsize
= MAXBSIZE
;
520 if ((argp
->flags
& NFSMNT_RSIZE
) && argp
->rsize
> 0) {
521 int osize
= nmp
->nm_rsize
;
522 nmp
->nm_rsize
= argp
->rsize
;
523 /* Round down to multiple of blocksize */
524 nmp
->nm_rsize
&= ~(NFS_FABLKSIZE
- 1);
525 if (nmp
->nm_rsize
<= 0)
526 nmp
->nm_rsize
= NFS_FABLKSIZE
;
527 adjsock
|= (nmp
->nm_rsize
!= osize
);
529 if (nmp
->nm_rsize
> maxio
)
530 nmp
->nm_rsize
= maxio
;
531 if (nmp
->nm_rsize
> MAXBSIZE
)
532 nmp
->nm_rsize
= MAXBSIZE
;
534 if ((argp
->flags
& NFSMNT_READDIRSIZE
) && argp
->readdirsize
> 0) {
535 nmp
->nm_readdirsize
= argp
->readdirsize
;
536 /* Round down to multiple of minimum blocksize */
537 nmp
->nm_readdirsize
&= ~(NFS_DIRFRAGSIZ
- 1);
538 if (nmp
->nm_readdirsize
< NFS_DIRFRAGSIZ
)
539 nmp
->nm_readdirsize
= NFS_DIRFRAGSIZ
;
540 /* Bigger than buffer size makes no sense */
541 if (nmp
->nm_readdirsize
> NFS_DIRBLKSIZ
)
542 nmp
->nm_readdirsize
= NFS_DIRBLKSIZ
;
543 } else if (argp
->flags
& NFSMNT_RSIZE
)
544 nmp
->nm_readdirsize
= nmp
->nm_rsize
;
546 if (nmp
->nm_readdirsize
> maxio
)
547 nmp
->nm_readdirsize
= maxio
;
549 if ((argp
->flags
& NFSMNT_MAXGRPS
) && argp
->maxgrouplist
>= 0 &&
550 argp
->maxgrouplist
<= NFS_MAXGRPS
)
551 nmp
->nm_numgrps
= argp
->maxgrouplist
;
552 if ((argp
->flags
& NFSMNT_READAHEAD
) && argp
->readahead
>= 0 &&
553 argp
->readahead
<= NFS_MAXRAHEAD
)
554 nmp
->nm_readahead
= argp
->readahead
;
555 if ((argp
->flags
& NFSMNT_DEADTHRESH
) && argp
->deadthresh
>= 1 &&
556 argp
->deadthresh
<= NFS_NEVERDEAD
)
557 nmp
->nm_deadthresh
= argp
->deadthresh
;
559 adjsock
|= ((nmp
->nm_sotype
!= argp
->sotype
) ||
560 (nmp
->nm_soproto
!= argp
->proto
));
561 nmp
->nm_sotype
= argp
->sotype
;
562 nmp
->nm_soproto
= argp
->proto
;
564 if (nmp
->nm_so
&& adjsock
) {
565 nfs_safedisconnect(nmp
);
566 if (nmp
->nm_sotype
== SOCK_DGRAM
)
567 while (nfs_connect(nmp
, (struct nfsreq
*)0, l
)) {
568 printf("nfs_args: retrying connect\n");
569 kpause("nfscn3", false, hz
, NULL
);
578 * It seems a bit dumb to copyinstr() the host and path here and then
579 * memcpy() them in mountnfs(), but I wanted to detect errors before
580 * doing the sockargs() call because sockargs() allocates an mbuf and
581 * an error after that means that I have to release the mbuf.
585 nfs_mount(struct mount
*mp
, const char *path
, void *data
, size_t *data_len
)
587 struct lwp
*l
= curlwp
;
589 struct nfs_args
*args
= data
;
591 struct nfsmount
*nmp
= VFSTONFS(mp
);
599 if (*data_len
< sizeof *args
)
603 if (mp
->mnt_flag
& MNT_GETARGS
) {
607 if (args
->addr
!= NULL
) {
608 sa
= mtod(nmp
->nm_nam
, struct sockaddr
*);
609 error
= copyout(sa
, args
->addr
, sa
->sa_len
);
612 args
->addrlen
= sa
->sa_len
;
616 args
->version
= NFS_ARGSVERSION
;
617 args
->sotype
= nmp
->nm_sotype
;
618 args
->proto
= nmp
->nm_soproto
;
621 args
->flags
= nmp
->nm_flag
;
622 args
->wsize
= nmp
->nm_wsize
;
623 args
->rsize
= nmp
->nm_rsize
;
624 args
->readdirsize
= nmp
->nm_readdirsize
;
625 args
->timeo
= nmp
->nm_timeo
;
626 args
->retrans
= nmp
->nm_retry
;
627 args
->maxgrouplist
= nmp
->nm_numgrps
;
628 args
->readahead
= nmp
->nm_readahead
;
629 args
->leaseterm
= 0; /* dummy */
630 args
->deadthresh
= nmp
->nm_deadthresh
;
631 args
->hostname
= NULL
;
632 *data_len
= sizeof *args
;
636 if (args
->version
!= NFS_ARGSVERSION
)
637 return (EPROGMISMATCH
);
638 if (args
->flags
& (NFSMNT_NQNFS
|NFSMNT_KERB
))
639 return (EPROGUNAVAIL
);
641 if (args
->flags
& NFSMNT_NFSV3
)
642 return (EPROGMISMATCH
);
644 if (mp
->mnt_flag
& MNT_UPDATE
) {
648 * When doing an update, we can't change from or to
649 * v3, or change cookie translation
651 args
->flags
= (args
->flags
& ~(NFSMNT_NFSV3
|NFSMNT_XLATECOOKIE
)) |
652 (nmp
->nm_flag
& (NFSMNT_NFSV3
|NFSMNT_XLATECOOKIE
));
653 nfs_decode_args(nmp
, args
, l
);
656 if (args
->fhsize
< 0 || args
->fhsize
> NFSX_V3FHMAX
)
658 nfh
= malloc(NFSX_V3FHMAX
, M_TEMP
, M_WAITOK
);
659 error
= copyin(args
->fh
, nfh
, args
->fhsize
);
662 pth
= malloc(MNAMELEN
, M_TEMP
, M_WAITOK
);
663 error
= copyinstr(path
, pth
, MNAMELEN
- 1, &len
);
666 memset(&pth
[len
], 0, MNAMELEN
- len
);
667 hst
= malloc(MNAMELEN
, M_TEMP
, M_WAITOK
);
668 error
= copyinstr(args
->hostname
, hst
, MNAMELEN
- 1, &len
);
671 memset(&hst
[len
], 0, MNAMELEN
- len
);
672 /* sockargs() call must be after above copyin() calls */
673 error
= sockargs(&nam
, args
->addr
, args
->addrlen
, MT_SONAME
);
676 MCLAIM(nam
, &nfs_mowner
);
678 error
= mountnfs(args
, mp
, nam
, pth
, hst
, &vp
, l
);
691 * Common code for mount and mountroot
694 mountnfs(struct nfs_args
*argp
, struct mount
*mp
, struct mbuf
*nam
, const char *pth
, const char *hst
, struct vnode
**vpp
, struct lwp
*l
)
696 struct nfsmount
*nmp
;
702 char iosname
[IOSTATNAMELEN
];
705 * If the number of nfs iothreads to use has never
706 * been set, create a reasonable number of them.
709 if (nfs_niothreads
< 0) {
710 nfs_set_niothreads(NFS_DEFAULT_NIOTHREADS
);
713 if (mp
->mnt_flag
& MNT_UPDATE
) {
715 /* update paths, file handles, etc, here XXX */
719 nmp
= kmem_zalloc(sizeof(*nmp
), KM_SLEEP
);
721 TAILQ_INIT(&nmp
->nm_uidlruhead
);
722 TAILQ_INIT(&nmp
->nm_bufq
);
723 rw_init(&nmp
->nm_writeverflock
);
724 mutex_init(&nmp
->nm_lock
, MUTEX_DEFAULT
, IPL_NONE
);
725 rw_init(&nmp
->nm_rbtlock
);
726 cv_init(&nmp
->nm_rcvcv
, "nfsrcv");
727 cv_init(&nmp
->nm_sndcv
, "nfssnd");
728 cv_init(&nmp
->nm_aiocv
, "nfsaio");
729 cv_init(&nmp
->nm_disconcv
, "nfsdis");
736 if ((argp
->flags
& NFSMNT_NFSV3
) == 0)
739 if (argp
->fhsize
!= NFSX_V2FH
) {
745 * V2 can only handle 32 bit filesizes. For v3, nfs_fsinfo
746 * will overwrite this.
748 nmp
->nm_maxfilesize
= 0xffffffffLL
;
750 nmp
->nm_timeo
= NFS_TIMEO
;
751 nmp
->nm_retry
= NFS_RETRANS
;
752 nmp
->nm_wsize
= NFS_WSIZE
;
753 nmp
->nm_rsize
= NFS_RSIZE
;
754 nmp
->nm_readdirsize
= NFS_READDIRSIZE
;
755 nmp
->nm_numgrps
= NFS_MAXGRPS
;
756 nmp
->nm_readahead
= NFS_DEFRAHEAD
;
757 nmp
->nm_deadthresh
= NFS_DEFDEADTHRESH
;
758 error
= set_statvfs_info(pth
, UIO_SYSSPACE
, hst
, UIO_SYSSPACE
,
759 mp
->mnt_op
->vfs_name
, mp
, l
);
764 /* Set up the sockets and per-host congestion */
765 nmp
->nm_sotype
= argp
->sotype
;
766 nmp
->nm_soproto
= argp
->proto
;
768 nfs_decode_args(nmp
, argp
, l
);
770 mp
->mnt_fs_bshift
= ffs(MIN(nmp
->nm_rsize
, nmp
->nm_wsize
)) - 1;
771 mp
->mnt_dev_bshift
= DEV_BSHIFT
;
774 * For Connection based sockets (TCP,...) defer the connect until
775 * the first request, in case the server is not responding.
777 if (nmp
->nm_sotype
== SOCK_DGRAM
&&
778 (error
= nfs_connect(nmp
, (struct nfsreq
*)0, l
)))
782 * This is silly, but it has to be set so that vinifod() works.
783 * We do not want to do an nfs_statvfs() here since we can get
784 * stuck on a dead server and we are holding a lock on the mount
787 mp
->mnt_stat
.f_iosize
= NFS_MAXDGRAMDATA
;
788 error
= nfs_nget(mp
, (nfsfh_t
*)argp
->fh
, argp
->fhsize
, &np
);
792 attrs
= malloc(sizeof(struct vattr
), M_TEMP
, M_WAITOK
);
793 VOP_GETATTR(vp
, attrs
, l
->l_cred
);
794 if ((nmp
->nm_flag
& NFSMNT_NFSV3
) && (vp
->v_type
== VDIR
)) {
795 cr
= kauth_cred_alloc();
796 kauth_cred_setuid(cr
, attrs
->va_uid
);
797 kauth_cred_seteuid(cr
, attrs
->va_uid
);
798 kauth_cred_setsvuid(cr
, attrs
->va_uid
);
799 kauth_cred_setgid(cr
, attrs
->va_gid
);
800 kauth_cred_setegid(cr
, attrs
->va_gid
);
801 kauth_cred_setsvgid(cr
, attrs
->va_gid
);
802 nfs_cookieheuristic(vp
, &nmp
->nm_iflag
, l
, cr
);
808 * A reference count is needed on the nfsnode representing the
809 * remote root. If this object is not persistent, then backward
810 * traversals of the mount point (i.e. "..") will not work if
811 * the nfsnode gets flushed out of the cache. Ufs does not have
812 * this problem, because one can identify root inodes by their
813 * number == ROOTINO (2). So, just unlock, but no rele.
817 if (vp
->v_type
== VNON
)
819 vp
->v_vflag
|= VV_ROOT
;
823 snprintf(iosname
, sizeof(iosname
), "nfs%u", nfs_mount_count
++);
824 nmp
->nm_stats
= iostat_alloc(IOSTAT_NFS
, nmp
, iosname
);
829 rw_destroy(&nmp
->nm_writeverflock
);
830 rw_destroy(&nmp
->nm_rbtlock
);
831 mutex_destroy(&nmp
->nm_lock
);
832 cv_destroy(&nmp
->nm_rcvcv
);
833 cv_destroy(&nmp
->nm_sndcv
);
834 cv_destroy(&nmp
->nm_aiocv
);
835 cv_destroy(&nmp
->nm_disconcv
);
836 kmem_free(nmp
, sizeof(*nmp
));
842 * unmount system call
845 nfs_unmount(struct mount
*mp
, int mntflags
)
847 struct nfsmount
*nmp
;
849 int error
, flags
= 0;
851 if (mntflags
& MNT_FORCE
)
855 * Goes something like this..
856 * - Check for activity on the root vnode (other than ourselves).
857 * - Call vflush() to clear out vnodes for this file system,
858 * except for the root vnode.
859 * - Decrement reference on the vnode representing remote root.
861 * - Free up the data structures
864 * We need to decrement the ref. count on the nfsnode representing
865 * the remote root. See comment in mountnfs(). The VFS unmount()
866 * has done vput on this vnode, otherwise we would get deadlock!
869 error
= vget(vp
, LK_EXCLUSIVE
| LK_RETRY
);
873 if ((mntflags
& MNT_FORCE
) == 0 && vp
->v_usecount
> 2) {
878 error
= vflush(mp
, vp
, flags
);
885 * We are now committed to the unmount; mark the mount structure
886 * as doomed so that any sleepers kicked awake by nfs_disconnect
887 * will go away cleanly.
889 nmp
->nm_iflag
|= NFSMNT_DISMNT
;
892 * Clean up the stats... note that we carefully avoid decrementing
893 * nfs_mount_count here for good reason - we may not be unmounting
894 * the last thing mounted.
896 iostat_free(nmp
->nm_stats
);
899 * There are two reference counts to get rid of here
900 * (see comment in mountnfs()).
905 m_freem(nmp
->nm_nam
);
907 rw_destroy(&nmp
->nm_writeverflock
);
908 rw_destroy(&nmp
->nm_rbtlock
);
909 mutex_destroy(&nmp
->nm_lock
);
910 cv_destroy(&nmp
->nm_rcvcv
);
911 cv_destroy(&nmp
->nm_sndcv
);
912 cv_destroy(&nmp
->nm_aiocv
);
913 cv_destroy(&nmp
->nm_disconcv
);
914 kmem_free(nmp
, sizeof(*nmp
));
919 * Return root of a filesystem
922 nfs_root(struct mount
*mp
, struct vnode
**vpp
)
925 struct nfsmount
*nmp
;
930 error
= vget(vp
, LK_EXCLUSIVE
| LK_RETRY
);
940 * Flush out the buffer cache
944 nfs_sync(struct mount
*mp
, int waitfor
, kauth_cred_t cred
)
946 struct vnode
*vp
, *mvp
;
947 int error
, allerror
= 0;
950 * Force stale buffer cache information to be flushed.
952 if ((mvp
= vnalloc(mp
)) == NULL
)
956 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
957 * and vclean() can be called indirectly
959 mutex_enter(&mntvnode_lock
);
960 for (vp
= TAILQ_FIRST(&mp
->mnt_vnodelist
); vp
; vp
= vunmark(mvp
)) {
962 if (vp
->v_mount
!= mp
|| vismarker(vp
))
964 mutex_enter(&vp
->v_interlock
);
965 /* XXX MNT_LAZY cannot be right? */
966 if (waitfor
== MNT_LAZY
|| VOP_ISLOCKED(vp
) ||
967 (LIST_EMPTY(&vp
->v_dirtyblkhd
) &&
968 UVM_OBJ_IS_CLEAN(&vp
->v_uobj
))) {
969 mutex_exit(&vp
->v_interlock
);
972 mutex_exit(&mntvnode_lock
);
973 if (vget(vp
, LK_EXCLUSIVE
| LK_INTERLOCK
)) {
977 error
= VOP_FSYNC(vp
, cred
,
978 waitfor
== MNT_WAIT
? FSYNC_WAIT
: 0, 0, 0);
982 mutex_enter(&mntvnode_lock
);
984 mutex_exit(&mntvnode_lock
);
990 * NFS flat namespace lookup.
991 * Currently unsupported.
995 nfs_vget(struct mount
*mp
, ino_t ino
, struct vnode
**vpp
)
1002 * Do that sysctl thang...
1005 sysctl_vfs_nfs_iothreads(SYSCTLFN_ARGS
)
1007 struct sysctlnode node
;
1011 val
= nfs_niothreads
;
1013 node
.sysctl_data
= &val
;
1014 error
= sysctl_lookup(SYSCTLFN_CALL(&node
));
1015 if (error
|| newp
== NULL
)
1018 return nfs_set_niothreads(val
);
1022 nfs_sysctl_init(void)
1025 sysctl_createv(&nfs_clog
, 0, NULL
, NULL
,
1027 CTLTYPE_NODE
, "vfs", NULL
,
1030 sysctl_createv(&nfs_clog
, 0, NULL
, NULL
,
1032 CTLTYPE_NODE
, "nfs",
1033 SYSCTL_DESCR("NFS vfs options"),
1035 CTL_VFS
, 2, CTL_EOL
);
1037 * XXX the "2" above could be dynamic, thereby eliminating one
1038 * more instance of the "number to vfs" mapping problem, but
1039 * "2" is the order as taken from sys/mount.h
1042 sysctl_createv(&nfs_clog
, 0, NULL
, NULL
,
1043 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
1044 CTLTYPE_STRUCT
, "nfsstats",
1045 SYSCTL_DESCR("NFS operation statistics"),
1046 NULL
, 0, &nfsstats
, sizeof(nfsstats
),
1047 CTL_VFS
, 2, NFS_NFSSTATS
, CTL_EOL
);
1048 sysctl_createv(&nfs_clog
, 0, NULL
, NULL
,
1049 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
1050 CTLTYPE_INT
, "iothreads",
1051 SYSCTL_DESCR("Number of NFS client processes desired"),
1052 sysctl_vfs_nfs_iothreads
, 0, NULL
, 0,
1053 CTL_VFS
, 2, NFS_IOTHREADS
, CTL_EOL
);
1057 nfs_sysctl_fini(void)
1060 sysctl_teardown(&nfs_clog
);
1065 nfs_fhtovp(struct mount
*mp
, struct fid
*fid
, struct vnode
**vpp
)
1073 fidsize
= fid
->fid_len
;
1074 if (fidsize
< sizeof(*fid
)) {
1077 fhsize
= fidsize
- sizeof(*fid
);
1078 if ((fhsize
% NFSX_UNSIGNED
) != 0) {
1081 if ((VFSTONFS(mp
)->nm_flag
& NFSMNT_NFSV3
) != 0) {
1082 if (fhsize
> NFSX_V3FHMAX
|| fhsize
== 0) {
1086 if (fhsize
!= NFSX_V2FH
) {
1090 error
= nfs_nget(mp
, (void *)fid
->fid_data
, fhsize
, &np
);
1095 error
= VOP_GETATTR(*vpp
, &va
, kauth_cred_get());
1104 nfs_vptofh(struct vnode
*vp
, struct fid
*buf
, size_t *bufsize
)
1112 fidsize
= sizeof(*fid
) + np
->n_fhsize
;
1113 if (*bufsize
< fidsize
) {
1118 struct fid fid_store
;
1121 memset(fid
, 0, sizeof(*fid
));
1122 fid
->fid_len
= fidsize
;
1123 memcpy(buf
, fid
, sizeof(*fid
));
1124 memcpy(buf
->fid_data
, np
->n_fhp
, np
->n_fhsize
);
1130 * Vfs start routine, a no-op.
1134 nfs_start(struct mount
*mp
, int flags
)