1 /* $NetBSD: nfs_subs.c,v 1.217 2009/05/14 15:42:22 yamt Exp $ */
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95
38 * Copyright 2000 Wasabi Systems, Inc.
39 * All rights reserved.
41 * Written by Frank van der Linden for Wasabi Systems, Inc.
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 * must display the following acknowledgement:
53 * This product includes software developed for the NetBSD Project by
54 * Wasabi Systems, Inc.
55 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
56 * or promote products derived from this software without specific prior
59 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
61 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
63 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
64 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
65 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
66 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
67 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
68 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
69 * POSSIBILITY OF SUCH DAMAGE.
72 #include <sys/cdefs.h>
73 __KERNEL_RCSID(0, "$NetBSD: nfs_subs.c,v 1.217 2009/05/14 15:42:22 yamt Exp $");
81 * These functions support the macros and help fiddle mbuf chains for
82 * the nfs op functions. They do things like create the rpc header and
83 * copy data between mbuf chains and uio lists.
85 #include <sys/param.h>
87 #include <sys/systm.h>
88 #include <sys/kernel.h>
90 #include <sys/mount.h>
91 #include <sys/vnode.h>
92 #include <sys/namei.h>
94 #include <sys/socket.h>
96 #include <sys/filedesc.h>
98 #include <sys/dirent.h>
100 #include <sys/kauth.h>
101 #include <sys/atomic.h>
103 #include <uvm/uvm_extern.h>
105 #include <nfs/rpcv2.h>
106 #include <nfs/nfsproto.h>
107 #include <nfs/nfsnode.h>
109 #include <nfs/xdr_subs.h>
110 #include <nfs/nfsm_subs.h>
111 #include <nfs/nfsmount.h>
112 #include <nfs/nfsrtt.h>
113 #include <nfs/nfs_var.h>
115 #include <miscfs/specfs/specdev.h>
117 #include <netinet/in.h>
119 static u_int32_t nfs_xid
;
121 int nuidhash_max
= NFS_MAXUIDHASH
;
123 * Data items converted to xdr at startup, since they are constant
124 * This is kinda hokey, but may save a little time doing byte swaps
126 u_int32_t nfs_xdrneg1
;
127 u_int32_t rpc_call
, rpc_vers
, rpc_reply
, rpc_msgdenied
, rpc_autherr
,
128 rpc_mismatch
, rpc_auth_unix
, rpc_msgaccepted
,
130 u_int32_t nfs_prog
, nfs_true
, nfs_false
;
132 /* And other global data */
133 const nfstype nfsv2_type
[9] =
134 { NFNON
, NFREG
, NFDIR
, NFBLK
, NFCHR
, NFLNK
, NFNON
, NFCHR
, NFNON
};
135 const nfstype nfsv3_type
[9] =
136 { NFNON
, NFREG
, NFDIR
, NFBLK
, NFCHR
, NFLNK
, NFSOCK
, NFFIFO
, NFNON
};
137 const enum vtype nv2tov_type
[8] =
138 { VNON
, VREG
, VDIR
, VBLK
, VCHR
, VLNK
, VNON
, VNON
};
139 const enum vtype nv3tov_type
[8] =
140 { VNON
, VREG
, VDIR
, VBLK
, VCHR
, VLNK
, VSOCK
, VFIFO
};
144 /* NFS client/server stats. */
145 struct nfsstats nfsstats
;
148 * Mapping of old NFS Version 2 RPC numbers to generic numbers.
150 const int nfsv3_procid
[NFS_NPROCS
] = {
177 * and the reverse mapping from generic to Version 2 procedure numbers
179 const int nfsv2_procid
[NFS_NPROCS
] = {
206 * Maps errno values to nfs error numbers.
207 * Use NFSERR_IO as the catch all for ones not specifically defined in
210 static const u_char nfsrv_v2errmap
[ELAST
] = {
211 NFSERR_PERM
, NFSERR_NOENT
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
212 NFSERR_NXIO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
213 NFSERR_IO
, NFSERR_IO
, NFSERR_ACCES
, NFSERR_IO
, NFSERR_IO
,
214 NFSERR_IO
, NFSERR_EXIST
, NFSERR_IO
, NFSERR_NODEV
, NFSERR_NOTDIR
,
215 NFSERR_ISDIR
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
216 NFSERR_IO
, NFSERR_FBIG
, NFSERR_NOSPC
, NFSERR_IO
, NFSERR_ROFS
,
217 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
218 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
219 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
220 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
221 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
222 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
223 NFSERR_IO
, NFSERR_IO
, NFSERR_NAMETOL
, NFSERR_IO
, NFSERR_IO
,
224 NFSERR_NOTEMPTY
, NFSERR_IO
, NFSERR_IO
, NFSERR_DQUOT
, NFSERR_STALE
,
225 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
226 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
227 NFSERR_IO
, NFSERR_IO
,
231 * Maps errno values to nfs error numbers.
232 * Although it is not obvious whether or not NFS clients really care if
233 * a returned error value is in the specified list for the procedure, the
234 * safest thing to do is filter them appropriately. For Version 2, the
235 * X/Open XNFS document is the only specification that defines error values
236 * for each RPC (The RFC simply lists all possible error values for all RPCs),
237 * so I have decided to not do this for Version 2.
238 * The first entry is the default error return and the rest are the valid
239 * errors for that RPC in increasing numeric order.
241 static const short nfsv3err_null
[] = {
246 static const short nfsv3err_getattr
[] = {
255 static const short nfsv3err_setattr
[] = {
271 static const short nfsv3err_lookup
[] = {
284 static const short nfsv3err_access
[] = {
293 static const short nfsv3err_readlink
[] = {
305 static const short nfsv3err_read
[] = {
318 static const short nfsv3err_write
[] = {
334 static const short nfsv3err_create
[] = {
351 static const short nfsv3err_mkdir
[] = {
368 static const short nfsv3err_symlink
[] = {
385 static const short nfsv3err_mknod
[] = {
403 static const short nfsv3err_remove
[] = {
417 static const short nfsv3err_rmdir
[] = {
435 static const short nfsv3err_rename
[] = {
458 static const short nfsv3err_link
[] = {
478 static const short nfsv3err_readdir
[] = {
491 static const short nfsv3err_readdirplus
[] = {
505 static const short nfsv3err_fsstat
[] = {
514 static const short nfsv3err_fsinfo
[] = {
522 static const short nfsv3err_pathconf
[] = {
530 static const short nfsv3err_commit
[] = {
539 static const short * const nfsrv_v3errmap
[] = {
557 nfsv3err_readdirplus
,
564 extern struct nfsrtt nfsrtt
;
566 u_long nfsdirhashmask
;
568 int nfs_webnamei(struct nameidata
*, struct vnode
*, struct proc
*);
571 * Create the header for an rpc request packet
572 * The hsiz is the size of the rest of the nfs request header.
573 * (just used to decide if a cluster is a good idea)
576 nfsm_reqh(struct nfsnode
*np
, u_long procid
, int hsiz
, char **bposp
)
581 mb
= m_get(M_WAIT
, MT_DATA
);
582 MCLAIM(mb
, &nfs_mowner
);
583 if (hsiz
>= MINCLSIZE
)
586 bpos
= mtod(mb
, void *);
588 /* Finally, return values */
594 * Build the RPC header and fill in the authorization info.
595 * The authorization string argument is only used when the credentials
596 * come from outside of the kernel.
597 * Returns the head of the mbuf list.
600 nfsm_rpchead(kauth_cred_t cr
, int nmflag
, int procid
,
601 int auth_type
, int auth_len
, char *auth_str
, int verf_len
,
602 char *verf_str
, struct mbuf
*mrest
, int mrest_len
,
603 struct mbuf
**mbp
, uint32_t *xidp
)
610 int siz
, grpsiz
, authsiz
;
612 authsiz
= nfsm_rndup(auth_len
);
613 mb
= m_gethdr(M_WAIT
, MT_DATA
);
614 MCLAIM(mb
, &nfs_mowner
);
615 if ((authsiz
+ 10 * NFSX_UNSIGNED
) >= MINCLSIZE
) {
617 } else if ((authsiz
+ 10 * NFSX_UNSIGNED
) < MHLEN
) {
618 MH_ALIGN(mb
, authsiz
+ 10 * NFSX_UNSIGNED
);
620 MH_ALIGN(mb
, 8 * NFSX_UNSIGNED
);
624 bpos
= mtod(mb
, void *);
627 * First the RPC header.
629 nfsm_build(tl
, u_int32_t
*, 8 * NFSX_UNSIGNED
);
631 *tl
++ = *xidp
= nfs_getxid();
634 *tl
++ = txdr_unsigned(NFS_PROG
);
635 if (nmflag
& NFSMNT_NFSV3
)
636 *tl
++ = txdr_unsigned(NFS_VER3
);
638 *tl
++ = txdr_unsigned(NFS_VER2
);
639 if (nmflag
& NFSMNT_NFSV3
)
640 *tl
++ = txdr_unsigned(procid
);
642 *tl
++ = txdr_unsigned(nfsv2_procid
[procid
]);
645 * And then the authorization cred.
647 *tl
++ = txdr_unsigned(auth_type
);
648 *tl
= txdr_unsigned(authsiz
);
651 nfsm_build(tl
, u_int32_t
*, auth_len
);
652 *tl
++ = 0; /* stamp ?? */
653 *tl
++ = 0; /* NULL hostname */
654 *tl
++ = txdr_unsigned(kauth_cred_geteuid(cr
));
655 *tl
++ = txdr_unsigned(kauth_cred_getegid(cr
));
656 grpsiz
= (auth_len
>> 2) - 5;
657 *tl
++ = txdr_unsigned(grpsiz
);
658 for (i
= 0; i
< grpsiz
; i
++)
659 *tl
++ = txdr_unsigned(kauth_cred_group(cr
, i
)); /* XXX elad review */
664 if (M_TRAILINGSPACE(mb
) == 0) {
666 mb2
= m_get(M_WAIT
, MT_DATA
);
667 MCLAIM(mb2
, &nfs_mowner
);
668 if (siz
>= MINCLSIZE
)
669 m_clget(mb2
, M_WAIT
);
673 bpos
= mtod(mb
, void *);
675 i
= min(siz
, M_TRAILINGSPACE(mb
));
676 memcpy(bpos
, auth_str
, i
);
682 if ((siz
= (nfsm_rndup(auth_len
) - auth_len
)) > 0) {
683 for (i
= 0; i
< siz
; i
++)
691 * And the verifier...
693 nfsm_build(tl
, u_int32_t
*, 2 * NFSX_UNSIGNED
);
695 *tl
++ = txdr_unsigned(RPCAUTH_KERB4
);
696 *tl
= txdr_unsigned(verf_len
);
699 if (M_TRAILINGSPACE(mb
) == 0) {
701 mb2
= m_get(M_WAIT
, MT_DATA
);
702 MCLAIM(mb2
, &nfs_mowner
);
703 if (siz
>= MINCLSIZE
)
704 m_clget(mb2
, M_WAIT
);
708 bpos
= mtod(mb
, void *);
710 i
= min(siz
, M_TRAILINGSPACE(mb
));
711 memcpy(bpos
, verf_str
, i
);
717 if ((siz
= (nfsm_rndup(verf_len
) - verf_len
)) > 0) {
718 for (i
= 0; i
< siz
; i
++)
723 *tl
++ = txdr_unsigned(RPCAUTH_NULL
);
727 mreq
->m_pkthdr
.len
= authsiz
+ 10 * NFSX_UNSIGNED
+ mrest_len
;
728 mreq
->m_pkthdr
.rcvif
= (struct ifnet
*)0;
734 * copies mbuf chain to the uio scatter/gather list
737 nfsm_mbuftouio(struct mbuf
**mrep
, struct uio
*uiop
, int siz
, char **dpos
)
739 char *mbufcp
, *uiocp
;
747 len
= mtod(mp
, char *) + mp
->m_len
- mbufcp
;
748 rem
= nfsm_rndup(siz
)-siz
;
750 if (uiop
->uio_iovcnt
<= 0 || uiop
->uio_iov
== NULL
)
752 left
= uiop
->uio_iov
->iov_len
;
753 uiocp
= uiop
->uio_iov
->iov_base
;
762 mbufcp
= mtod(mp
, void *);
765 xfer
= (left
> len
) ? len
: left
;
766 error
= copyout_vmspace(uiop
->uio_vmspace
, mbufcp
,
775 uiop
->uio_offset
+= xfer
;
776 uiop
->uio_resid
-= xfer
;
778 if (uiop
->uio_iov
->iov_len
<= siz
) {
782 uiop
->uio_iov
->iov_base
=
783 (char *)uiop
->uio_iov
->iov_base
+ uiosiz
;
784 uiop
->uio_iov
->iov_len
-= uiosiz
;
792 error
= nfs_adv(mrep
, dpos
, rem
, len
);
800 * copies a uio scatter/gather list to an mbuf chain.
801 * NOTE: can only handle iovcnt == 1
804 nfsm_uiotombuf(struct uio
*uiop
, struct mbuf
**mq
, int siz
, char **bpos
)
807 struct mbuf
*mp
, *mp2
;
808 int xfer
, left
, mlen
;
809 int uiosiz
, clflg
, rem
;
814 if (uiop
->uio_iovcnt
!= 1)
815 panic("nfsm_uiotombuf: iovcnt != 1");
818 if (siz
> MLEN
) /* or should it >= MCLBYTES ?? */
822 rem
= nfsm_rndup(siz
)-siz
;
825 left
= uiop
->uio_iov
->iov_len
;
826 uiocp
= uiop
->uio_iov
->iov_base
;
831 mlen
= M_TRAILINGSPACE(mp
);
833 mp
= m_get(M_WAIT
, MT_DATA
);
834 MCLAIM(mp
, &nfs_mowner
);
840 mlen
= M_TRAILINGSPACE(mp
);
842 xfer
= (left
> mlen
) ? mlen
: left
;
843 cp
= mtod(mp
, char *) + mp
->m_len
;
844 error
= copyin_vmspace(uiop
->uio_vmspace
, uiocp
, cp
,
852 uiop
->uio_offset
+= xfer
;
853 uiop
->uio_resid
-= xfer
;
855 uiop
->uio_iov
->iov_base
= (char *)uiop
->uio_iov
->iov_base
+
857 uiop
->uio_iov
->iov_len
-= uiosiz
;
861 if (rem
> M_TRAILINGSPACE(mp
)) {
862 mp
= m_get(M_WAIT
, MT_DATA
);
863 MCLAIM(mp
, &nfs_mowner
);
867 cp
= mtod(mp
, char *) + mp
->m_len
;
868 for (left
= 0; left
< rem
; left
++)
873 *bpos
= mtod(mp
, char *) + mp
->m_len
;
879 * Get at least "siz" bytes of correctly aligned data.
880 * When called the mbuf pointers are not necessarily correct,
881 * dposp points to what ought to be in m_data and left contains
882 * what ought to be in m_len.
883 * This is used by the macros nfsm_dissect and nfsm_dissecton for tough
884 * cases. (The macros use the vars. dpos and dpos2)
887 nfsm_disct(struct mbuf
**mdp
, char **dposp
, int siz
, int left
, char **cp2
)
889 struct mbuf
*m1
, *m2
;
890 struct mbuf
*havebuf
= NULL
;
897 panic("nfsm_disct: left < 0");
901 * Skip through the mbuf chain looking for an mbuf with
902 * some data. If the first mbuf found has enough data
903 * and it is correctly aligned return it.
907 *mdp
= m1
= m1
->m_next
;
910 src
= mtod(m1
, void *);
913 * If we start a new mbuf and it is big enough
914 * and correctly aligned just return it, don't
917 if (left
>= siz
&& nfsm_aligned(src
)) {
923 if ((m1
->m_flags
& M_EXT
) != 0) {
924 if (havebuf
&& M_TRAILINGSPACE(havebuf
) >= siz
&&
925 nfsm_aligned(mtod(havebuf
, char *) + havebuf
->m_len
)) {
927 * If the first mbuf with data has external data
928 * and there is a previous mbuf with some trailing
929 * space, use it to move the data into.
933 *cp2
= mtod(m1
, char *) + m1
->m_len
;
934 } else if (havebuf
) {
936 * If the first mbuf has a external data
937 * and there is no previous empty mbuf
938 * allocate a new mbuf and move the external
939 * data to the new mbuf. Also make the first
943 *mdp
= m1
= m_get(M_WAIT
, MT_DATA
);
944 MCLAIM(m1
, m2
->m_owner
);
945 if ((m2
->m_flags
& M_PKTHDR
) != 0) {
947 M_COPY_PKTHDR(m1
, m2
);
948 m_tag_delete_chain(m2
, NULL
);
949 m2
->m_flags
&= ~M_PKTHDR
;
952 havebuf
->m_next
= m1
;
959 *cp2
= mtod(m1
, char *);
961 struct mbuf
**nextp
= &m1
->m_next
;
965 m2
= m_get(M_WAIT
, MT_DATA
);
966 MCLAIM(m2
, m1
->m_owner
);
967 if (left
>= MINCLSIZE
) {
973 len
= (m2
->m_flags
& M_EXT
) != 0 ?
978 memcpy(mtod(m2
, char *), src
, len
);
983 *mdp
= m1
= m1
->m_next
;
985 *cp2
= mtod(m1
, char *);
989 * If the first mbuf has no external data
990 * move the data to the front of the mbuf.
993 dst
= mtod(m1
, char *);
995 memmove(dst
, src
, left
);
1001 *dposp
= *cp2
+ siz
;
1003 * Loop through mbufs pulling data up into first mbuf until
1004 * the first mbuf is full or there is no more data to
1007 dst
= mtod(m1
, char *) + m1
->m_len
;
1008 while ((len
= M_TRAILINGSPACE(m1
)) != 0 && m2
) {
1009 if ((len
= min(len
, m2
->m_len
)) != 0) {
1010 memcpy(dst
, mtod(m2
, char *), len
);
1018 if (m1
->m_len
< siz
)
1024 * Advance the position in the mbuf chain.
1027 nfs_adv(struct mbuf
**mdp
, char **dposp
, int offs
, int left
)
1042 *dposp
= mtod(m
, char *) + offs
;
1047 * Copy a string into mbufs for the hard cases...
1050 nfsm_strtmbuf(struct mbuf
**mb
, char **bpos
, const char *cp
, long siz
)
1052 struct mbuf
*m1
= NULL
, *m2
;
1053 long left
, xfer
, len
, tlen
;
1059 left
= M_TRAILINGSPACE(m2
);
1061 tl
= ((u_int32_t
*)(*bpos
));
1062 *tl
++ = txdr_unsigned(siz
);
1064 left
-= NFSX_UNSIGNED
;
1065 m2
->m_len
+= NFSX_UNSIGNED
;
1067 memcpy((void *) tl
, cp
, left
);
1074 /* Loop around adding mbufs */
1076 m1
= m_get(M_WAIT
, MT_DATA
);
1077 MCLAIM(m1
, &nfs_mowner
);
1079 m_clget(m1
, M_WAIT
);
1080 m1
->m_len
= NFSMSIZ(m1
);
1083 tl
= mtod(m1
, u_int32_t
*);
1086 *tl
++ = txdr_unsigned(siz
);
1087 m1
->m_len
-= NFSX_UNSIGNED
;
1088 tlen
= NFSX_UNSIGNED
;
1091 if (siz
< m1
->m_len
) {
1092 len
= nfsm_rndup(siz
);
1095 *(tl
+(xfer
>>2)) = 0;
1097 xfer
= len
= m1
->m_len
;
1099 memcpy((void *) tl
, cp
, xfer
);
1100 m1
->m_len
= len
+tlen
;
1105 *bpos
= mtod(m1
, char *) + m1
->m_len
;
1110 * Directory caching routines. They work as follows:
1111 * - a cache is maintained per VDIR nfsnode.
1112 * - for each offset cookie that is exported to userspace, and can
1113 * thus be thrown back at us as an offset to VOP_READDIR, store
1114 * information in the cache.
1117 * - blocknumber (essentially just a search key in the buffer cache)
1118 * - entry number in block.
1119 * - offset cookie of block in which this entry is stored
1120 * - 32 bit cookie if NFSMNT_XLATECOOKIE is used.
1121 * - entries are looked up in a hash table
1122 * - also maintained is an LRU list of entries, used to determine
1123 * which ones to delete if the cache grows too large.
1124 * - if 32 <-> 64 translation mode is requested for a filesystem,
1125 * the cache also functions as a translation table
1126 * - in the translation case, invalidating the cache does not mean
1127 * flushing it, but just marking entries as invalid, except for
1128 * the <64bit cookie, 32bitcookie> pair which is still valid, to
1129 * still be able to use the cache as a translation table.
1130 * - 32 bit cookies are uniquely created by combining the hash table
1131 * entry value, and one generation count per hash table entry,
1132 * incremented each time an entry is appended to the chain.
1133 * - the cache is invalidated each time a directory is modified
1134 * - sanity checks are also done; if an entry in a block turns
1135 * out not to have a matching cookie, the cache is invalidated
1136 * and a new block starting from the wanted offset is fetched from
1138 * - directory entries as read from the server are extended to contain
1139 * the 64bit and, optionally, the 32bit cookies, for sanity checking
1140 * the cache and exporting them to userspace through the cookie
1141 * argument to VOP_READDIR.
1145 nfs_dirhash(off_t off
)
1148 char *cp
= (char *)&off
;
1151 for (i
= 0 ; i
< sizeof (off
); i
++)
/*
 * Locking helpers for the per-nfsnode directory cookie cache: the
 * cache is protected by the interlock of the node's vnode
 * (NFSTOV(np)->v_interlock), so no separate mutex is allocated.
 */
1157 #define _NFSDC_MTX(np) (&NFSTOV(np)->v_interlock)
1158 #define NFSDC_LOCK(np) mutex_enter(_NFSDC_MTX(np))
1159 #define NFSDC_UNLOCK(np) mutex_exit(_NFSDC_MTX(np))
1160 #define NFSDC_ASSERT_LOCKED(np) KASSERT(mutex_owned(_NFSDC_MTX(np)))
1163 nfs_initdircache(struct vnode
*vp
)
1165 struct nfsnode
*np
= VTONFS(vp
);
1166 struct nfsdirhashhead
*dircache
;
1168 dircache
= hashinit(NFS_DIRHASHSIZ
, HASH_LIST
, true,
1172 if (np
->n_dircache
== NULL
) {
1173 np
->n_dircachesize
= 0;
1174 np
->n_dircache
= dircache
;
1176 TAILQ_INIT(&np
->n_dirchain
);
1180 hashdone(dircache
, HASH_LIST
, nfsdirhashmask
);
1184 nfs_initdirxlatecookie(struct vnode
*vp
)
1186 struct nfsnode
*np
= VTONFS(vp
);
1189 KASSERT(VFSTONFS(vp
->v_mount
)->nm_flag
& NFSMNT_XLATECOOKIE
);
1191 dirgens
= kmem_zalloc(NFS_DIRHASHSIZ
* sizeof(unsigned), KM_SLEEP
);
1193 if (np
->n_dirgens
== NULL
) {
1194 np
->n_dirgens
= dirgens
;
1199 kmem_free(dirgens
, NFS_DIRHASHSIZ
* sizeof(unsigned));
/*
 * All-zero sentinel dircache entry handed out (via __UNCONST) for
 * offset-0 lookups; it is never linked into any hash chain.
 */
1202 static const struct nfsdircache dzero
;
/* Unlink an entry from the hash chain and LRU list; caller holds the lock. */
1204 static void nfs_unlinkdircache(struct nfsnode
*np
, struct nfsdircache
*);
/* Drop a reference with the node's dircache lock already held. */
1205 static void nfs_putdircache_unlocked(struct nfsnode
*,
1206 struct nfsdircache
*);
1209 nfs_unlinkdircache(struct nfsnode
*np
, struct nfsdircache
*ndp
)
1212 NFSDC_ASSERT_LOCKED(np
);
1213 KASSERT(ndp
!= &dzero
);
1215 if (LIST_NEXT(ndp
, dc_hash
) == (void *)-1)
1218 TAILQ_REMOVE(&np
->n_dirchain
, ndp
, dc_chain
);
1219 LIST_REMOVE(ndp
, dc_hash
);
1220 LIST_NEXT(ndp
, dc_hash
) = (void *)-1; /* mark as unlinked */
1222 nfs_putdircache_unlocked(np
, ndp
);
1226 nfs_putdircache(struct nfsnode
*np
, struct nfsdircache
*ndp
)
1233 KASSERT(ndp
->dc_refcnt
> 0);
1235 ref
= --ndp
->dc_refcnt
;
1239 kmem_free(ndp
, sizeof(*ndp
));
1243 nfs_putdircache_unlocked(struct nfsnode
*np
, struct nfsdircache
*ndp
)
1247 NFSDC_ASSERT_LOCKED(np
);
1252 KASSERT(ndp
->dc_refcnt
> 0);
1253 ref
= --ndp
->dc_refcnt
;
1255 kmem_free(ndp
, sizeof(*ndp
));
1258 struct nfsdircache
*
1259 nfs_searchdircache(struct vnode
*vp
, off_t off
, int do32
, int *hashent
)
1261 struct nfsdirhashhead
*ndhp
;
1262 struct nfsdircache
*ndp
= NULL
;
1263 struct nfsnode
*np
= VTONFS(vp
);
1267 * Zero is always a valid cookie.
1271 return (struct nfsdircache
*)__UNCONST(&dzero
);
1273 if (!np
->n_dircache
)
1277 * We use a 32bit cookie as search key, directly reconstruct
1278 * the hashentry. Else use the hashfunction.
1281 ent
= (u_int32_t
)off
>> 24;
1282 if (ent
>= NFS_DIRHASHSIZ
)
1284 ndhp
= &np
->n_dircache
[ent
];
1286 ndhp
= NFSDIRHASH(np
, off
);
1290 *hashent
= (int)(ndhp
- np
->n_dircache
);
1294 LIST_FOREACH(ndp
, ndhp
, dc_hash
) {
1295 if (ndp
->dc_cookie32
== (u_int32_t
)off
) {
1297 * An invalidated entry will become the
1298 * start of a new block fetched from
1301 if (ndp
->dc_flags
& NFSDC_INVALID
) {
1302 ndp
->dc_blkcookie
= ndp
->dc_cookie
;
1304 ndp
->dc_flags
&= ~NFSDC_INVALID
;
1310 LIST_FOREACH(ndp
, ndhp
, dc_hash
) {
1311 if (ndp
->dc_cookie
== off
)
1322 struct nfsdircache
*
1323 nfs_enterdircache(struct vnode
*vp
, off_t off
, off_t blkoff
, int en
,
1326 struct nfsnode
*np
= VTONFS(vp
);
1327 struct nfsdirhashhead
*ndhp
;
1328 struct nfsdircache
*ndp
= NULL
;
1329 struct nfsdircache
*newndp
= NULL
;
1330 struct nfsmount
*nmp
= VFSTONFS(vp
->v_mount
);
1331 int hashent
= 0, gen
, overwrite
; /* XXX: GCC */
1334 * XXX refuse entries for offset 0. amd(8) erroneously sets
1335 * cookie 0 for the '.' entry, making this necessary. This
1336 * isn't so bad, as 0 is a special case anyway.
1340 return (struct nfsdircache
*)__UNCONST(&dzero
);
1342 if (!np
->n_dircache
)
1344 * XXX would like to do this in nfs_nget but vtype
1345 * isn't known at that time.
1347 nfs_initdircache(vp
);
1349 if ((nmp
->nm_flag
& NFSMNT_XLATECOOKIE
) && !np
->n_dirgens
)
1350 nfs_initdirxlatecookie(vp
);
1353 ndp
= nfs_searchdircache(vp
, off
, 0, &hashent
);
1356 if (ndp
&& (ndp
->dc_flags
& NFSDC_INVALID
) == 0) {
1358 * Overwriting an old entry. Check if it's the same.
1359 * If so, just return. If not, remove the old entry.
1361 if (ndp
->dc_blkcookie
== blkoff
&& ndp
->dc_entry
== en
)
1363 nfs_unlinkdircache(np
, ndp
);
1364 nfs_putdircache_unlocked(np
, ndp
);
1368 ndhp
= &np
->n_dircache
[hashent
];
1371 if (newndp
== NULL
) {
1373 newndp
= kmem_alloc(sizeof(*newndp
), KM_SLEEP
);
1374 newndp
->dc_refcnt
= 1;
1375 LIST_NEXT(newndp
, dc_hash
) = (void *)-1;
1381 if (nmp
->nm_flag
& NFSMNT_XLATECOOKIE
) {
1383 * We're allocating a new entry, so bump the
1384 * generation number.
1386 KASSERT(np
->n_dirgens
);
1387 gen
= ++np
->n_dirgens
[hashent
];
1389 np
->n_dirgens
[hashent
]++;
1392 ndp
->dc_cookie32
= (hashent
<< 24) | (gen
& 0xffffff);
1397 ndp
->dc_cookie
= off
;
1398 ndp
->dc_blkcookie
= blkoff
;
1406 * If the maximum directory cookie cache size has been reached
1407 * for this node, take one off the front. The idea is that
1408 * directories are typically read front-to-back once, so that
1409 * the oldest entries can be thrown away without much performance
1412 if (np
->n_dircachesize
== NFS_MAXDIRCACHE
) {
1413 nfs_unlinkdircache(np
, TAILQ_FIRST(&np
->n_dirchain
));
1415 np
->n_dircachesize
++;
1417 KASSERT(ndp
->dc_refcnt
== 1);
1418 LIST_INSERT_HEAD(ndhp
, ndp
, dc_hash
);
1419 TAILQ_INSERT_TAIL(&np
->n_dirchain
, ndp
, dc_chain
);
1422 KASSERT(ndp
->dc_refcnt
> 0);
1425 nfs_putdircache(np
, newndp
);
1430 nfs_invaldircache(struct vnode
*vp
, int flags
)
1432 struct nfsnode
*np
= VTONFS(vp
);
1433 struct nfsdircache
*ndp
= NULL
;
1434 struct nfsmount
*nmp
= VFSTONFS(vp
->v_mount
);
1435 const bool forcefree
= flags
& NFS_INVALDIRCACHE_FORCE
;
1438 if (vp
->v_type
!= VDIR
)
1439 panic("nfs: invaldircache: not dir");
1442 if ((flags
& NFS_INVALDIRCACHE_KEEPEOF
) == 0)
1443 np
->n_flag
&= ~NEOFVALID
;
1445 if (!np
->n_dircache
)
1449 if (!(nmp
->nm_flag
& NFSMNT_XLATECOOKIE
) || forcefree
) {
1450 while ((ndp
= TAILQ_FIRST(&np
->n_dirchain
)) != NULL
) {
1451 KASSERT(!forcefree
|| ndp
->dc_refcnt
== 1);
1452 nfs_unlinkdircache(np
, ndp
);
1454 np
->n_dircachesize
= 0;
1455 if (forcefree
&& np
->n_dirgens
) {
1456 kmem_free(np
->n_dirgens
,
1457 NFS_DIRHASHSIZ
* sizeof(unsigned));
1458 np
->n_dirgens
= NULL
;
1461 TAILQ_FOREACH(ndp
, &np
->n_dirchain
, dc_chain
)
1462 ndp
->dc_flags
|= NFSDC_INVALID
;
1469 * Called once before VFS init to initialize shared and
1470 * server-specific data structures.
1477 rpc_vers
= txdr_unsigned(RPC_VER2
);
1478 rpc_call
= txdr_unsigned(RPC_CALL
);
1479 rpc_reply
= txdr_unsigned(RPC_REPLY
);
1480 rpc_msgdenied
= txdr_unsigned(RPC_MSGDENIED
);
1481 rpc_msgaccepted
= txdr_unsigned(RPC_MSGACCEPTED
);
1482 rpc_mismatch
= txdr_unsigned(RPC_MISMATCH
);
1483 rpc_autherr
= txdr_unsigned(RPC_AUTHERR
);
1484 rpc_auth_unix
= txdr_unsigned(RPCAUTH_UNIX
);
1485 rpc_auth_kerb
= txdr_unsigned(RPCAUTH_KERB4
);
1486 nfs_prog
= txdr_unsigned(NFS_PROG
);
1487 nfs_true
= txdr_unsigned(true);
1488 nfs_false
= txdr_unsigned(false);
1489 nfs_xdrneg1
= txdr_unsigned(-1);
1490 nfs_ticks
= (hz
* NFS_TICKINTVL
+ 500) / 1000;
1493 nfs_xid
= arc4random();
1497 * Initialize reply list and start timer
1499 TAILQ_INIT(&nfs_reqq
);
1501 MOWNER_ATTACH(&nfs_mowner
);
1504 /* Initialize the kqueue structures */
1506 /* Initialize the iod structures */
1514 * This is disgusting, but it must support both modular and monolithic
1515 * configurations. For monolithic builds NFSSERVER may not imply NFS.
1522 static ONCE_DECL(nfs_init_once
);
1524 RUN_ONCE(&nfs_init_once
, nfs_init0
);
1537 MOWNER_DETACH(&nfs_mowner
);
1542 * Called once at VFS init to initialize client-specific data structures.
1548 /* Initialize NFS server / client shared data. */
1552 nfs_commitsize
= uvmexp
.npages
<< (PAGE_SHIFT
- 4);
1563 * Attribute cache routines.
1564 * nfs_loadattrcache() - loads or updates the cache contents from attributes
1565 * that are on the mbuf list
1566 * nfs_getattrcache() - returns valid attributes if found in cache, returns
1571 * Load the attribute cache (that lives in the nfsnode entry) with
1572 * the values on the mbuf list and
1574 * copy the attributes to *vaper
1577 nfsm_loadattrcache(struct vnode
**vpp
, struct mbuf
**mdp
, char **dposp
, struct vattr
*vaper
, int flags
)
1583 int v3
= NFS_ISV3(*vpp
);
1586 t1
= (mtod(md
, char *) + md
->m_len
) - *dposp
;
1587 error
= nfsm_disct(mdp
, dposp
, NFSX_FATTR(v3
), t1
, &cp2
);
1590 return nfs_loadattrcache(vpp
, (struct nfs_fattr
*)cp2
, vaper
, flags
);
1594 nfs_loadattrcache(struct vnode
**vpp
, struct nfs_fattr
*fp
, struct vattr
*vaper
, int flags
)
1596 struct vnode
*vp
= *vpp
;
1598 int v3
= NFS_ISV3(vp
);
1601 struct timespec mtime
;
1602 struct timespec ctime
;
1605 extern int (**spec_nfsv2nodeop_p
)(void *);
1610 vtyp
= nfsv3tov_type(fp
->fa_type
);
1611 vmode
= fxdr_unsigned(u_short
, fp
->fa_mode
);
1612 rdev
= makedev(fxdr_unsigned(u_int32_t
, fp
->fa3_rdev
.specdata1
),
1613 fxdr_unsigned(u_int32_t
, fp
->fa3_rdev
.specdata2
));
1614 fxdr_nfsv3time(&fp
->fa3_mtime
, &mtime
);
1615 fxdr_nfsv3time(&fp
->fa3_ctime
, &ctime
);
1617 vtyp
= nfsv2tov_type(fp
->fa_type
);
1618 vmode
= fxdr_unsigned(u_short
, fp
->fa_mode
);
1619 if (vtyp
== VNON
|| vtyp
== VREG
)
1620 vtyp
= IFTOVT(vmode
);
1621 rdev
= fxdr_unsigned(int32_t, fp
->fa2_rdev
);
1622 fxdr_nfsv2time(&fp
->fa2_mtime
, &mtime
);
1623 ctime
.tv_sec
= fxdr_unsigned(u_int32_t
,
1624 fp
->fa2_ctime
.nfsv2_sec
);
1628 * Really ugly NFSv2 kludge.
1630 if (vtyp
== VCHR
&& rdev
== 0xffffffff)
1637 * If v_type == VNON it is a new node, so fill in the v_type,
1638 * n_mtime fields. Check to see if it represents a special
1639 * device, and if so, check for a possible alias. Once the
1640 * correct vnode has been obtained, fill in the rest of the
1644 if (vp
->v_type
== VNON
) {
1646 if (vp
->v_type
== VFIFO
) {
1647 extern int (**fifo_nfsv2nodeop_p
)(void *);
1648 vp
->v_op
= fifo_nfsv2nodeop_p
;
1649 } else if (vp
->v_type
== VREG
) {
1650 mutex_init(&np
->n_commitlock
, MUTEX_DEFAULT
, IPL_NONE
);
1651 } else if (vp
->v_type
== VCHR
|| vp
->v_type
== VBLK
) {
1652 vp
->v_op
= spec_nfsv2nodeop_p
;
1653 spec_node_init(vp
, (dev_t
)rdev
);
1655 np
->n_mtime
= mtime
;
1657 uid
= fxdr_unsigned(uid_t
, fp
->fa_uid
);
1658 gid
= fxdr_unsigned(gid_t
, fp
->fa_gid
);
1662 * Invalidate access cache if uid, gid, mode or ctime changed.
1664 if (np
->n_accstamp
!= -1 &&
1665 (gid
!= vap
->va_gid
|| uid
!= vap
->va_uid
|| vmode
!= vap
->va_mode
1666 || timespeccmp(&ctime
, &vap
->va_ctime
, !=)))
1667 np
->n_accstamp
= -1;
1669 vap
->va_type
= vtyp
;
1670 vap
->va_mode
= vmode
;
1671 vap
->va_rdev
= (dev_t
)rdev
;
1672 vap
->va_mtime
= mtime
;
1673 vap
->va_ctime
= ctime
;
1674 vap
->va_birthtime
.tv_sec
= VNOVAL
;
1675 vap
->va_birthtime
.tv_nsec
= VNOVAL
;
1676 vap
->va_fsid
= vp
->v_mount
->mnt_stat
.f_fsidx
.__fsid_val
[0];
1679 vap
->va_blocksize
= NFS_DIRFRAGSIZ
;
1682 vap
->va_blocksize
= BLKDEV_IOSIZE
;
1685 vap
->va_blocksize
= MAXBSIZE
;
1688 vap
->va_blocksize
= v3
? vp
->v_mount
->mnt_stat
.f_iosize
:
1689 fxdr_unsigned(int32_t, fp
->fa2_blocksize
);
1693 vap
->va_nlink
= fxdr_unsigned(u_short
, fp
->fa_nlink
);
1696 vap
->va_size
= fxdr_hyper(&fp
->fa3_size
);
1697 vap
->va_bytes
= fxdr_hyper(&fp
->fa3_used
);
1698 vap
->va_fileid
= fxdr_hyper(&fp
->fa3_fileid
);
1699 fxdr_nfsv3time(&fp
->fa3_atime
, &vap
->va_atime
);
1701 vap
->va_filerev
= 0;
1703 vap
->va_nlink
= fxdr_unsigned(u_short
, fp
->fa_nlink
);
1706 vap
->va_size
= fxdr_unsigned(u_int32_t
, fp
->fa2_size
);
1707 vap
->va_bytes
= fxdr_unsigned(int32_t, fp
->fa2_blocks
)
1709 vap
->va_fileid
= fxdr_unsigned(int32_t, fp
->fa2_fileid
);
1710 fxdr_nfsv2time(&fp
->fa2_atime
, &vap
->va_atime
);
1712 vap
->va_gen
= fxdr_unsigned(u_int32_t
,fp
->fa2_ctime
.nfsv2_usec
);
1713 vap
->va_filerev
= 0;
1715 if (vap
->va_size
> VFSTONFS(vp
->v_mount
)->nm_maxfilesize
) {
1718 if (vap
->va_size
!= np
->n_size
) {
1719 if ((np
->n_flag
& NMODIFIED
) && vap
->va_size
< np
->n_size
) {
1720 vap
->va_size
= np
->n_size
;
1722 np
->n_size
= vap
->va_size
;
1723 if (vap
->va_type
== VREG
) {
1725 * we can't free pages if NAC_NOTRUNC because
1726 * the pages can be owned by ourselves.
1728 if (flags
& NAC_NOTRUNC
) {
1729 np
->n_flag
|= NTRUNCDELAYED
;
1731 genfs_node_wrlock(vp
);
1732 mutex_enter(&vp
->v_interlock
);
1733 (void)VOP_PUTPAGES(vp
, 0,
1734 0, PGO_SYNCIO
| PGO_CLEANIT
|
1735 PGO_FREE
| PGO_ALLPAGES
);
1736 uvm_vnp_setsize(vp
, np
->n_size
);
1737 genfs_node_unlock(vp
);
1742 np
->n_attrstamp
= time_second
;
1743 if (vaper
!= NULL
) {
1744 memcpy((void *)vaper
, (void *)vap
, sizeof(*vap
));
1745 if (np
->n_flag
& NCHG
) {
1746 if (np
->n_flag
& NACC
)
1747 vaper
->va_atime
= np
->n_atim
;
1748 if (np
->n_flag
& NUPD
)
1749 vaper
->va_mtime
= np
->n_mtim
;
1756 * Check the time stamp
1757 * If the cache is valid, copy contents to *vap and return 0
1758 * otherwise return an error
1761 nfs_getattrcache(struct vnode
*vp
, struct vattr
*vaper
)
1763 struct nfsnode
*np
= VTONFS(vp
);
1764 struct nfsmount
*nmp
= VFSTONFS(vp
->v_mount
);
1767 if (np
->n_attrstamp
== 0 ||
1768 (time_second
- np
->n_attrstamp
) >= nfs_attrtimeo(nmp
, np
)) {
1769 nfsstats
.attrcache_misses
++;
1772 nfsstats
.attrcache_hits
++;
1774 if (vap
->va_size
!= np
->n_size
) {
1775 if (vap
->va_type
== VREG
) {
1776 if ((np
->n_flag
& NMODIFIED
) != 0 &&
1777 vap
->va_size
< np
->n_size
) {
1778 vap
->va_size
= np
->n_size
;
1780 np
->n_size
= vap
->va_size
;
1782 genfs_node_wrlock(vp
);
1783 uvm_vnp_setsize(vp
, np
->n_size
);
1784 genfs_node_unlock(vp
);
1786 np
->n_size
= vap
->va_size
;
1788 memcpy((void *)vaper
, (void *)vap
, sizeof(struct vattr
));
1789 if (np
->n_flag
& NCHG
) {
1790 if (np
->n_flag
& NACC
)
1791 vaper
->va_atime
= np
->n_atim
;
1792 if (np
->n_flag
& NUPD
)
1793 vaper
->va_mtime
= np
->n_mtim
;
1799 nfs_delayedtruncate(struct vnode
*vp
)
1801 struct nfsnode
*np
= VTONFS(vp
);
1803 if (np
->n_flag
& NTRUNCDELAYED
) {
1804 np
->n_flag
&= ~NTRUNCDELAYED
;
1805 genfs_node_wrlock(vp
);
1806 mutex_enter(&vp
->v_interlock
);
1807 (void)VOP_PUTPAGES(vp
, 0,
1808 0, PGO_SYNCIO
| PGO_CLEANIT
| PGO_FREE
| PGO_ALLPAGES
);
1809 uvm_vnp_setsize(vp
, np
->n_size
);
1810 genfs_node_unlock(vp
);
#define	NFS_WCCKLUDGE_TIMEOUT	(24 * 60 * 60)	/* 1 day */

/*
 * True iff the "inaccurate wcc data" workaround is active on this mount
 * and was last triggered less than NFS_WCCKLUDGE_TIMEOUT seconds ago.
 */
#define	NFS_WCCKLUDGE(nmp, now) \
	(((nmp)->nm_iflag & NFSMNT_WCCKLUDGE) && \
	((now) - (nmp)->nm_wcckludgetime - NFS_WCCKLUDGE_TIMEOUT) < 0)
1820 * nfs_check_wccdata: check inaccurate wcc_data
1822 * => return non-zero if we shouldn't trust the wcc_data.
1823 * => NFS_WCCKLUDGE_TIMEOUT is for the case that the server is "fixed".
1827 nfs_check_wccdata(struct nfsnode
*np
, const struct timespec
*ctime
,
1828 struct timespec
*mtime
, bool docheck
)
1832 #if !defined(NFS_V2_ONLY)
1835 struct vnode
*vp
= NFSTOV(np
);
1836 struct nfsmount
*nmp
;
1837 long now
= time_second
;
1838 const struct timespec
*omtime
= &np
->n_vattr
->va_mtime
;
1839 const struct timespec
*octime
= &np
->n_vattr
->va_ctime
;
1840 const char *reason
= NULL
; /* XXX: gcc */
1842 if (timespeccmp(omtime
, mtime
, <=)) {
1847 if (vp
->v_type
== VDIR
&& timespeccmp(octime
, ctime
, <=)) {
1852 nmp
= VFSTONFS(vp
->v_mount
);
1856 * despite of the fact that we've updated the file,
1857 * timestamps of the file were not updated as we
1859 * it means that the server has incompatible
1860 * semantics of timestamps or (more likely)
1861 * the server time is not precise enough to
1862 * track each modifications.
1863 * in that case, we disable wcc processing.
1865 * yes, strictly speaking, we should disable all
1866 * caching. it's a compromise.
1869 mutex_enter(&nmp
->nm_lock
);
1870 if (!NFS_WCCKLUDGE(nmp
, now
)) {
1871 printf("%s: inaccurate wcc data (%s) detected,"
1873 " (ctime %u.%09u %u.%09u,"
1874 " mtime %u.%09u %u.%09u)\n",
1875 vp
->v_mount
->mnt_stat
.f_mntfromname
,
1877 (unsigned int)octime
->tv_sec
,
1878 (unsigned int)octime
->tv_nsec
,
1879 (unsigned int)ctime
->tv_sec
,
1880 (unsigned int)ctime
->tv_nsec
,
1881 (unsigned int)omtime
->tv_sec
,
1882 (unsigned int)omtime
->tv_nsec
,
1883 (unsigned int)mtime
->tv_sec
,
1884 (unsigned int)mtime
->tv_nsec
);
1886 nmp
->nm_iflag
|= NFSMNT_WCCKLUDGE
;
1887 nmp
->nm_wcckludgetime
= now
;
1888 mutex_exit(&nmp
->nm_lock
);
1889 } else if (NFS_WCCKLUDGE(nmp
, now
)) {
1890 error
= EPERM
; /* XXX */
1891 } else if (nmp
->nm_iflag
& NFSMNT_WCCKLUDGE
) {
1892 mutex_enter(&nmp
->nm_lock
);
1893 if (nmp
->nm_iflag
& NFSMNT_WCCKLUDGE
) {
1894 printf("%s: re-enabling wcc\n",
1895 vp
->v_mount
->mnt_stat
.f_mntfromname
);
1896 nmp
->nm_iflag
&= ~NFSMNT_WCCKLUDGE
;
1898 mutex_exit(&nmp
->nm_lock
);
1902 #endif /* !defined(NFS_V2_ONLY) */
1908 * Heuristic to see if the server XDR encodes directory cookies or not.
1909 * it is not supposed to, but a lot of servers may do this. Also, since
1910 * most/all servers will implement V2 as well, it is expected that they
1911 * may return just 32 bits worth of cookie information, so we need to
1912 * find out in which 32 bits this information is available. We do this
1913 * to avoid trouble with emulated binaries that can't handle 64 bit
1914 * directory offsets.
1918 nfs_cookieheuristic(struct vnode
*vp
, int *flagp
, struct lwp
*l
, kauth_cred_t cred
)
1924 off_t
*cookies
= NULL
, *cop
;
1925 int error
, eof
, nc
, len
;
1927 tbuf
= malloc(NFS_DIRFRAGSIZ
, M_TEMP
, M_WAITOK
);
1929 aiov
.iov_base
= tbuf
;
1930 aiov
.iov_len
= NFS_DIRFRAGSIZ
;
1931 auio
.uio_iov
= &aiov
;
1932 auio
.uio_iovcnt
= 1;
1933 auio
.uio_rw
= UIO_READ
;
1934 auio
.uio_resid
= NFS_DIRFRAGSIZ
;
1935 auio
.uio_offset
= 0;
1936 UIO_SETUP_SYSSPACE(&auio
);
1938 error
= VOP_READDIR(vp
, &auio
, cred
, &eof
, &cookies
, &nc
);
1940 len
= NFS_DIRFRAGSIZ
- auio
.uio_resid
;
1941 if (error
|| len
== 0) {
1944 free(cookies
, M_TEMP
);
1949 * Find the first valid entry and look at its offset cookie.
1953 for (cop
= cookies
; len
> 0; len
-= dp
->d_reclen
) {
1954 dp
= (struct dirent
*)cp
;
1955 if (dp
->d_fileno
!= 0 && len
>= dp
->d_reclen
) {
1956 if ((*cop
>> 32) != 0 && (*cop
& 0xffffffffLL
) == 0) {
1957 *flagp
|= NFSMNT_SWAPCOOKIE
;
1958 nfs_invaldircache(vp
, 0);
1959 nfs_vinvalbuf(vp
, 0, cred
, l
, 1);
1968 free(cookies
, M_TEMP
);
1973 * A fiddled version of m_adj() that ensures null fill to a 32-bit
1974 * boundary and only trims off the back end
1976 * 1. trim off 'len' bytes as m_adj(mp, -len).
1977 * 2. add zero-padding 'nul' bytes at the end of the mbuf chain.
1980 nfs_zeropad(struct mbuf
*mp
, int len
, int nul
)
1986 * Trim from tail. Scan the mbuf chain,
1987 * calculating its length and finding the last mbuf.
1988 * If the adjustment only affects this mbuf, then just
1989 * adjust and return. Otherwise, rescan and truncate
1990 * after the remaining size.
1996 if (m
->m_next
== NULL
)
2001 KDASSERT(count
>= len
);
2003 if (m
->m_len
>= len
) {
2008 * Correct length for chain is "count".
2009 * Find the mbuf with last data, adjust its length,
2010 * and toss data from remaining mbufs on chain.
2012 for (m
= mp
; m
; m
= m
->m_next
) {
2013 if (m
->m_len
>= count
) {
2019 KASSERT(m
&& m
->m_next
);
2024 KDASSERT(m
->m_next
== NULL
);
2033 if (M_ROMAP(m
) || M_TRAILINGSPACE(m
) < nul
) {
2036 KDASSERT(MLEN
>= nul
);
2037 n
= m_get(M_WAIT
, MT_DATA
);
2038 MCLAIM(n
, &nfs_mowner
);
2042 cp
= mtod(n
, void *);
2044 cp
= mtod(m
, char *) + m
->m_len
;
2047 for (i
= 0; i
< nul
; i
++)
2054 * Make these functions instead of macros, so that the kernel text size
2055 * doesn't get too big...
2058 nfsm_srvwcc(struct nfsrv_descript
*nfsd
, int before_ret
, struct vattr
*before_vap
, int after_ret
, struct vattr
*after_vap
, struct mbuf
**mbp
, char **bposp
)
2060 struct mbuf
*mb
= *mbp
;
2061 char *bpos
= *bposp
;
2065 nfsm_build(tl
, u_int32_t
*, NFSX_UNSIGNED
);
2068 nfsm_build(tl
, u_int32_t
*, 7 * NFSX_UNSIGNED
);
2070 txdr_hyper(before_vap
->va_size
, tl
);
2072 txdr_nfsv3time(&(before_vap
->va_mtime
), tl
);
2074 txdr_nfsv3time(&(before_vap
->va_ctime
), tl
);
2078 nfsm_srvpostopattr(nfsd
, after_ret
, after_vap
, mbp
, bposp
);
2082 nfsm_srvpostopattr(struct nfsrv_descript
*nfsd
, int after_ret
, struct vattr
*after_vap
, struct mbuf
**mbp
, char **bposp
)
2084 struct mbuf
*mb
= *mbp
;
2085 char *bpos
= *bposp
;
2087 struct nfs_fattr
*fp
;
2090 nfsm_build(tl
, u_int32_t
*, NFSX_UNSIGNED
);
2093 nfsm_build(tl
, u_int32_t
*, NFSX_UNSIGNED
+ NFSX_V3FATTR
);
2095 fp
= (struct nfs_fattr
*)tl
;
2096 nfsm_srvfattr(nfsd
, after_vap
, fp
);
2103 nfsm_srvfattr(struct nfsrv_descript
*nfsd
, struct vattr
*vap
, struct nfs_fattr
*fp
)
2106 fp
->fa_nlink
= txdr_unsigned(vap
->va_nlink
);
2107 fp
->fa_uid
= txdr_unsigned(vap
->va_uid
);
2108 fp
->fa_gid
= txdr_unsigned(vap
->va_gid
);
2109 if (nfsd
->nd_flag
& ND_NFSV3
) {
2110 fp
->fa_type
= vtonfsv3_type(vap
->va_type
);
2111 fp
->fa_mode
= vtonfsv3_mode(vap
->va_mode
);
2112 txdr_hyper(vap
->va_size
, &fp
->fa3_size
);
2113 txdr_hyper(vap
->va_bytes
, &fp
->fa3_used
);
2114 fp
->fa3_rdev
.specdata1
= txdr_unsigned(major(vap
->va_rdev
));
2115 fp
->fa3_rdev
.specdata2
= txdr_unsigned(minor(vap
->va_rdev
));
2116 fp
->fa3_fsid
.nfsuquad
[0] = 0;
2117 fp
->fa3_fsid
.nfsuquad
[1] = txdr_unsigned(vap
->va_fsid
);
2118 txdr_hyper(vap
->va_fileid
, &fp
->fa3_fileid
);
2119 txdr_nfsv3time(&vap
->va_atime
, &fp
->fa3_atime
);
2120 txdr_nfsv3time(&vap
->va_mtime
, &fp
->fa3_mtime
);
2121 txdr_nfsv3time(&vap
->va_ctime
, &fp
->fa3_ctime
);
2123 fp
->fa_type
= vtonfsv2_type(vap
->va_type
);
2124 fp
->fa_mode
= vtonfsv2_mode(vap
->va_type
, vap
->va_mode
);
2125 fp
->fa2_size
= txdr_unsigned(vap
->va_size
);
2126 fp
->fa2_blocksize
= txdr_unsigned(vap
->va_blocksize
);
2127 if (vap
->va_type
== VFIFO
)
2128 fp
->fa2_rdev
= 0xffffffff;
2130 fp
->fa2_rdev
= txdr_unsigned(vap
->va_rdev
);
2131 fp
->fa2_blocks
= txdr_unsigned(vap
->va_bytes
/ NFS_FABLKSIZE
);
2132 fp
->fa2_fsid
= txdr_unsigned(vap
->va_fsid
);
2133 fp
->fa2_fileid
= txdr_unsigned(vap
->va_fileid
);
2134 txdr_nfsv2time(&vap
->va_atime
, &fp
->fa2_atime
);
2135 txdr_nfsv2time(&vap
->va_mtime
, &fp
->fa2_mtime
);
2136 txdr_nfsv2time(&vap
->va_ctime
, &fp
->fa2_ctime
);
2141 * This function compares two net addresses by family and returns true
2142 * if they are the same host.
2143 * If there is any doubt, return false.
2144 * The AF_INET family is handled as a special case so that address mbufs
2145 * don't need to be saved to store "struct in_addr", which is only 4 bytes.
2148 netaddr_match(int family
, union nethostaddr
*haddr
, struct mbuf
*nam
)
2150 struct sockaddr_in
*inetaddr
;
2154 inetaddr
= mtod(nam
, struct sockaddr_in
*);
2155 if (inetaddr
->sin_family
== AF_INET
&&
2156 inetaddr
->sin_addr
.s_addr
== haddr
->had_inetaddr
)
2161 struct sockaddr_in6
*sin6_1
, *sin6_2
;
2163 sin6_1
= mtod(nam
, struct sockaddr_in6
*);
2164 sin6_2
= mtod(haddr
->had_nam
, struct sockaddr_in6
*);
2165 if (sin6_1
->sin6_family
== AF_INET6
&&
2166 IN6_ARE_ADDR_EQUAL(&sin6_1
->sin6_addr
, &sin6_2
->sin6_addr
))
2176 * The write verifier has changed (probably due to a server reboot), so all
2177 * PG_NEEDCOMMIT pages will have to be written again. Since they are marked
2178 * as dirty or are being written out just now, all this takes is clearing
2179 * the PG_NEEDCOMMIT flag. Once done the new write verifier can be set for
2183 nfs_clearcommit(struct mount
*mp
)
2188 struct nfsmount
*nmp
= VFSTONFS(mp
);
2190 rw_enter(&nmp
->nm_writeverflock
, RW_WRITER
);
2191 mutex_enter(&mntvnode_lock
);
2192 TAILQ_FOREACH(vp
, &mp
->mnt_vnodelist
, v_mntvnodes
) {
2193 KASSERT(vp
->v_mount
== mp
);
2194 if (vp
->v_type
!= VREG
)
2196 mutex_enter(&vp
->v_interlock
);
2197 if (vp
->v_iflag
& (VI_XLOCK
| VI_CLEAN
)) {
2198 mutex_exit(&vp
->v_interlock
);
2202 np
->n_pushlo
= np
->n_pushhi
= np
->n_pushedlo
=
2204 np
->n_commitflags
&=
2205 ~(NFS_COMMIT_PUSH_VALID
| NFS_COMMIT_PUSHED_VALID
);
2206 TAILQ_FOREACH(pg
, &vp
->v_uobj
.memq
, listq
.queue
) {
2207 pg
->flags
&= ~PG_NEEDCOMMIT
;
2209 mutex_exit(&vp
->v_interlock
);
2211 mutex_exit(&mntvnode_lock
);
2212 mutex_enter(&nmp
->nm_lock
);
2213 nmp
->nm_iflag
&= ~NFSMNT_STALEWRITEVERF
;
2214 mutex_exit(&nmp
->nm_lock
);
2215 rw_exit(&nmp
->nm_writeverflock
);
2219 nfs_merge_commit_ranges(struct vnode
*vp
)
2221 struct nfsnode
*np
= VTONFS(vp
);
2223 KASSERT(np
->n_commitflags
& NFS_COMMIT_PUSH_VALID
);
2225 if (!(np
->n_commitflags
& NFS_COMMIT_PUSHED_VALID
)) {
2226 np
->n_pushedlo
= np
->n_pushlo
;
2227 np
->n_pushedhi
= np
->n_pushhi
;
2228 np
->n_commitflags
|= NFS_COMMIT_PUSHED_VALID
;
2230 if (np
->n_pushlo
< np
->n_pushedlo
)
2231 np
->n_pushedlo
= np
->n_pushlo
;
2232 if (np
->n_pushhi
> np
->n_pushedhi
)
2233 np
->n_pushedhi
= np
->n_pushhi
;
2236 np
->n_pushlo
= np
->n_pushhi
= 0;
2237 np
->n_commitflags
&= ~NFS_COMMIT_PUSH_VALID
;
2239 #ifdef NFS_DEBUG_COMMIT
2240 printf("merge: committed: %u - %u\n", (unsigned)np
->n_pushedlo
,
2241 (unsigned)np
->n_pushedhi
);
2246 nfs_in_committed_range(struct vnode
*vp
, off_t off
, off_t len
)
2248 struct nfsnode
*np
= VTONFS(vp
);
2251 if (!(np
->n_commitflags
& NFS_COMMIT_PUSHED_VALID
))
2256 return (lo
>= np
->n_pushedlo
&& hi
<= np
->n_pushedhi
);
2260 nfs_in_tobecommitted_range(struct vnode
*vp
, off_t off
, off_t len
)
2262 struct nfsnode
*np
= VTONFS(vp
);
2265 if (!(np
->n_commitflags
& NFS_COMMIT_PUSH_VALID
))
2270 return (lo
>= np
->n_pushlo
&& hi
<= np
->n_pushhi
);
2274 nfs_add_committed_range(struct vnode
*vp
, off_t off
, off_t len
)
2276 struct nfsnode
*np
= VTONFS(vp
);
2282 if (!(np
->n_commitflags
& NFS_COMMIT_PUSHED_VALID
)) {
2283 np
->n_pushedlo
= lo
;
2284 np
->n_pushedhi
= hi
;
2285 np
->n_commitflags
|= NFS_COMMIT_PUSHED_VALID
;
2287 if (hi
> np
->n_pushedhi
)
2288 np
->n_pushedhi
= hi
;
2289 if (lo
< np
->n_pushedlo
)
2290 np
->n_pushedlo
= lo
;
2292 #ifdef NFS_DEBUG_COMMIT
2293 printf("add: committed: %u - %u\n", (unsigned)np
->n_pushedlo
,
2294 (unsigned)np
->n_pushedhi
);
2299 nfs_del_committed_range(struct vnode
*vp
, off_t off
, off_t len
)
2301 struct nfsnode
*np
= VTONFS(vp
);
2304 if (!(np
->n_commitflags
& NFS_COMMIT_PUSHED_VALID
))
2310 if (lo
> np
->n_pushedhi
|| hi
< np
->n_pushedlo
)
2312 if (lo
<= np
->n_pushedlo
)
2313 np
->n_pushedlo
= hi
;
2314 else if (hi
>= np
->n_pushedhi
)
2315 np
->n_pushedhi
= lo
;
2318 * XXX There's only one range. If the deleted range
2319 * is in the middle, pick the largest of the
2320 * contiguous ranges that it leaves.
2322 if ((np
->n_pushedlo
- lo
) > (hi
- np
->n_pushedhi
))
2323 np
->n_pushedhi
= lo
;
2325 np
->n_pushedlo
= hi
;
2327 #ifdef NFS_DEBUG_COMMIT
2328 printf("del: committed: %u - %u\n", (unsigned)np
->n_pushedlo
,
2329 (unsigned)np
->n_pushedhi
);
2334 nfs_add_tobecommitted_range(struct vnode
*vp
, off_t off
, off_t len
)
2336 struct nfsnode
*np
= VTONFS(vp
);
2342 if (!(np
->n_commitflags
& NFS_COMMIT_PUSH_VALID
)) {
2345 np
->n_commitflags
|= NFS_COMMIT_PUSH_VALID
;
2347 if (lo
< np
->n_pushlo
)
2349 if (hi
> np
->n_pushhi
)
2352 #ifdef NFS_DEBUG_COMMIT
2353 printf("add: tobecommitted: %u - %u\n", (unsigned)np
->n_pushlo
,
2354 (unsigned)np
->n_pushhi
);
2359 nfs_del_tobecommitted_range(struct vnode
*vp
, off_t off
, off_t len
)
2361 struct nfsnode
*np
= VTONFS(vp
);
2364 if (!(np
->n_commitflags
& NFS_COMMIT_PUSH_VALID
))
2370 if (lo
> np
->n_pushhi
|| hi
< np
->n_pushlo
)
2373 if (lo
<= np
->n_pushlo
)
2375 else if (hi
>= np
->n_pushhi
)
2379 * XXX There's only one range. If the deleted range
2380 * is in the middle, pick the largest of the
2381 * contiguous ranges that it leaves.
2383 if ((np
->n_pushlo
- lo
) > (hi
- np
->n_pushhi
))
2388 #ifdef NFS_DEBUG_COMMIT
2389 printf("del: tobecommitted: %u - %u\n", (unsigned)np
->n_pushlo
,
2390 (unsigned)np
->n_pushhi
);
2395 * Map errnos to NFS error numbers. For Version 3 also filter out error
2396 * numbers not specified for the associated procedure.
2399 nfsrv_errmap(struct nfsrv_descript
*nd
, int err
)
2401 const short *defaulterrp
, *errp
;
2403 if (nd
->nd_flag
& ND_NFSV3
) {
2404 if (nd
->nd_procnum
<= NFSPROC_COMMIT
) {
2405 errp
= defaulterrp
= nfsrv_v3errmap
[nd
->nd_procnum
];
2409 else if (*errp
> err
)
2412 return ((int)*defaulterrp
);
2414 return (err
& 0xffff);
2417 return ((int)nfsrv_v2errmap
[err
- 1]);
2426 /* get next xid. skip 0 */
2428 newxid
= atomic_inc_32_nv(&nfs_xid
);
2429 } while (__predict_false(newxid
== 0));
2431 return txdr_unsigned(newxid
);
2435 * assign a new xid for existing request.
2436 * used for NFSERR_JUKEBOX handling.
2439 nfs_renewxid(struct nfsreq
*req
)
2445 if (req
->r_nmp
->nm_sotype
== SOCK_STREAM
)
2446 off
= sizeof(u_int32_t
); /* RPC record mark */
2450 m_copyback(req
->r_mreq
, off
, sizeof(xid
), (void *)&xid
);
2456 * Set the attribute timeout based on how recently the file has been modified.
2460 nfs_attrtimeo(struct nfsmount
*nmp
, struct nfsnode
*np
)
2464 if ((nmp
->nm_flag
& NFSMNT_NOAC
) != 0)
2467 if (((np
)->n_flag
& NMODIFIED
) != 0)
2468 return NFS_MINATTRTIMO
;
2470 timeo
= (time_second
- np
->n_mtime
.tv_sec
) / 10;
2471 timeo
= max(timeo
, NFS_MINATTRTIMO
);
2472 timeo
= min(timeo
, NFS_MAXATTRTIMO
);
2475 #endif /* defined(NFS) */