1 /* $NetBSD: nfs.h,v 1.69 2007/12/04 17:42:30 yamt Exp $ */
3 * Copyright (c) 1989, 1993, 1995
4 * The Regents of the University of California. All rights reserved.
6 * This code is derived from software contributed to Berkeley by
7 * Rick Macklem at The University of Guelph.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * @(#)nfs.h 8.4 (Berkeley) 5/1/95
40 #include <sys/condvar.h>
41 #include <sys/fstypes.h>
43 #include <sys/mutex.h>
47 * Tunable constants for nfs
50 #define NFS_MAXIOVEC 34
51 #define NFS_TICKINTVL 5 /* Desired time for a tick (msec) */
52 #define NFS_HZ (hz / nfs_ticks) /* Ticks/sec */
53 #define NFS_TIMEO (3 * NFS_HZ) /* Default timeout = 3 seconds */
54 #define NFS_MINTIMEO (1 * NFS_HZ) /* Min timeout to use */
55 #define NFS_MAXTIMEO (60 * NFS_HZ) /* Max timeout to backoff to */
56 #define NFS_MINIDEMTIMEO (5 * NFS_HZ) /* Min timeout for non-idempotent ops*/
57 #define NFS_MAXREXMIT 100 /* Stop counting after this many */
58 #define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */
59 #define NFS_RETRANS 10 /* Num of retrans for soft mounts */
60 #define NFS_MAXGRPS 16 /* Max. size of groups list */
61 #ifndef NFS_MINATTRTIMO
62 #define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */
64 #ifndef NFS_MAXATTRTIMO
65 #define NFS_MAXATTRTIMO 60
67 #define NFS_TRYLATERDEL 1 /* Initial try later delay (sec) */
68 #define NFS_TRYLATERDELMAX (1*60) /* Maximum try later delay (sec) */
69 #define NFS_TRYLATERDELMUL 2 /* Exponential backoff multiplier */
72 * These can be overridden through <machine/param.h>, included via
73 * <sys/param.h>. This means that <sys/param.h> should always be
74 * included before this file.
77 #define NFS_WSIZE 8192 /* Def. write data size */
80 #define NFS_RSIZE 8192 /* Def. read data size */
82 #ifndef NFS_READDIRSIZE
83 #define NFS_READDIRSIZE 8192 /* Def. readdir size */
87 * NFS client IO daemon threads. May be overridden by config options.
89 #ifndef NFS_MAXASYNCDAEMON
90 #define NFS_MAXASYNCDAEMON 128 /* Max. number async_daemons runable */
94 * NFS client read-ahead. May be overridden by config options.
95 * Should be no more than NFS_MAXASYNCDAEMON as each read-ahead operation
96 * requires one IO thread.
99 #define NFS_MAXRAHEAD 32 /* Max. read ahead # blocks */
101 #define NFS_DEFRAHEAD 2 /* Def. read ahead # blocks */
103 #define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */
105 #define NFS_DEFDEADTHRESH NFS_NEVERDEAD /* Default nm_deadthresh */
106 #define NFS_NEVERDEAD 9 /* Greater than max. nm_timeouts */
109 extern int nfs_niothreads
; /* Number of async_daemons desired */
110 #ifndef NFS_DEFAULT_NIOTHREADS
111 #define NFS_DEFAULT_NIOTHREADS 4
114 #define NFS_MAXGATHERDELAY 100 /* Max. write gather delay (msec) */
115 #ifndef NFS_GATHERDELAY
116 #define NFS_GATHERDELAY 10 /* Default write gather delay (msec) */
120 * NFS_DIRBLKSIZ is the size of buffers in the buffer cache used for
121 * NFS directory vnodes. NFS_DIRFRAGSIZ is the minimum aligned amount
122 * of data in those buffers, and thus the minimum amount of data
123 * that you can request. NFS_DIRFRAGSIZ should be no smaller than
127 #define NFS_DIRBLKSIZ 8192 /* Must be a multiple of DIRBLKSIZ */
128 #define NFS_DIRFRAGSIZ 512 /* Same as DIRBLKSIZ, generally */
131 * Maximum number of directory entries cached per NFS node, to avoid
132 * having this grow without bounds on very large directories. The
133 * minimum size to get reasonable performance for emulated binaries
134 * is the maximum number of entries that fits in NFS_DIRBLKSIZ.
135 * For NFS_DIRBLKSIZ = 512, this would be 512 / 14 = 36.
137 #define NFS_MAXDIRCACHE (NFS_DIRBLKSIZ / 14)
/*
 * NFS_CMPFH(n, f, s): non-zero when the file handle held by node `n'
 * (n_fhp, n_fhsize bytes long) is identical to the `s'-byte handle at `f'.
 * The sizes are compared first, so memcmp() only runs on equal-length
 * handles.  memcmp() takes `const void *', so no casts are needed and
 * const-qualified handles can be passed without losing the qualifier.
 * `n' and `s' are each expanded twice; avoid arguments with side effects.
 */
#define NFS_CMPFH(n, f, s) \
	((n)->n_fhsize == (s) && memcmp((n)->n_fhp, (f), (s)) == 0)
145 #define NFS_ISV3(v) (0)
147 #define NFS_ISV3(v) (VFSTONFS((v)->v_mount)->nm_flag & NFSMNT_NFSV3)
/*
 * NFS_SRVMAXDATA(n): select the data size limit for the request
 * descriptor `n'.  Without ND_NFSV3 in nd_flag the result is
 * NFS_V2MAXDATA; with it, the result is NFS_MAXDGRAMDATA when nd_nam2
 * is non-null and NFS_MAXDATA otherwise.
 */
#define NFS_SRVMAXDATA(n) \
	(!((n)->nd_flag & ND_NFSV3) ? NFS_V2MAXDATA : \
	    (!(n)->nd_nam2 ? NFS_MAXDATA : NFS_MAXDGRAMDATA))
154 * Use the vm_page flag reserved for pager use to indicate pages
155 * which have been written to the server but not yet committed.
157 #define PG_NEEDCOMMIT PG_PAGER1
160 * The IO_METASYNC flag should be implemented for local file systems.
161 * (Until then, it is nothing at all.)
164 #define IO_METASYNC 0
168 * Export arguments for local filesystem mount calls.
169 * Keep in mind that changing this structure modifies nfssvc(2)'s ABI (see
170 * 'struct mountd_exports_list' below).
171 * When modifying this structure, take care to also edit the
172 * nfs_update_exports_30 function in nfs_export.c accordingly to convert
173 * export_args to export_args30.
176 int ex_flags
; /* export related flags */
177 uid_t ex_root
; /* mapping for root uid */
178 struct uucred ex_anon
; /* mapping for anonymous user */
179 struct sockaddr
*ex_addr
; /* net address to which exported */
180 int ex_addrlen
; /* and the net address length */
181 struct sockaddr
*ex_mask
; /* mask of valid bits in saddr */
182 int ex_masklen
; /* and the smask length */
183 char *ex_indexfile
; /* index file for WebNFS URLs */
187 * Structures for the nfssvc(2) syscall. Not that anyone but mountd, nfsd and
188 * mount_nfs should ever try and use it.
191 int sock
; /* Socket to serve */
192 void * name
; /* Client addr for connection based sockets */
193 int namelen
; /* Length of name */
196 struct nfsd_srvargs
{
197 struct nfsd
*nsd_nfsd
; /* Pointer to in kernel nfsd struct */
198 uid_t nsd_uid
; /* Effective uid mapped to cred */
199 u_int32_t nsd_haddr
; /* Ip address of client */
200 struct uucred nsd_cr
; /* Cred. uid maps to */
201 int nsd_authlen
; /* Length of auth string (ret) */
202 u_char
*nsd_authstr
; /* Auth string (ret) */
203 int nsd_verflen
; /* and the verfier */
205 struct timeval nsd_timestamp
; /* timestamp from verifier */
206 u_int32_t nsd_ttl
; /* credential ttl (sec) */
207 NFSKERBKEY_T nsd_key
; /* Session key */
211 char *ncd_dirp
; /* Mount dir path */
212 uid_t ncd_authuid
; /* Effective uid */
213 int ncd_authtype
; /* Type of authenticator */
214 u_int ncd_authlen
; /* Length of authenticator string */
215 u_char
*ncd_authstr
; /* Authenticator string */
216 u_int ncd_verflen
; /* and the verifier */
218 NFSKERBKEY_T ncd_key
; /* Session key */
221 struct mountd_exports_list
{
222 const char *mel_path
;
224 struct export_args
*mel_exports
;
228 * try to keep nfsstats, which is exposed to userland via sysctl,
229 * compatible after NQNFS removal.
230 * 26 is the old value of NFS_NPROCS, which includes NQNFS procedures.
232 #define NFSSTATS_NPROCS 26
239 int attrcache_misses
;
240 int lookupcache_hits
;
241 int lookupcache_misses
;
242 int direofcache_hits
;
243 int direofcache_misses
;
250 int biocache_readlinks
;
252 int biocache_readdirs
;
254 int rpccnt
[NFSSTATS_NPROCS
];
256 int srvrpccnt
[NFSSTATS_NPROCS
];
263 int srvcache_inproghits
;
264 int srvcache_idemdonehits
;
265 int srvcache_nonidemdonehits
;
267 int __srvnqnfs_leases
; /* unused */
268 int __srvnqnfs_maxleases
; /* unused */
269 int __srvnqnfs_getleases
; /* unused */
274 * Flags for nfssvc() system call.
276 #define NFSSVC_BIOD 0x002
277 #define NFSSVC_NFSD 0x004
278 #define NFSSVC_ADDSOCK 0x008
279 #define NFSSVC_AUTHIN 0x010
280 #define NFSSVC_GOTAUTH 0x040
281 #define NFSSVC_AUTHINFAIL 0x080
282 #define NFSSVC_MNTD 0x100
283 #define NFSSVC_SETEXPORTSLIST 0x200
286 * fs.nfs sysctl(3) identifiers
288 #define NFS_NFSSTATS 1 /* struct: struct nfsstats */
289 #define NFS_IOTHREADS 2 /* number of io threads */
292 #define NFS_NAMES { \
294 { "nfsstats", CTLTYPE_STRUCT }, \
295 { "iothreads", CTLTYPE_INT }, \
299 * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
300 * What should be in this set is open to debate, but I believe that since
301 * I/O system calls on ufs are never interrupted by signals the set should
302 * be minimal. My reasoning is that many current programs that use signals
303 * such as SIGALRM will not expect file I/O system calls to be interrupted
308 struct uio
; struct buf
; struct vattr
; struct nameidata
; /* XXX */
311 * Socket errors ignored for connectionless sockets??
312 * For now, ignore them all
/*
 * NFSIGNORE_SOERROR(s, e): 1 when error `e' is none of EINTR, ERESTART
 * or EWOULDBLOCK and the flag word `s' does not have PR_CONNREQUIRED
 * set; 0 otherwise.
 */
#define NFSIGNORE_SOERROR(s, e) \
	(!((e) == EINTR || (e) == ERESTART || (e) == EWOULDBLOCK) && \
	    !((s) & PR_CONNREQUIRED))
319 * Nfs outstanding request list element
322 TAILQ_ENTRY(nfsreq
) r_chain
;
327 struct nfsmount
*r_nmp
;
329 int r_flags
; /* flags on request, see below */
330 int r_retry
; /* max retransmission count */
331 int r_rexmit
; /* current retrans count */
332 int r_timer
; /* tick counter on reply */
333 u_int32_t r_procnum
; /* NFS procedure number */
334 int r_rtt
; /* RTT for rpc */
335 struct lwp
*r_lwp
; /* LWP that did I/O system call */
339 * Queue head for nfsreq's
341 extern TAILQ_HEAD(nfsreqhead
, nfsreq
) nfs_reqq
;
343 /* Flag values for r_flags */
344 #define R_TIMING 0x01 /* timing request (in mntp) */
345 #define R_SENT 0x02 /* request has been sent */
346 #define R_SOFTTERM 0x04 /* soft mnt, too many retries */
347 #define R_INTR 0x08 /* intr mnt, signal pending */
348 #define R_SOCKERR 0x10 /* Fatal error on socket */
349 #define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */
350 #define R_MUSTRESEND 0x40 /* Must resend request */
351 #define R_REXMITTED 0x100 /* retransmitted after reconnect */
354 * A list of nfssvc_sock structures is maintained with all the sockets
355 * that require service by the nfsd.
356 * The nfsuid structs hang off of the nfssvc_sock structs in both lru
357 * and uid hash lists.
359 #ifndef NFS_UIDHASHSIZ
360 #define NFS_UIDHASHSIZ 29 /* Tune the size of nfssvc_sock with this */
/*
 * NUIDHASH(sock, uid): address of the ns_uidhashtbl bucket of `sock'
 * selected by `uid' modulo NFS_UIDHASHSIZ.
 */
#define NUIDHASH(sock, uid) \
	((sock)->ns_uidhashtbl + ((uid) % NFS_UIDHASHSIZ))
364 #ifndef NFS_WDELAYHASHSIZ
365 #define NFS_WDELAYHASHSIZ 16 /* and with this */
367 #ifndef NFS_MUIDHASHSIZ
368 #define NFS_MUIDHASHSIZ 63 /* Tune the size of nfsmount with this */
/*
 * NMUIDHASH(nmp, uid): address of the nm_uidhashtbl bucket of `nmp'
 * selected by `uid' modulo NFS_MUIDHASHSIZ.
 */
#define NMUIDHASH(nmp, uid) \
	((nmp)->nm_uidhashtbl + ((uid) % NFS_MUIDHASHSIZ))
373 #ifndef NFS_DIRHASHSIZ
374 #define NFS_DIRHASHSIZ 64
/*
 * NFSDIRHASH(np, off): address of the n_dircache bucket of node `np'
 * selected by nfs_dirhash(off) masked with nfsdirhashmask.
 * `np' is parenthesized so that expression arguments (e.g. `base + 1')
 * bind to `->' as a whole.
 */
#define NFSDIRHASH(np, off) \
	(&(np)->n_dircache[(nfs_dirhash((off)) & nfsdirhashmask)])
380 * Macros for storing/retrieving cookies into directory buffers.
/*
 * NFS_STASHCOOKIE(dp, off) stores `off' as an off_t in the last
 * sizeof(off_t) bytes of the record at `dp', i.e. at
 * (char *)dp + dp->d_reclen - sizeof(off_t).  NFS_GETCOOKIE(dp) reads the
 * same location back.
 *
 * Both the `off' argument and the whole assignment are parenthesized so
 * the macro behaves as a single expression wherever it is used (its value
 * is the stored cookie).  `dp' is expanded twice; avoid arguments with
 * side effects.
 */
#define NFS_STASHCOOKIE(dp, off) \
	(*((off_t *)((char *)(dp) + (dp)->d_reclen - sizeof (off_t))) = (off))
#define NFS_GETCOOKIE(dp) \
	(*((off_t *)((char *)(dp) + (dp)->d_reclen - sizeof (off_t))))
386 #define NFS_STASHCOOKIE32(dp, val) \
387 *((u_int32_t *)((char *)(dp) + (dp)->d_reclen - sizeof (off_t) - \
389 #define NFS_GETCOOKIE32(dp) \
390 (*((u_int32_t *)((char *)(dp) + (dp)->d_reclen - sizeof (off_t) - \
394 * Flags passed to nfs_bioread().
396 #define NFSBIO_CACHECOOKIES 0x0001 /* Cache dir offset cookies */
399 * Network address hash list element
402 u_int32_t had_inetaddr
;
403 struct mbuf
*had_nam
;
407 TAILQ_ENTRY(nfsuid
) nu_lru
; /* LRU chain */
408 LIST_ENTRY(nfsuid
) nu_hash
; /* Hash list */
409 int nu_flag
; /* Flags */
410 union nethostaddr nu_haddr
; /* Host addr. for dgram sockets */
411 kauth_cred_t nu_cr
; /* Cred uid mapped to */
412 int nu_expire
; /* Expiry time (sec) */
413 struct timeval nu_timestamp
; /* Kerb. timestamp */
414 u_int32_t nu_nickname
; /* Nickname on server */
415 NFSKERBKEY_T nu_key
; /* and session key */
418 #define nu_inetaddr nu_haddr.had_inetaddr
419 #define nu_nam nu_haddr.had_nam
420 /* Bits for nu_flag */
421 #define NU_INETADDR 0x1
424 #define NU_NETFAM(u) \
425 (((u)->nu_flag & NU_INETADDR) ? \
426 (((u)->nu_flag & NU_NAM) ? AF_INET6 : AF_INET) : AF_ISO)
428 #define NU_NETFAM(u) (((u)->nu_flag & NU_INETADDR) ? AF_INET : AF_ISO)
432 * b: protected by SLP_BUSY
433 * g: protected by nfsd_lock
434 * s: protected by ns_lock
435 * a: protected by ns_alock
441 kcondvar_t ns_cv
; /* s: */
442 TAILQ_ENTRY(nfssvc_sock
) ns_chain
; /* g: List of all nfssvc_sock */
443 TAILQ_ENTRY(nfssvc_sock
) ns_pending
; /* g: List of pending sockets */
444 TAILQ_HEAD(, nfsuid
) ns_uidlruhead
;
446 struct socket
*ns_so
;
448 struct mbuf
*ns_raw
; /* b: */
449 struct mbuf
*ns_rawend
; /* b: */
450 struct mbuf
*ns_rec
; /* b: */
451 struct mbuf
*ns_recend
; /* b: */
452 struct mbuf
*ns_frag
; /* b: */
453 int ns_flags
; /* s: */
454 int ns_aflags
; /* a: */
455 int ns_gflags
; /* g: */
456 int ns_sflags
; /* b: */
458 int ns_reclen
; /* b: */
460 u_int32_t ns_sref
; /* g: */
461 SIMPLEQ_HEAD(, nfsrv_descript
) ns_sendq
; /* s: send reply list */
462 LIST_HEAD(, nfsrv_descript
) ns_tq
; /* g: Write gather lists */
463 LIST_HEAD(, nfsuid
) ns_uidhashtbl
[NFS_UIDHASHSIZ
];
464 LIST_HEAD(nfsrvw_delayhash
, nfsrv_descript
) ns_wdelayhashtbl
[NFS_WDELAYHASHSIZ
]; /* g: */
467 /* Bits for "ns_flags" */
468 #define SLP_VALID 0x01
469 #define SLP_BUSY 0x10
470 #define SLP_SENDING 0x80
472 /* Bits for "ns_aflags" */
473 #define SLP_A_NEEDQ 0x01
474 #define SLP_A_DISCONN 0x04
476 /* Bits for "ns_gflags" */
477 #define SLP_G_DOREC 0x02 /* on nfssvc_sockpending queue */
479 /* Bits for "ns_sflags" */
480 #define SLP_S_LASTFRAG 0x40
482 extern TAILQ_HEAD(nfssvc_sockhead
, nfssvc_sock
) nfssvc_sockhead
;
483 extern struct nfssvc_sockhead nfssvc_sockpending
;
484 extern int nfssvc_sockhead_flag
;
485 #define SLP_INIT 0x01
488 * One of these structures is allocated for each nfsd.
491 TAILQ_ENTRY(nfsd
) nfsd_chain
; /* List of all nfsd's */
492 SLIST_ENTRY(nfsd
) nfsd_idle
; /* List of idle nfsd's */
494 int nfsd_flag
; /* NFSD_ flags */
495 struct nfssvc_sock
*nfsd_slp
; /* Current socket */
496 int nfsd_authlen
; /* Authenticator len */
497 u_char nfsd_authstr
[RPCAUTH_MAXSIZ
]; /* Authenticator data */
498 int nfsd_verflen
; /* and the Verifier */
499 u_char nfsd_verfstr
[RPCVERF_MAXSIZ
];
500 struct proc
*nfsd_procp
; /* Proc ptr */
501 struct nfsrv_descript
*nfsd_nd
; /* Associated nfsrv_descript */
504 /* Bits for "nfsd_flag" */
505 #define NFSD_NEEDAUTH 0x04
506 #define NFSD_AUTHFAIL 0x08
508 #define NFSD_MAXFHSIZE 64
509 typedef struct nfsrvfh
{
513 uint8_t u_opaque
[NFSD_MAXFHSIZE
];
516 #define NFSRVFH_SIZE(nsfh) ((nsfh)->nsfh_size)
517 #define NFSRVFH_DATA(nsfh) ((nsfh)->nsfh_u.u_opaque)
518 #define NFSRVFH_FHANDLE(nsfh) (&(nsfh)->nsfh_u.u_fh)
521 * This structure is used by the server for describing each request.
522 * Some fields are used only when write request gathering is performed.
524 struct nfsrv_descript
{
525 u_quad_t nd_time
; /* Write deadline (usec) */
526 off_t nd_off
; /* Start byte offset */
527 off_t nd_eoff
; /* and end byte offset */
528 LIST_ENTRY(nfsrv_descript
) nd_hash
; /* Hash list */
529 LIST_ENTRY(nfsrv_descript
) nd_tq
; /* and timer list */
530 LIST_HEAD(,nfsrv_descript
) nd_coalesce
; /* coalesced writes */
531 SIMPLEQ_ENTRY(nfsrv_descript
) nd_sendq
; /* send reply list */
532 struct mbuf
*nd_mrep
; /* Request mbuf list */
533 struct mbuf
*nd_md
; /* Current dissect mbuf */
534 struct mbuf
*nd_mreq
; /* Reply mbuf list */
535 struct mbuf
*nd_nam
; /* and socket addr */
536 struct mbuf
*nd_nam2
; /* return socket addr */
537 void * nd_dpos
; /* Current dissect pos */
538 u_int32_t nd_procnum
; /* RPC # */
539 int nd_stable
; /* storage type */
540 int nd_flag
; /* nd_flag */
541 int nd_len
; /* Length of this write */
542 int nd_repstat
; /* Reply status */
543 u_int32_t nd_retxid
; /* Reply xid */
544 u_int32_t nd_duration
; /* Lease duration */
545 struct timeval nd_starttime
; /* Time RPC initiated */
546 nfsrvfh_t nd_fh
; /* File handle */
547 kauth_cred_t nd_cr
; /* Credentials */
550 /* Bits for "nd_flag" */
551 #define ND_READ LEASE_READ
552 #define ND_WRITE LEASE_WRITE
553 #define ND_CHECK 0x04
554 #define ND_LEASE (ND_READ | ND_WRITE | ND_CHECK)
555 #define ND_NFSV3 0x08
556 #define ND_KERBNICK 0x20
557 #define ND_KERBFULL 0x40
558 #define ND_KERBAUTH (ND_KERBNICK | ND_KERBFULL)
560 extern kmutex_t nfsd_lock
;
561 extern kcondvar_t nfsd_initcv
;
562 extern TAILQ_HEAD(nfsdhead
, nfsd
) nfsd_head
;
563 extern SLIST_HEAD(nfsdidlehead
, nfsd
) nfsd_idle_head
;
564 extern int nfsd_head_flag
;
565 #define NFSD_CHECKSLP 0x01
567 extern struct mowner nfs_mowner
;
568 extern struct nfsstats nfsstats
;
569 extern int nfs_numasync
;
572 * These macros compare nfsrv_descript structures.
/*
 * NFSW_CONTIG(o, n): non-zero when descriptor `o' ends at or beyond the
 * start offset of descriptor `n' (o->nd_eoff >= n->nd_off) and the first
 * NFSX_V3FH bytes of their nd_fh members are identical.
 * memcmp() takes `const void *', so the operands are passed without casts.
 */
#define NFSW_CONTIG(o, n) \
	((o)->nd_eoff >= (n)->nd_off && \
	    memcmp(&(o)->nd_fh, &(n)->nd_fh, NFSX_V3FH) == 0)
582 #define WEBNFS_ESC_CHAR '%'
583 #define WEBNFS_SPECCHAR_START 0x80
585 #define WEBNFS_NATIVE_CHAR 0x80
588 * Possibly more here in the future.
592 * Macro for converting escape characters in WebNFS pathnames.
593 * Should really be in libkern.
597 ((c) >= 'a' ? ((c) - ('a' - 10)) : \
598 ((c) >= 'A' ? ((c) - ('A' - 10)) : ((c) - '0')))
/*
 * HEXSTRTOI(p): combine the two characters p[0] and p[1] into one value,
 * using HEXTOC() on each; p[0] supplies the high four bits.
 * `p' is parenthesized so pointer expressions such as `s + 1' index
 * correctly.  `p' is expanded more than once; avoid side effects.
 */
#define HEXSTRTOI(p) \
	((HEXTOC((p)[0]) << 4) + HEXTOC((p)[1]))
603 * Structure holding information for a publicly exported filesystem
604 * (WebNFS). Currently the specs allow just for one such filesystem.
607 int np_valid
; /* Do we hold valid information */
608 fhandle_t
*np_handle
; /* Filehandle for pub fs (internal) */
609 struct mount
*np_mount
; /* Mountpoint of exported fs */
610 char *np_index
; /* Index file */
614 #endif /* _NFS_NFS_H */