4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
25 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
26 /* All Rights Reserved */
28 #ifndef _NFS_NFS_CLNT_H
29 #define _NFS_NFS_CLNT_H
31 #include <sys/utsname.h>
32 #include <sys/kstat.h>
35 #include <sys/thread.h>
36 #include <nfs/rnode.h>
38 #include <sys/condvar_impl.h>
/*
 * Attribute cache hold times and their upper limits, all in seconds.
 */
#define	ACREGMIN	3	/* shortest hold time for cached file attrs */
#define	ACREGMAX	60	/* longest hold time for cached file attrs */
#define	ACDIRMIN	30	/* shortest hold time for cached dir attrs */
#define	ACDIRMAX	60	/* longest hold time for cached dir attrs */
#define	ACMINMAX	3600	/* a min timeout may not exceed 1 hour */
#define	ACMAXMAX	36000	/* a max timeout may not exceed 10 hours */

#define	NFS_CALLTYPES	3	/* call classes: lookups, reads, writes */

#define	RFSCALL_SOFT	0x00000001	/* run the op as if soft-mounted */

/*
 * Pseudo errno that rfscall hands back to report a transfer size adjustment.
 */
#define	ENFS_TRYAGAIN	999
66 * The NFS specific async_reqs structure. iotype is grouped to support two
67 * types of async thread pools, please read comments section of mntinfo_t
68 * definition for more information. Care should be taken while adding new
69 * members to this group.
/*
 * One more than NFS_COMMIT.
 * NOTE(review): presumably the number of iotype values that are page
 * operations; the enum itself is not visible here -- confirm.
 */
#define	NFS_ASYNC_PGOPS_TYPES	(1 + NFS_COMMIT)
84 * NFS async requests queue type.
89 NFS_ASYNC_PGOPS_QUEUE
,
94 * Number of NFS async threads operating exclusively on page op requests.
/*
 * How many NFS async threads serve page op requests exclusively.
 */
#define	NUM_ASYNC_PGOPS_THREADS	2
98 struct nfs_async_read_req
{
99 void (*readahead
)(); /* pointer to readahead function */
100 uoff_t blkoff
; /* offset in file */
101 struct seg
*seg
; /* segment to do i/o to */
102 caddr_t addr
; /* address to do i/o to */
105 struct nfs_pageio_req
{
106 int (*pageio
)(); /* pointer to pageio function */
107 page_t
*pp
; /* page list */
108 uoff_t io_off
; /* offset in file */
109 uint_t io_len
; /* size of request */
113 struct nfs_readdir_req
{
114 int (*readdir
)(); /* pointer to readdir function */
115 struct rddir_cache
*rdc
; /* pointer to cache entry to fill */
118 struct nfs_commit_req
{
119 void (*commit
)(); /* pointer to commit function */
120 page_t
*plist
; /* page list */
121 offset3 offset
; /* starting offset */
122 count3 count
; /* size of range to be committed */
125 struct nfs_inactive_req
{
126 void (*inactive
)(); /* pointer to inactive function */
129 struct nfs_async_reqs
{
130 struct nfs_async_reqs
*a_next
; /* pointer to next arg struct */
132 kthread_t
*a_queuer
; /* thread id of queueing thread */
134 struct vnode
*a_vp
; /* vnode pointer */
135 struct cred
*a_cred
; /* cred pointer */
136 enum iotype a_io
; /* i/o type */
138 struct nfs_async_read_req a_read_args
;
139 struct nfs_pageio_req a_pageio_args
;
140 struct nfs_readdir_req a_readdir_args
;
141 struct nfs_commit_req a_commit_args
;
142 struct nfs_inactive_req a_inactive_args
;
/*
 * Shorthand names for the per-request argument members of an async
 * request, all reached through its a_args member.
 */

/* a_read_args members */
#define	a_nfs_readahead		a_args.a_read_args.readahead
#define	a_nfs_blkoff		a_args.a_read_args.blkoff
#define	a_nfs_seg		a_args.a_read_args.seg
#define	a_nfs_addr		a_args.a_read_args.addr

/* a_pageio_args members; putapage and pageio name the same pointer */
#define	a_nfs_putapage		a_args.a_pageio_args.pageio
#define	a_nfs_pageio		a_args.a_pageio_args.pageio
#define	a_nfs_pp		a_args.a_pageio_args.pp
#define	a_nfs_off		a_args.a_pageio_args.io_off
#define	a_nfs_len		a_args.a_pageio_args.io_len
#define	a_nfs_flags		a_args.a_pageio_args.flags

/* a_readdir_args members */
#define	a_nfs_readdir		a_args.a_readdir_args.readdir
#define	a_nfs_rdc		a_args.a_readdir_args.rdc

/* a_commit_args members */
#define	a_nfs_commit		a_args.a_commit_args.commit
#define	a_nfs_plist		a_args.a_commit_args.plist
#define	a_nfs_offset		a_args.a_commit_args.offset
#define	a_nfs_count		a_args.a_commit_args.count

/* a_inactive_args member */
#define	a_nfs_inactive		a_args.a_inactive_args.inactive
169 * Due to the way the address space callbacks are used to execute a delmap,
170 * we must keep track of how many times the same thread has called
171 * fop_delmap()->nfs_delmap()/nfs3_delmap(). This is done by having a list of
172 * nfs_delmapcall_t's associated with each rnode_t. This list is protected
173 * by the rnode_t's r_statelock. The individual elements do not need to be
174 * protected as they will only ever be created, modified and destroyed by
175 * one thread (the call_id).
176 * See nfs_delmap()/nfs3_delmap() for further explanation.
178 typedef struct nfs_delmapcall
{
180 int error
; /* error from delmap */
181 list_node_t call_node
;
185 * delmap address space callback args
187 typedef struct nfs_delmap_args
{
196 nfs_delmapcall_t
*caller
; /* to retrieve errors from the cb */
200 extern nfs_delmapcall_t
*nfs_init_delmapcall(void);
201 extern void nfs_free_delmapcall(nfs_delmapcall_t
*);
202 extern int nfs_find_and_delete_delmapcall(rnode_t
*, int *errp
);
206 * The following structures, chhead and chtab, make up the client handle
207 * cache. chhead represents a quadruple(RPC program, RPC version, Protocol
208 * Family, and Transport). For example, a chhead entry could represent
209 * NFS/V3/IPv4/TCP requests. chhead nodes are linked together as a singly
210 * linked list, which is referenced from chtable.
212 * chtab represents an allocated client handle bound to a particular
213 * quadruple. These nodes chain down from a chhead node. chtab
214 * entries which are on the chain are considered free, so a thread may simply
215 * unlink the first node without traversing the chain. When the thread is
216 * completed with its request, it puts the chtab node back on the chain.
218 typedef struct chhead
{
219 struct chhead
*ch_next
; /* next quadruple */
220 struct chtab
*ch_list
; /* pointer to free client handle(s) */
221 uint64_t ch_timesused
; /* times this quadruple was requested */
222 rpcprog_t ch_prog
; /* RPC program number */
223 rpcvers_t ch_vers
; /* RPC version number */
224 dev_t ch_dev
; /* pseudo device number (i.e. /dev/udp) */
225 char *ch_protofmly
; /* protocol (i.e. NC_INET, NC_LOOPBACK) */
228 typedef struct chtab
{
229 struct chtab
*ch_list
; /* next free client handle */
230 struct chhead
*ch_head
; /* associated quadruple */
231 time_t ch_freed
; /* timestamp when freed */
232 CLIENT
*ch_client
; /* pointer to client handle */
236 * clinfo is a structure which encapsulates data that is needed to
237 * obtain a client handle from the cache
239 typedef struct clinfo
{
240 rpcprog_t cl_prog
; /* RPC program number */
241 rpcvers_t cl_vers
; /* RPC version number */
242 uint_t cl_readsize
; /* transfer size */
243 int cl_retrans
; /* times to retry request */
244 uint_t cl_flags
; /* info flags */
248 * Failover information, passed opaquely through rfscall()
250 typedef struct failinfo
{
253 void (*copyproc
)(caddr_t
, vnode_t
*);
254 int (*lookupproc
)(vnode_t
*, char *, vnode_t
**, struct pathname
*,
255 int, vnode_t
*, struct cred
*, int);
256 int (*xattrdirproc
)(vnode_t
*, vnode_t
**, bool_t
, cred_t
*, int);
260 * Static server information
262 * These fields are protected by sv_lock:
265 typedef struct servinfo
{
266 struct knetconfig
*sv_knconf
; /* bound TLI fd */
267 struct knetconfig
*sv_origknconf
; /* For RDMA save orig knconf */
268 struct netbuf sv_addr
; /* server's address */
269 nfs_fhandle sv_fhandle
; /* this server's filehandle */
270 struct sec_data
*sv_secdata
; /* security data for rpcsec module */
271 char *sv_hostname
; /* server's hostname */
272 int sv_hostnamelen
; /* server's hostname length */
273 uint_t sv_flags
; /* see below */
274 struct servinfo
*sv_next
; /* next in list */
279 * The values for sv_flags.
/* sv_flags bit: the root vnode got ESTALE */
#define	SV_ROOT_STALE	0x00000001
284 * Switch from RDMA knconf to original mount knconf
/*
 * Yield the knetconfig of the original mount for mount info `mi`: the
 * current server's sv_origknconf when that pointer is non-NULL, otherwise
 * its sv_knconf.
 *
 * `mi` is parenthesized so that any pointer expression (e.g. `base + 1`)
 * may be passed.  It is evaluated more than once, so it must not have
 * side effects.
 */
#define	ORIG_KNCONF(mi)	((mi)->mi_curr_serv->sv_origknconf ? \
	(mi)->mi_curr_serv->sv_origknconf : (mi)->mi_curr_serv->sv_knconf)
292 * NFS private data per mounted file system
293 * The mi_lock mutex protects the following fields:
306 * The mi_async_lock mutex protects the following fields:
310 * mi_async_curr[NFS_MAX_ASYNC_QUEUES]
312 * mi_async_init_clusters
313 * mi_threads[NFS_MAX_ASYNC_QUEUES]
316 * Normally the netconfig information for the mount comes from
317 * mi_curr_serv and mi_klmconfig is NULL. If NLM calls need to use a
318 * different transport, mi_klmconfig contains the necessary netconfig
321 * 'mi_zone' is initialized at structure creation time, and never
322 * changes; it may be read without a lock.
324 * mi_zone_node is linkage into the mi4_globals.mig_list, and is
325 * protected by mi4_globals.mig_list_lock.
328 * mi_globals::mig_lock > mi_async_lock > mi_lock
330 typedef struct mntinfo
{
331 kmutex_t mi_lock
; /* protects mntinfo fields */
332 struct servinfo
*mi_servers
; /* server list */
333 struct servinfo
*mi_curr_serv
; /* current server */
334 kcondvar_t mi_failover_cv
; /* failover synchronization */
335 int mi_readers
; /* failover - users of mi_curr_serv */
336 struct vfs
*mi_vfsp
; /* back pointer to vfs */
337 enum vtype mi_type
; /* file type of the root vnode */
338 uint_t mi_flags
; /* see below */
339 uint_t mi_tsize
; /* max read transfer size (bytes) */
340 uint_t mi_stsize
; /* max write transfer size (bytes) */
341 int mi_timeo
; /* initial timeout in 10th sec */
342 int mi_retrans
; /* times to retry request */
343 hrtime_t mi_acregmin
; /* min time to hold cached file attr */
344 hrtime_t mi_acregmax
; /* max time to hold cached file attr */
345 hrtime_t mi_acdirmin
; /* min time to hold cached dir attr */
346 hrtime_t mi_acdirmax
; /* max time to hold cached dir attr */
347 len_t mi_maxfilesize
; /* for pathconf _PC_FILESIZEBITS */
349 * Extra fields for congestion control, one per NFS call type,
350 * plus one global one.
352 struct rpc_timers mi_timers
[NFS_CALLTYPES
+1];
353 int mi_curread
; /* current read size */
354 int mi_curwrite
; /* current write size */
356 * Async I/O management
357 * We have 2 pools of threads working on async I/O:
358 * (i) Threads which work on all async queues. Default number of
359 * threads in this queue is 8. Threads in this pool work on async
360 * queue pointed by mi_async_curr[NFS_ASYNC_QUEUE]. Number of
361 * active threads in this pool is tracked by
362 * mi_threads[NFS_ASYNC_QUEUE].
363 * (ii)Threads which work only on page op async queues.
364 * Page ops queue comprises NFS_PUTAPAGE, NFS_PAGEIO &
365 * NFS_COMMIT. Default number of threads in this queue is 2
366 * (NUM_ASYNC_PGOPS_THREADS). Threads in this pool work on async
367 * queue pointed by mi_async_curr[NFS_ASYNC_PGOPS_QUEUE]. Number
368 * of active threads in this pool is tracked by
369 * mi_threads[NFS_ASYNC_PGOPS_QUEUE].
371 struct nfs_async_reqs
*mi_async_reqs
[NFS_ASYNC_TYPES
];
372 struct nfs_async_reqs
*mi_async_tail
[NFS_ASYNC_TYPES
];
373 struct nfs_async_reqs
**mi_async_curr
[NFS_MAX_ASYNC_QUEUES
];
374 /* current async queue */
375 uint_t mi_async_clusters
[NFS_ASYNC_TYPES
];
376 uint_t mi_async_init_clusters
;
377 uint_t mi_async_req_count
; /* # outstanding work requests */
378 kcondvar_t mi_async_reqs_cv
; /* signaled when there's work */
379 ushort_t mi_threads
[NFS_MAX_ASYNC_QUEUES
];
380 /* number of active async threads */
381 ushort_t mi_max_threads
; /* max number of async worker threads */
382 kthread_t
*mi_manager_thread
; /* async manager thread */
383 kcondvar_t mi_async_cv
; /* signaled when the last worker dies */
384 kcondvar_t mi_async_work_cv
[NFS_MAX_ASYNC_QUEUES
];
385 /* tell workers to work */
386 kmutex_t mi_async_lock
; /* lock to protect async list */
390 struct pathcnf
*mi_pathconf
; /* static pathconf kludge */
391 rpcprog_t mi_prog
; /* RPC program number */
392 rpcvers_t mi_vers
; /* RPC program version number */
393 char **mi_rfsnames
; /* mapping to proc names */
394 kstat_named_t
*mi_reqs
; /* count of requests */
395 uchar_t
*mi_call_type
; /* dynamic retrans call types */
396 uchar_t
*mi_ss_call_type
; /* semisoft call type */
397 uchar_t
*mi_timer_type
; /* dynamic retrans timer types */
398 clock_t mi_printftime
; /* last error printf time */
402 char **mi_aclnames
; /* mapping to proc names */
403 kstat_named_t
*mi_aclreqs
; /* count of acl requests */
404 uchar_t
*mi_acl_call_type
; /* dynamic retrans call types */
405 uchar_t
*mi_acl_ss_call_type
; /* semisoft call types */
406 uchar_t
*mi_acl_timer_type
; /* dynamic retrans timer types */
408 * Client Side Failover stats
410 uint_t mi_noresponse
; /* server not responding count */
411 uint_t mi_failover
; /* failover to new server count */
412 uint_t mi_remap
; /* remap to new server count */
416 struct kstat
*mi_io_kstats
;
417 struct kstat
*mi_ro_kstats
;
418 struct knetconfig
*mi_klmconfig
;
422 struct zone
*mi_zone
; /* Zone in which FS is mounted */
423 zone_ref_t mi_zone_ref
; /* Reference to aforementioned zone */
424 list_node_t mi_zone_node
; /* Linkage into per-zone mi list */
426 * Serializes threads in failover_remap.
427 * Need to acquire this lock first in failover_remap() function
428 * before acquiring any other rnode lock.
430 kmutex_t mi_remap_lock
;
432 * List of rnode_t structures that belongs to this mntinfo
434 kmutex_t mi_rnodes_lock
; /* protects the mi_rnodes list */
435 list_t mi_rnodes
; /* the list */
440 * vfs pointer to mount info
/*
 * Given a vfs pointer, yield the mntinfo_t kept in its vfs_data.
 */
#define	VFTOMI(vfsp)	((mntinfo_t *)((vfsp)->vfs_data))

/*
 * Given a vnode pointer, yield the mntinfo_t of the vfs it points to
 * through v_vfsp.
 */
#define	VTOMI(vp)	VFTOMI((vp)->v_vfsp)
450 * The values for mi_flags.
/* Bits of mi_flags, one bit per flag. */
#define	MI_HARD			0x00000001	/* hard or soft mount */
#define	MI_PRINTED		0x00000002	/* "not responding" was printed */
#define	MI_INT			0x00000004	/* hard mount may be interrupted */
#define	MI_DOWN			0x00000008	/* server is down */
#define	MI_NOAC			0x00000010	/* attributes are not cached */
#define	MI_NOCTO		0x00000020	/* no close-to-open consistency */
#define	MI_DYNAMIC		0x00000040	/* dynamic transfer size adjust */
#define	MI_LLOCK		0x00000080	/* local locking, no lockmgr */
#define	MI_GRPID		0x00000100	/* System V gid inheritance */
#define	MI_RPCTIMESYNC		0x00000200	/* RPC time sync */
#define	MI_LINK			0x00000400	/* server supports link */
#define	MI_SYMLINK		0x00000800	/* server supports symlink */
#define	MI_READDIRONLY		0x00001000	/* readdir, not readdirplus */
#define	MI_ACL			0x00002000	/* server supports NFS_ACL */
#define	MI_BINDINPROG		0x00004000	/* server binding is changing */
#define	MI_LOOPBACK		0x00008000	/* this is a loopback mount */
#define	MI_SEMISOFT		0x00010000	/* soft reads, hard modify */
#define	MI_NOPRINT		0x00020000	/* messages are suppressed */
#define	MI_DIRECTIO		0x00040000	/* do direct I/O */
#define	MI_EXTATTR		0x00080000	/* server supports ext. attrs */
#define	MI_ASYNC_MGR_STOP	0x00100000	/* async manager told to die */
#define	MI_DEAD			0x00200000	/* mount has been terminated */
476 * Read-only mntinfo statistics
478 struct mntinfo_kstat
{
479 char mik_proto
[KNC_STRSIZE
];
483 uint32_t mik_curread
;
484 uint32_t mik_curwrite
;
495 } mik_timers
[NFS_CALLTYPES
+1];
496 uint32_t mik_noresponse
;
497 uint32_t mik_failover
;
499 char mik_curserver
[SYS_NMLN
];
503 * Macro to wakeup sleeping async worker threads.
/*
 * Wake one sleeping async worker thread.
 *
 * `work_cv` is the array of worker condition variables indexed by queue
 * type.  If the NFS_ASYNC_QUEUE entry has waiters it is signalled;
 * otherwise the NFS_ASYNC_PGOPS_QUEUE entry is signalled when it has
 * waiters.  Nothing happens when neither has waiters.
 *
 * `work_cv` is parenthesized so that any array/pointer expression may be
 * passed; it is evaluated more than once.  The braces are closed here so
 * the macro no longer runs on into the following line.
 */
#define	NFS_WAKE_ASYNC_WORKER(work_cv)	{				\
	if (CV_HAS_WAITERS(&(work_cv)[NFS_ASYNC_QUEUE]))		\
		cv_signal(&(work_cv)[NFS_ASYNC_QUEUE]);			\
	else if (CV_HAS_WAITERS(&(work_cv)[NFS_ASYNC_PGOPS_QUEUE]))	\
		cv_signal(&(work_cv)[NFS_ASYNC_PGOPS_QUEUE]);		\
}
/*
 * Wake every sleeping async worker thread by broadcasting on both the
 * NFS_ASYNC_QUEUE and the NFS_ASYNC_PGOPS_QUEUE entries of `work_cv`.
 *
 * `work_cv` is parenthesized so that any array/pointer expression may be
 * passed; it is evaluated twice.  The braces are closed here so the macro
 * no longer runs on into the following line.
 */
#define	NFS_WAKEALL_ASYNC_WORKERS(work_cv)	{			\
	cv_broadcast(&(work_cv)[NFS_ASYNC_QUEUE]);			\
	cv_broadcast(&(work_cv)[NFS_ASYNC_PGOPS_QUEUE]);		\
}
518 * Mark cached attributes as timed out
520 * The caller must not be holding the rnode r_statelock mutex.
/*
 * Mark the cached attributes of vnode `vp` as timed out.
 *
 * Takes the r_statelock of the rnode obtained with VTOR(vp), applies
 * PURGE_ATTRCACHE_LOCKED to that rnode, and drops the lock again.  The
 * caller must not be holding the rnode r_statelock mutex.
 *
 * The local is deliberately not called `rp`: with that name an argument
 * such as RTOV(rp) would refer to the macro's own, still uninitialized,
 * variable.  `vp` is evaluated exactly once.  The braces are closed here
 * so the macro no longer runs on into the following line.
 */
#define	PURGE_ATTRCACHE(vp)	{					\
	rnode_t *purge_attrcache_rp_ = VTOR(vp);			\
	mutex_enter(&purge_attrcache_rp_->r_statelock);			\
	PURGE_ATTRCACHE_LOCKED(purge_attrcache_rp_);			\
	mutex_exit(&purge_attrcache_rp_->r_statelock);			\
}
/*
 * Mark the cached attributes of rnode `rp` as timed out: r_attrtime is set
 * to the current gethrtime() value and r_mtime is set to that same value.
 * The caller must already hold rp->r_statelock (this is ASSERTed).
 *
 * `rp` is parenthesized so that any pointer expression may be passed; it
 * is evaluated more than once.  The braces are closed here so the macro
 * no longer runs on into the following line.
 */
#define	PURGE_ATTRCACHE_LOCKED(rp)	{				\
	ASSERT(MUTEX_HELD(&(rp)->r_statelock));				\
	(rp)->r_attrtime = gethrtime();					\
	(rp)->r_mtime = (rp)->r_attrtime;				\
}
536 * Is the attribute cache valid?
/*
 * Non-zero while the current gethrtime() value is still below the
 * r_attrtime stored in the rnode of `vp`.
 */
#define	ATTRCACHE_VALID(vp)	\
	(gethrtime() < VTOR(vp)->r_attrtime)
541 * Flags to indicate whether to purge the DNLC for non-directory vnodes
542 * in a call to nfs_purge_caches.
/* Argument values telling nfs_purge_caches whether to purge the DNLC. */
#define	NFS_NOPURGE_DNLC	0	/* leave the DNLC alone */
#define	NFS_PURGE_DNLC		1	/* purge the DNLC as well */
548 * If returned error is ESTALE flush all caches.
550 #define PURGE_STALE_FH(error, vp, cr) \
551 if ((error) == ESTALE) { \
552 struct rnode *rp = VTOR(vp); \
553 if (vp->v_flag & VROOT) { \
554 servinfo_t *svp = rp->r_server; \
555 mutex_enter(&svp->sv_lock); \
556 svp->sv_flags |= SV_ROOT_STALE; \
557 mutex_exit(&svp->sv_lock); \
559 mutex_enter(&rp->r_statelock); \
560 rp->r_flags |= RSTALE; \
562 rp->r_error = (error); \
563 mutex_exit(&rp->r_statelock); \
564 if (vn_has_cached_data(vp)) \
565 nfs_invalidate_pages((vp), 0, (cr)); \
566 nfs_purge_caches((vp), NFS_PURGE_DNLC, (cr)); \
571 * Swap is always valid, if no attributes (attrtime == 0) or
572 * if mtime matches cached mtime it is valid
573 * NOTE: mtime is now a timestruc_t.
574 * Caller should be holding the rnode r_statelock mutex.
/*
 * Decide whether the cached data of rnode `rp` is still valid.
 *
 * The result is true when the rnode's vnode has VISSWAP set, or when both
 * of the following match the attributes cached in rp->r_attr:
 *   - `mtime` (a timestruc_t; tv_sec and tv_nsec are compared), and
 *   - `fsize` (compared against va_size).
 * The caller should be holding the rnode r_statelock mutex.
 */
#define	CACHE_VALID(rp, mtime, fsize)					\
	((RTOV(rp)->v_flag & VISSWAP) == VISSWAP ||			\
	((rp)->r_attr.va_mtime.tv_sec == (mtime).tv_sec &&		\
	(rp)->r_attr.va_mtime.tv_nsec == (mtime).tv_nsec &&		\
	(rp)->r_attr.va_size == (fsize)))
583 * Macro to detect forced unmount or a zone shutdown.
/*
 * Detect a forced unmount or a zone shutdown: true when `vfsp` has
 * VFS_UNMOUNTED set in vfs_flag, or when the status of the current
 * process's zone has reached ZONE_IS_SHUTTING_DOWN or beyond.
 */
#define	FS_OR_ZONE_GONE(vfsp)	(					\
	((vfsp)->vfs_flag & VFS_UNMOUNTED) ||				\
	ZONE_IS_SHUTTING_DOWN <= zone_status_get(curproc->p_zone))
590 * Convert NFS tunables to hrtime_t units, seconds to nanoseconds.
/*
 * Conversions between seconds (the unit of the NFS tunables) and
 * nanoseconds (hrtime_t units).  HR2SEC divides, so any fractional
 * second is discarded.
 */
#define	SEC2HR(sec)	((long long)NANOSEC * (sec))	/* seconds -> ns */
#define	HR2SEC(hr)	((hr) / (long long)NANOSEC)	/* ns -> seconds */
596 * Structure to identify owner of a PC file share reservation.
599 int magic
; /* magic uniquifying number */
600 char hname
[16]; /* first 16 bytes of hostname */
601 char lowner
[8]; /* local owner from fcntl */
/* Magic number for the owner of a PC file share reservation. */
#define	NFS_OWNER_MAGIC		0x0001D81E
610 * Support for extended attributes
/* Names used in support of extended attributes. */
#define	XATTR_DIR_NAME	"/@/"		/* name used for DNLC entries */
#define	XATTR_RPATH	"ExTaTtR"	/* r_path value used for failover */
616 * Short hand for checking to see whether the file system was mounted
617 * interruptible or not.
/*
 * Non-zero when the file system that vnode `vp` belongs to has MI_INT set
 * in its mi_flags, i.e. it was mounted interruptible.
 */
#define	INTR(vp)	(MI_INT & VTOMI(vp)->mi_flags)
622 * Short hand for checking whether failover is enabled or not
/*
 * Non-NULL when failover is enabled for mount info `mi`, i.e. when the
 * first entry of its server list has a successor (sv_next).
 *
 * `mi` is parenthesized so that any pointer expression (e.g. `base + 1`)
 * may be passed.
 */
#define	FAILOVER_MOUNT(mi)	((mi)->mi_servers->sv_next)
627 * How long will async threads wait for additional work.
/*
 * How long async threads wait for additional work: 1 minute, expressed
 * as 60 seconds multiplied by hz.
 */
#define	NFS_ASYNC_TIMEOUT	(1 * 60 * hz)
632 extern int clget(clinfo_t
*, servinfo_t
*, cred_t
*, CLIENT
**,
634 extern void clfree(CLIENT
*, struct chtab
*);
635 extern void nfs_mi_zonelist_add(mntinfo_t
*);
636 extern void nfs_free_mi(mntinfo_t
*);
637 extern void nfs_mnt_kstat_init(struct vfs
*);
641 * Per-zone data for managing client handles. Included here solely for the
645 * client side statistics
648 kstat_named_t calls
; /* client requests */
649 kstat_named_t badcalls
; /* rpc failures */
650 kstat_named_t clgets
; /* client handle gets */
651 kstat_named_t cltoomany
; /* client handle cache misses */
653 kstat_named_t clalloc
; /* number of client handles */
654 kstat_named_t noresponse
; /* server not responding cnt */
655 kstat_named_t failover
; /* server failover count */
656 kstat_named_t remap
; /* server remap count */
661 struct chhead
*nfscl_chtable
;
662 kmutex_t nfscl_chtable_lock
;
663 zoneid_t nfscl_zoneid
;
664 list_node_t nfscl_node
;
665 struct clstat nfscl_stat
;
672 #endif /* _NFS_NFS_CLNT_H */