/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */
#include <sys/errno.h>
#include <sys/types.h>
#include <sys/kstat.h>
#include <sys/flock.h>
#include <nfs/nfs4_kprot.h>
#include <nfs/rnode.h>
#include <sys/condvar_impl.h>
#define	NFS4_SIZE_OK(size)	((size) <= MAXOFFSET_T)

/* Four states of nfs4_server's lease_valid */
#define	NFS4_LEASE_INVALID		0
#define	NFS4_LEASE_VALID		1
#define	NFS4_LEASE_UNINITIALIZED	2
#define	NFS4_LEASE_NOT_STARTED		3

/* flag to tell the renew thread it should exit */
#define	NFS4_THREAD_EXIT	1

/* Default number of seconds to wait on GRACE and DELAY errors */
#define	NFS4ERR_DELAY_TIME	10

/* Number of hash buckets for open owners for each nfs4_server */
#define	NFS4_NUM_OO_BUCKETS	53

/* Number of freed open owners (per mntinfo4_t) to keep around */
#define	NFS4_NUM_FREED_OPEN_OWNERS	8

/* Number of seconds to wait before retrying a SETCLIENTID(_CONFIRM) op */
#define	NFS4_RETRY_SCLID_DELAY	10

/* Number of times we should retry a SETCLIENTID(_CONFIRM) op */
#define	NFS4_NUM_SCLID_RETRIES	3

/* Number of times we should retry on open after getting NFS4ERR_BAD_SEQID */
#define	NFS4_NUM_RETRY_BAD_SEQID	3
/*
 * Macro to wakeup sleeping async worker threads.
 */
#define	NFS4_WAKE_ASYNC_WORKER(work_cv)	{				\
	if (CV_HAS_WAITERS(&work_cv[NFS4_ASYNC_QUEUE]))			\
		cv_signal(&work_cv[NFS4_ASYNC_QUEUE]);			\
	else if (CV_HAS_WAITERS(&work_cv[NFS4_ASYNC_PGOPS_QUEUE]))	\
		cv_signal(&work_cv[NFS4_ASYNC_PGOPS_QUEUE]);		\
}

#define	NFS4_WAKEALL_ASYNC_WORKERS(work_cv) {				\
		cv_broadcast(&work_cv[NFS4_ASYNC_QUEUE]);		\
		cv_broadcast(&work_cv[NFS4_ASYNC_PGOPS_QUEUE]);		\
}
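/*
 * Illustrative sketch (not part of the original header): a producer that
 * has just queued an async request while holding mi_async_lock would
 * typically wake a single worker, e.g.:
 *
 *	mutex_enter(&mi->mi_async_lock);
 *	... enqueue the request ...
 *	NFS4_WAKE_ASYNC_WORKER(mi->mi_async_work_cv);
 *	mutex_exit(&mi->mi_async_lock);
 */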
/*
 * Is the attribute cache valid? If the client holds a delegation, then attrs
 * are by definition valid. If not, then check to see if attrs have timed out.
 */
#define	ATTRCACHE4_VALID(vp) (VTOR4(vp)->r_deleg_type != OPEN_DELEGATE_NONE || \
	gethrtime() < VTOR4(vp)->r_time_attr_inval)
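/*
 * Sketch of a typical callsite (illustrative; 'attr_bits', 'gar', and the
 * surrounding error handling are assumptions, not defined in this header):
 *
 *	if (!ATTRCACHE4_VALID(vp))
 *		error = nfs4_attr_otw(vp, TAG_GETATTR, &gar, attr_bits, cr);
 */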
/*
 * Flags to indicate whether to purge the DNLC for non-directory vnodes
 * in a call to nfs_purge_caches.
 */
#define	NFS4_NOPURGE_DNLC	0
#define	NFS4_PURGE_DNLC		1
/*
 * Is the cache valid?
 * Swap is always valid; otherwise, the cache is valid if there are no
 * attributes (attrtime == 0) or if the mtime matches the cached mtime.
 * NOTE: mtime is now a timestruc_t.
 * The caller should be holding the rnode r_statelock mutex.
 */
#define	CACHE4_VALID(rp, mtime, fsize)				\
	((RTOV4(rp)->v_flag & VISSWAP) == VISSWAP ||		\
	(((mtime).tv_sec == (rp)->r_attr.va_mtime.tv_sec &&	\
	(mtime).tv_nsec == (rp)->r_attr.va_mtime.tv_nsec) &&	\
	((fsize) == (rp)->r_attr.va_size)))
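/*
 * Sketch (illustrative): per the note above, the check belongs under
 * r_statelock, comparing freshly returned attributes against the cache:
 *
 *	mutex_enter(&rp->r_statelock);
 *	valid = CACHE4_VALID(rp, va.va_mtime, va.va_size);
 *	mutex_exit(&rp->r_statelock);
 */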
/*
 * Macro to detect forced unmount or a zone shutdown.
 */
#define	FS_OR_ZONE_GONE4(vfsp)					\
	(((vfsp)->vfs_flag & VFS_UNMOUNTED) ||			\
	zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)

/*
 * Macro to help determine whether a request failed because the underlying
 * filesystem has been forcibly unmounted or because of zone shutdown.
 */
#define	NFS4_FRC_UNMT_ERR(err, vfsp)	\
	((err) == EIO && FS_OR_ZONE_GONE4((vfsp)))
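/*
 * Sketch (illustrative): after an OTW call fails, callers can distinguish
 * a real I/O error from one manufactured by forced unmount/zone shutdown:
 *
 *	if (error && NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) {
 *		... the fs is going away; bail out without retrying ...
 *	}
 */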
/*
 * Due to the way the address space callbacks are used to execute a delmap,
 * we must keep track of how many times the same thread has called
 * fop_delmap()->nfs4_delmap(). This is done by having a list of
 * nfs4_delmapcall_t's associated with each rnode4_t. This list is protected
 * by the rnode4_t's r_statelock. The individual elements do not need to be
 * protected as they will only ever be created, modified, and destroyed by
 * one thread (the call_id).
 * See nfs4_delmap() for further explanation.
 */
typedef struct nfs4_delmapcall {
	int		error;		/* error from delmap */
	list_node_t	call_node;
} nfs4_delmapcall_t;

/*
 * delmap address space callback args
 */
typedef struct nfs4_delmap_args {
	nfs4_delmapcall_t	*caller; /* to retrieve errors from the cb */
} nfs4_delmap_args_t;
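/*
 * Sketch (illustrative; the real logic lives in nfs4_delmap()): a delmap
 * caller records itself on the rnode's list under r_statelock. The list
 * field name below is an assumption, not defined in this header:
 *
 *	mutex_enter(&rp->r_statelock);
 *	list_insert_tail(&rp->r_indelmap, delmapcall);	(assumed list name)
 *	mutex_exit(&rp->r_statelock);
 */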
/*
 * client side statistics
 */
struct clstat4 {
	kstat_named_t	calls;		/* client requests */
	kstat_named_t	badcalls;	/* rpc failures */
	kstat_named_t	referrals;	/* referrals */
	kstat_named_t	referlinks;	/* referrals as symlinks */
	kstat_named_t	clgets;		/* client handle gets */
	kstat_named_t	cltoomany;	/* client handle cache misses */
	kstat_named_t	clalloc;	/* number of client handles */
	kstat_named_t	noresponse;	/* server not responding cnt */
	kstat_named_t	failover;	/* server failover count */
	kstat_named_t	remap;		/* server remap count */
};

/*
 * The following are statistics that describe the behavior of the system as a
 * whole and don't correspond to any particular zone.
 */
struct clstat4_debug {
	kstat_named_t	nrnode;		/* number of allocated rnodes */
	kstat_named_t	access;		/* size of access cache */
	kstat_named_t	dirent;		/* size of readdir cache */
	kstat_named_t	dirents;	/* size of readdir buf cache */
	kstat_named_t	reclaim;	/* number of reclaims */
	kstat_named_t	clreclaim;	/* number of cl reclaims */
	kstat_named_t	f_reclaim;	/* number of free reclaims */
	kstat_named_t	a_reclaim;	/* number of active reclaims */
	kstat_named_t	r_reclaim;	/* number of rnode reclaims */
	kstat_named_t	rpath;		/* bytes used to store rpaths */
};

extern struct clstat4_debug clstat4_debug;
/*
 * The NFS specific async_reqs structure. iotype4 is grouped to support two
 * types of async thread pools; please read the comments section of the
 * mntinfo4_t definition for more information. Care should be taken while
 * adding new members to this group.
 */

#define	NFS4_ASYNC_PGOPS_TYPES	(NFS4_COMMIT + 1)

/*
 * NFS async requests queue type.
 */
enum nfs4_async_queue_type {
	NFS4_ASYNC_QUEUE,
	NFS4_ASYNC_PGOPS_QUEUE,
	NFS4_MAX_ASYNC_QUEUES
};

/*
 * Number of NFS async threads operating exclusively on page op requests.
 */
#define	NUM_ASYNC_PGOPS_THREADS	0x2
struct nfs4_async_read_req {
	void (*readahead)();		/* pointer to readahead function */
	uoff_t blkoff;			/* offset in file */
	struct seg *seg;		/* segment to do i/o to */
	caddr_t addr;			/* address to do i/o to */
};

struct nfs4_pageio_req {
	int (*pageio)();		/* pointer to pageio function */
	page_t *pp;			/* page list */
	uoff_t io_off;			/* offset in file */
	uint_t io_len;			/* size of request */
	int flags;
};

struct nfs4_readdir_req {
	int (*readdir)();		/* pointer to readdir function */
	struct rddir4_cache *rdc;	/* pointer to cache entry to fill */
};

struct nfs4_commit_req {
	void (*commit)();		/* pointer to commit function */
	page_t *plist;			/* page list */
	offset4 offset;			/* starting offset */
	count4 count;			/* size of range to be committed */
};

struct nfs4_async_reqs {
	struct nfs4_async_reqs *a_next;	/* pointer to next arg struct */
	kthread_t *a_queuer;		/* thread id of queueing thread */
	struct vnode *a_vp;		/* vnode pointer */
	struct cred *a_cred;		/* cred pointer */
	enum iotype4 a_io;		/* i/o type */
	union {
		struct nfs4_async_read_req a_read_args;
		struct nfs4_pageio_req a_pageio_args;
		struct nfs4_readdir_req a_readdir_args;
		struct nfs4_commit_req a_commit_args;
	} a_args;
};

#define	a_nfs4_readahead a_args.a_read_args.readahead
#define	a_nfs4_blkoff a_args.a_read_args.blkoff
#define	a_nfs4_seg a_args.a_read_args.seg
#define	a_nfs4_addr a_args.a_read_args.addr

#define	a_nfs4_putapage a_args.a_pageio_args.pageio
#define	a_nfs4_pageio a_args.a_pageio_args.pageio
#define	a_nfs4_pp a_args.a_pageio_args.pp
#define	a_nfs4_off a_args.a_pageio_args.io_off
#define	a_nfs4_len a_args.a_pageio_args.io_len
#define	a_nfs4_flags a_args.a_pageio_args.flags

#define	a_nfs4_readdir a_args.a_readdir_args.readdir
#define	a_nfs4_rdc a_args.a_readdir_args.rdc

#define	a_nfs4_commit a_args.a_commit_args.commit
#define	a_nfs4_plist a_args.a_commit_args.plist
#define	a_nfs4_offset a_args.a_commit_args.offset
#define	a_nfs4_count a_args.a_commit_args.count
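/*
 * Sketch (illustrative): an async worker dequeues a request and dispatches
 * on a_io using the shorthand accessors above; e.g. for a pageio request,
 * the call matches the callback signature in nfs4_async_pageio() below:
 *
 *	struct nfs4_async_reqs *args;	(dequeued under mi_async_lock)
 *
 *	if (args->a_io == NFS4_PAGEIO)
 *		(void) (*args->a_nfs4_pageio)(args->a_vp, args->a_nfs4_pp,
 *		    args->a_nfs4_off, args->a_nfs4_len, args->a_nfs4_flags,
 *		    args->a_cred);
 */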
/*
 * Security information
 */
typedef struct sv_secinfo {
	uint_t		count;	/* how many sdata there are */
	uint_t		index;	/* which sdata[index] */
	struct sec_data	*sdata;
} sv_secinfo_t;

/*
 * Hash bucket for the mi's open owner list (mi_oo_list).
 */
typedef struct nfs4_oo_hash_bucket {
	list_t		b_oo_hash_list;
} nfs4_oo_hash_bucket_t;

/*
 * Global array of ctags.
 */
extern ctag_t nfs4_ctags[];
typedef enum nfs4_tag_type {
	TAG_GETATTR_FSLOCATION,
	TAG_OPEN_CONFIRM_LOST
} nfs4_tag_type_t;
#define	NFS4_TAG_INITIALIZER	{				\
		{TAG_NONE,		"",			\
			{0x20202020, 0x20202020, 0x20202020}},	\
		{TAG_ACCESS,		"access",		\
			{0x61636365, 0x73732020, 0x20202020}},	\
		{TAG_CLOSE,		"close",		\
			{0x636c6f73, 0x65202020, 0x20202020}},	\
		{TAG_CLOSE_LOST,	"lost close",		\
			{0x6c6f7374, 0x20636c6f, 0x73652020}},	\
		{TAG_CLOSE_UNDO,	"undo close",		\
			{0x756e646f, 0x20636c6f, 0x73652020}},	\
		{TAG_COMMIT,		"commit",		\
			{0x636f6d6d, 0x69742020, 0x20202020}},	\
		{TAG_DELEGRETURN,	"delegreturn",		\
			{0x64656c65, 0x67726574, 0x75726e20}},	\
		{TAG_FSINFO,		"fsinfo",		\
			{0x6673696e, 0x666f2020, 0x20202020}},	\
		{TAG_GET_SYMLINK,	"get symlink text",	\
			{0x67657420, 0x736c6e6b, 0x20747874}},	\
		{TAG_GETATTR,		"getattr",		\
			{0x67657461, 0x74747220, 0x20202020}},	\
		{TAG_GETATTR_FSLOCATION, "getattr fslocation",	\
			{0x67657461, 0x74747220, 0x66736c6f}},	\
		{TAG_INACTIVE,		"inactive",		\
			{0x696e6163, 0x74697665, 0x20202020}},	\
		{TAG_LINK,		"link",			\
			{0x6c696e6b, 0x20202020, 0x20202020}},	\
		{TAG_LOCK,		"lock",			\
			{0x6c6f636b, 0x20202020, 0x20202020}},	\
		{TAG_LOCK_RECLAIM,	"reclaim lock",		\
			{0x7265636c, 0x61696d20, 0x6c6f636b}},	\
		{TAG_LOCK_RESEND,	"resend lock",		\
			{0x72657365, 0x6e64206c, 0x6f636b20}},	\
		{TAG_LOCK_REINSTATE,	"reinstate lock",	\
			{0x7265696e, 0x7374206c, 0x6f636b20}},	\
		{TAG_LOCK_UNKNOWN,	"unknown lock",		\
			{0x756e6b6e, 0x6f776e20, 0x6c6f636b}},	\
		{TAG_LOCKT,		"lock test",		\
			{0x6c6f636b, 0x5f746573, 0x74202020}},	\
		{TAG_LOCKU,		"unlock",		\
			{0x756e6c6f, 0x636b2020, 0x20202020}},	\
		{TAG_LOCKU_RESEND,	"resend locku",		\
			{0x72657365, 0x6e64206c, 0x6f636b75}},	\
		{TAG_LOCKU_REINSTATE,	"reinstate unlock",	\
			{0x7265696e, 0x73742075, 0x6e6c636b}},	\
		{TAG_LOOKUP,		"lookup",		\
			{0x6c6f6f6b, 0x75702020, 0x20202020}},	\
		{TAG_LOOKUP_PARENT,	"lookup parent",	\
			{0x6c6f6f6b, 0x75702070, 0x6172656e}},	\
		{TAG_LOOKUP_VALID,	"lookup valid",		\
			{0x6c6f6f6b, 0x75702076, 0x616c6964}},	\
		{TAG_LOOKUP_VPARENT,	"lookup valid parent",	\
			{0x6c6f6f6b, 0x766c6420, 0x7061726e}},	\
		{TAG_MKDIR,		"mkdir",		\
			{0x6d6b6469, 0x72202020, 0x20202020}},	\
		{TAG_MKNOD,		"mknod",		\
			{0x6d6b6e6f, 0x64202020, 0x20202020}},	\
		{TAG_MOUNT,		"mount",		\
			{0x6d6f756e, 0x74202020, 0x20202020}},	\
		{TAG_OPEN,		"open",			\
			{0x6f70656e, 0x20202020, 0x20202020}},	\
		{TAG_OPEN_CONFIRM,	"open confirm",		\
			{0x6f70656e, 0x5f636f6e, 0x6669726d}},	\
		{TAG_OPEN_CONFIRM_LOST,	"lost open confirm",	\
			{0x6c6f7374, 0x206f7065, 0x6e5f636f}},	\
		{TAG_OPEN_DG,		"open downgrade",	\
			{0x6f70656e, 0x20646772, 0x61646520}},	\
		{TAG_OPEN_DG_LOST,	"lost open downgrade",	\
			{0x6c737420, 0x6f70656e, 0x20646772}},	\
		{TAG_OPEN_LOST,		"lost open",		\
			{0x6c6f7374, 0x206f7065, 0x6e202020}},	\
		{TAG_OPENATTR,		"openattr",		\
			{0x6f70656e, 0x61747472, 0x20202020}},	\
		{TAG_PATHCONF,		"pathconf",		\
			{0x70617468, 0x636f6e66, 0x20202020}},	\
		{TAG_PUTROOTFH,		"putrootfh",		\
			{0x70757472, 0x6f6f7466, 0x68202020}},	\
		{TAG_READ,		"read",			\
			{0x72656164, 0x20202020, 0x20202020}},	\
		{TAG_READAHEAD,		"readahead",		\
			{0x72656164, 0x61686561, 0x64202020}},	\
		{TAG_READDIR,		"readdir",		\
			{0x72656164, 0x64697220, 0x20202020}},	\
		{TAG_READLINK,		"readlink",		\
			{0x72656164, 0x6c696e6b, 0x20202020}},	\
		{TAG_RELOCK,		"relock",		\
			{0x72656c6f, 0x636b2020, 0x20202020}},	\
		{TAG_REMAP_LOOKUP,	"remap lookup",		\
			{0x72656d61, 0x70206c6f, 0x6f6b7570}},	\
		{TAG_REMAP_LOOKUP_AD,	"remap lookup attr dir", \
			{0x72656d70, 0x206c6b75, 0x70206164}},	\
		{TAG_REMAP_LOOKUP_NA,	"remap lookup named attrs", \
			{0x72656d70, 0x206c6b75, 0x70206e61}},	\
		{TAG_REMAP_MOUNT,	"remap mount",		\
			{0x72656d61, 0x70206d6f, 0x756e7420}},	\
		{TAG_RMDIR,		"rmdir",		\
			{0x726d6469, 0x72202020, 0x20202020}},	\
		{TAG_REMOVE,		"remove",		\
			{0x72656d6f, 0x76652020, 0x20202020}},	\
		{TAG_RENAME,		"rename",		\
			{0x72656e61, 0x6d652020, 0x20202020}},	\
		{TAG_RENAME_VFH,	"rename volatile fh",	\
			{0x72656e61, 0x6d652028, 0x76666829}},	\
		{TAG_RENEW,		"renew",		\
			{0x72656e65, 0x77202020, 0x20202020}},	\
		{TAG_REOPEN,		"reopen",		\
			{0x72656f70, 0x656e2020, 0x20202020}},	\
		{TAG_REOPEN_LOST,	"lost reopen",		\
			{0x6c6f7374, 0x2072656f, 0x70656e20}},	\
		{TAG_SECINFO,		"secinfo",		\
			{0x73656369, 0x6e666f20, 0x20202020}},	\
		{TAG_SETATTR,		"setattr",		\
			{0x73657461, 0x74747220, 0x20202020}},	\
		{TAG_SETCLIENTID,	"setclientid",		\
			{0x73657463, 0x6c69656e, 0x74696420}},	\
		{TAG_SETCLIENTID_CF,	"setclientid_confirm",	\
			{0x73636c6e, 0x7469645f, 0x636f6e66}},	\
		{TAG_SYMLINK,		"symlink",		\
			{0x73796d6c, 0x696e6b20, 0x20202020}},	\
		{TAG_WRITE,		"write",		\
			{0x77726974, 0x65202020, 0x20202020}}	\
}
/*
 * These flags are for differentiating the search criteria for
 * find_open_owner(). The comparison is done with the open_owner's
 * 'oo_just_created' flag.
 */
#define	NFS4_PERM_CREATED	0x0
#define	NFS4_JUST_CREATED	0x1
/*
 * Hashed by the cr_uid and cr_ruid of credential 'oo_cred'. 'oo_cred_otw'
 * is stored upon a successful OPEN. This is needed when the user's effective
 * and real uid's don't match. The 'oo_cred_otw' overrides the credential
 * passed down by VFS for async read/write, commit, lock, and close operations.
 *
 * The oo_ref_count keeps track of the number of active references on this
 * data structure plus the number of nfs4_open_streams pointing to this
 * structure.
 *
 * 'oo_valid' tells whether this struct is about to be freed or not.
 *
 * 'oo_just_created' tells us whether this struct has just been created but
 * not been fully finalized (that is, created upon an OPEN request and
 * finalized upon the OPEN success).
 *
 * The 'oo_seqid_inuse' is for the open seqid synchronization. If a thread
 * is currently using the open owner and its open seqid, then it sets
 * oo_seqid_inuse to true if it is not currently set. If it is set, then it
 * does a cv_wait on the oo_cv_seqid_sync condition variable. When the thread
 * is done, it unsets oo_seqid_inuse and does a cv_signal to wake a process
 * waiting on the condition variable. A sketch of this protocol follows the
 * structure definition below.
 *
 * 'oo_last_good_seqid' is the last valid seqid this open owner sent OTW,
 * and 'oo_last_good_op' is the operation that issued the last valid seqid.
 *
 * Lock ordering:
 *	mntinfo4_t::mi_lock > oo_lock (for searching mi_oo_list)
 *
 *	oo_seqid_inuse > mntinfo4_t::mi_lock
 *	oo_seqid_inuse > rnode4_t::r_statelock
 *	oo_seqid_inuse > rnode4_t::r_statev4_lock
 *	oo_seqid_inuse > nfs4_open_stream_t::os_sync_lock
 *
 * The 'oo_seqid_inuse'/'oo_cv_seqid_sync' protects:
 *
 * The 'oo_lock' protects:
 */
typedef struct nfs4_open_owner {
	seqid4			oo_last_good_seqid;
	nfs4_tag_type_t		oo_last_good_op;
	unsigned		oo_seqid_inuse:1;
	kcondvar_t		oo_cv_seqid_sync;
	/*
	 * Fix this to always be 8 bytes
	 */
	uint64_t		oo_name;
	list_node_t		oo_hash_node;
	list_node_t		oo_foo_node;
} nfs4_open_owner_t;
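/*
 * Sketch of the seqid synchronization protocol described above (illustrative
 * only; the real implementations are nfs4_start_open_seqid_sync() and
 * nfs4_end_open_seqid_sync(), and the lock guarding the flag here is an
 * assumption):
 *
 *	mutex_enter(&oop->oo_lock);
 *	while (oop->oo_seqid_inuse)
 *		cv_wait(&oop->oo_cv_seqid_sync, &oop->oo_lock);
 *	oop->oo_seqid_inuse = 1;
 *	mutex_exit(&oop->oo_lock);
 *
 *	... use the open owner and its open seqid ...
 *
 *	mutex_enter(&oop->oo_lock);
 *	oop->oo_seqid_inuse = 0;
 *	cv_signal(&oop->oo_cv_seqid_sync);
 *	mutex_exit(&oop->oo_lock);
 */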
/*
 * Static server information.
 * These fields are read-only once they are initialized; sv_lock
 * should be held as writer if they are changed during mount:
 *
 * These fields are protected by sv_lock:
 *
 * Lock ordering:
 *	nfs_rtable4_lock > sv_lock
 *	rnode4_t::r_statelock > sv_lock
 */
typedef struct servinfo4 {
	struct knetconfig *sv_knconf;	/* bound TLI fd */
	struct knetconfig *sv_origknconf; /* For RDMA save orig knconf */
	struct netbuf	sv_addr;	/* server's address */
	nfs4_fhandle_t	sv_fhandle;	/* this server's filehandle */
	nfs4_fhandle_t	sv_pfhandle;	/* parent dir filehandle */
	int		sv_pathlen;	/* Length of server path */
	char		*sv_path;	/* Path name on server */
	uint32_t	sv_flags;	/* flags for this server */
	sec_data_t	*sv_secdata;	/* client initiated security data */
	sv_secinfo_t	*sv_secinfo;	/* server security information */
	sec_data_t	*sv_currsec;	/* security data currently used; */
					/* points to one of the sec_data */
					/* entries in sv_secinfo */
	sv_secinfo_t	*sv_save_secinfo; /* saved secinfo */
	sec_data_t	*sv_savesec;	/* saved security data */
	sec_data_t	*sv_dhsec;	/* AUTH_DH data from the user land */
	char		*sv_hostname;	/* server's hostname */
	int		sv_hostnamelen;	/* server's hostname length */
	fattr4_fsid	sv_fsid;	/* fsid of shared obj */
	fattr4_supported_attrs	sv_supp_attrs;
	struct servinfo4	*sv_next;	/* next in list */
	nfs_rwlock_t	sv_lock;
} servinfo4_t;

/* sv_flags fields */
#define	SV4_TRYSECINFO		0x001	/* try secinfo data from the server */
#define	SV4_TRYSECDEFAULT	0x002	/* try a default flavor */
#define	SV4_NOTINUSE		0x004	/* servinfo4_t had fatal errors */
#define	SV4_ROOT_STALE		0x008	/* root vnode got ESTALE */
/*
 * Lock call types. See nfs4frlock().
 */
typedef enum nfs4_lock_call_type {
	NFS4_LCK_CTYPE_NORM,
	NFS4_LCK_CTYPE_RECLAIM,
	NFS4_LCK_CTYPE_RESEND,
	NFS4_LCK_CTYPE_REINSTATE
} nfs4_lock_call_type_t;
/*
 * This structure holds the information for a lost open/close/open downgrade/
 * lock/locku request. It is also used for requests that are queued up so
 * that the recovery thread can release server state after a forced
 * unmount.
 *
 * "lr_op" is 0 if the struct is uninitialized. Otherwise, it is set to
 * the proper OP_* nfs_opnum4 number. The other fields contain information
 * to reconstruct the call.
 *
 * lr_dvp is used for OPENs with CREATE, so that we can do a PUTFH of the
 * parent directory without relying on vtodv (since we may not have a vp
 * for the file we wish to create).
 *
 * lr_putfirst means that the request should go to the front of the resend
 * queue, rather than the end.
 */
typedef struct nfs4_lost_rqst {
	nfs4_open_owner_t	*lr_oop;
	struct nfs4_open_stream	*lr_osp;
	struct nfs4_lock_owner	*lr_lop;
	union {
		struct {
			nfs4_lock_call_type_t	lru_ctype;
			nfs_lock_type4		lru_locktype;
		} lru_lockargs;		/* LOCK, LOCKU */
		struct {
			uint32_t		lru_oaccess;
			uint32_t		lru_odeny;
			enum open_claim_type4	lru_oclaim;
			stateid4		lru_ostateid; /* reopen only */
			component4		lru_ofile;
		} lru_open_args;
		struct {
			uint32_t		lru_dg_access;
			uint32_t		lru_dg_deny;
		} lru_open_dg_args;
	} nfs4_lr_u;
} nfs4_lost_rqst_t;

#define	lr_oacc		nfs4_lr_u.lru_open_args.lru_oaccess
#define	lr_odeny	nfs4_lr_u.lru_open_args.lru_odeny
#define	lr_oclaim	nfs4_lr_u.lru_open_args.lru_oclaim
#define	lr_ostateid	nfs4_lr_u.lru_open_args.lru_ostateid
#define	lr_ofile	nfs4_lr_u.lru_open_args.lru_ofile
#define	lr_dg_acc	nfs4_lr_u.lru_open_dg_args.lru_dg_access
#define	lr_dg_deny	nfs4_lr_u.lru_open_dg_args.lru_dg_deny
#define	lr_ctype	nfs4_lr_u.lru_lockargs.lru_ctype
#define	lr_locktype	nfs4_lr_u.lru_lockargs.lru_locktype
/*
 * Recovery actions. Some actions can imply further recovery using a
 * different recovery action (e.g., recovering the clientid leads to
 * recovering open files and locks).
 */

/*
 * Administrative and debug message framework.
 */
#define	NFS4_MSG_MAX	100
extern int nfs4_msg_max;

#define	NFS4_REFERRAL_LOOP_MAX	20
typedef enum {
	RE_UNEXPECTED_ACTION,
	RE_UNEXPECTED_STATUS,
	RE_LOST_STATE_BAD_OP
} nfs4_event_type_t;

} nfs4_fact_status_t;

typedef struct nfs4_rfact {
	nfs4_fact_type_t	rf_type;
	nfs4_fact_status_t	rf_status;
	nfs4_recov_t		rf_action;
	struct rnode4		*rf_rp1;
} nfs4_rfact_t;

typedef struct nfs4_revent {
	nfs4_event_type_t	re_type;
	struct mntinfo4		*re_mi;
	struct rnode4		*re_rp1;
	struct rnode4		*re_rp2;
	nfs4_tag_type_t		re_tag1;
	nfs4_tag_type_t		re_tag2;
} nfs4_revent_t;

typedef struct nfs4_debug_msg {
	nfs4_msg_type_t		msg_type;
	nfs4_rfact_t		msg_fact;
	nfs4_revent_t		msg_event;
	nfs4_msg_status_t	msg_status;
	list_node_t		msg_node;
} nfs4_debug_msg_t;
/*
 * NFS private data per mounted file system
 *	The mi_lock mutex protects the following fields:
 *
 * Normally the netconfig information for the mount comes from
 * mi_curr_serv and mi_klmconfig is NULL. If NLM calls need to use a
 * different transport, mi_klmconfig contains the necessary netconfig
 * information.
 *
 *	The mi_async_lock mutex protects the following fields:
 *		mi_async_curr[NFS4_MAX_ASYNC_QUEUES]
 *		mi_async_init_clusters
 *		mi_threads[NFS4_MAX_ASYNC_QUEUES]
 *
 *	The nfs4_server_t::s_lock protects the following fields:
 *
 *	The mntinfo4_t::mi_recovlock protects the following fields:
 *
 * Changing mi_srv from one nfs4_server_t to a different one requires
 * holding the mi_recovlock as RW_WRITER.
 * Exception: setting mi_srv the first time in mount/mountroot is done
 * holding the mi_recovlock as RW_READER.
 *
 * Lock ordering:
 *	mi4_globals::mig_lock > mi_async_lock
 *	mi_async_lock > nfs4_server_t::s_lock > mi_lock
 *	mi_recovlock > mi_rename_lock > nfs_rtable4_lock
 *	nfs4_server_t::s_recovlock > mi_recovlock
 *	rnode4_t::r_rwlock > mi_rename_lock
 *	nfs_rtable4_lock > mi_lock
 *	nfs4_server_t::s_lock > mi_msg_list_lock
 *	mi_recovlock > nfs4_server_t::s_lock
 *	mi_recovlock > nfs4_server_lst_lock
 *
 * The 'mi_oo_list' represents the hash buckets that contain the
 * nfs4_open_owners for this particular mntinfo4.
 *
 * The 'mi_foo_list' represents the freed nfs4_open_owners for this mntinfo4.
 * 'mi_foo_num' is the current number of freed open owners on the list,
 * 'mi_foo_max' is the maximum number of freed open owners that are allowable
 * on the list.
 *
 * mi_rootfh and mi_srvparentfh are read-only once created, but that just
 * refers to the pointer. The contents must be updated to keep in sync
 * with mi_curr_serv.
 *
 * The mi_msg_list_lock protects against adding/deleting entries to the
 * mi_msg_list, and also the updating/retrieving of mi_lease_period.
 *
 * 'mi_zone' is initialized at structure creation time, and never
 * changes; it may be read without a lock.
 *
 * mi_zone_node is linkage into the mi4_globals.mig_list, and is
 * protected by mi4_globals.mig_list_lock.
 *
 * If MI4_EPHEMERAL is set in mi_flags, then mi_ephemeral points to an
 * ephemeral structure for this ephemeral mount point. It can not be
 * NULL. Also, mi_ephemeral_tree points to the root of the ephemeral
 * tree.
 *
 * If MI4_EPHEMERAL is not set in mi_flags, then mi_ephemeral has
 * to be NULL. If mi_ephemeral_tree is non-NULL, then this node
 * is the enclosing mntinfo4 for the ephemeral tree.
 */
struct nfs4_ephemeral;
struct nfs4_ephemeral_tree;

typedef struct mntinfo4 {
	kmutex_t	mi_lock;	/* protects mntinfo4 fields */
	struct servinfo4 *mi_servers;	/* server list */
	struct servinfo4 *mi_curr_serv;	/* current server */
	struct nfs4_sharedfh *mi_rootfh; /* root filehandle */
	struct nfs4_sharedfh *mi_srvparentfh; /* root's parent on server */
	kcondvar_t	mi_failover_cv;	/* failover synchronization */
	struct vfs	*mi_vfsp;	/* back pointer to vfs */
	enum vtype	mi_type;	/* file type of the root vnode */
	uint_t		mi_flags;	/* see below */
	uint_t		mi_recovflags;	/* if recovery active; see below */
	kthread_t	*mi_recovthread; /* active recov thread or NULL */
	uint_t		mi_error;	/* only set/valid when MI4_RECOV_FAIL */
					/* is set in mi_flags */
	int		mi_tsize;	/* transfer size (bytes) */
					/* really read size */
	int		mi_stsize;	/* server's max transfer size (bytes) */
					/* really write size */
	int		mi_timeo;	/* initial timeout in 10th sec */
	int		mi_retrans;	/* times to retry request */
	hrtime_t	mi_acregmin;	/* min time to hold cached file attr */
	hrtime_t	mi_acregmax;	/* max time to hold cached file attr */
	hrtime_t	mi_acdirmin;	/* min time to hold cached dir attr */
	hrtime_t	mi_acdirmax;	/* max time to hold cached dir attr */
	len_t		mi_maxfilesize;	/* for pathconf _PC_FILESIZEBITS */
	int		mi_curread;	/* current read size */
	int		mi_curwrite;	/* current write size */
	uint_t		mi_count;	/* ref count */
	/*
	 * Async I/O management
	 * We have 2 pools of threads working on async I/O:
	 *	(i) Threads which work on all async queues. Default number of
	 *	threads in this queue is 8. Threads in this pool work on async
	 *	queue pointed by mi_async_curr[NFS4_ASYNC_QUEUE]. Number of
	 *	active threads in this pool is tracked by
	 *	mi_threads[NFS4_ASYNC_QUEUE].
	 *	(ii) Threads which work only on page op async queues.
	 *	Page ops queue comprises NFS4_PUTAPAGE, NFS4_PAGEIO &
	 *	NFS4_COMMIT. Default number of threads in this queue is 2
	 *	(NUM_ASYNC_PGOPS_THREADS). Threads in this pool work on async
	 *	queue pointed by mi_async_curr[NFS4_ASYNC_PGOPS_QUEUE]. Number
	 *	of active threads in this pool is tracked by
	 *	mi_threads[NFS4_ASYNC_PGOPS_QUEUE].
	 *
	 * In addition to the above two pools, there is always one thread that
	 * handles over-the-wire requests for fop_inactive.
	 */
	struct nfs4_async_reqs *mi_async_reqs[NFS4_ASYNC_TYPES];
	struct nfs4_async_reqs *mi_async_tail[NFS4_ASYNC_TYPES];
	struct nfs4_async_reqs **mi_async_curr[NFS4_MAX_ASYNC_QUEUES];
					/* current async queue */
	uint_t		mi_async_clusters[NFS4_ASYNC_TYPES];
	uint_t		mi_async_init_clusters;
	uint_t		mi_async_req_count; /* # outstanding work requests */
	kcondvar_t	mi_async_reqs_cv; /* signaled when there's work */
	ushort_t	mi_threads[NFS4_MAX_ASYNC_QUEUES];
					/* number of active async threads */
	ushort_t	mi_max_threads;	/* max number of async threads */
	kthread_t	*mi_manager_thread; /* async manager thread id */
	kthread_t	*mi_inactive_thread; /* inactive thread id */
	kcondvar_t	mi_inact_req_cv; /* notify fop_inactive thread */
	kcondvar_t	mi_async_work_cv[NFS4_MAX_ASYNC_QUEUES];
					/* tell workers to work */
	kcondvar_t	mi_async_cv;	/* all pool threads exited */
	kmutex_t	mi_async_lock;
	struct pathcnf	*mi_pathconf;	/* static pathconf kludge */
	rpcprog_t	mi_prog;	/* RPC program number */
	rpcvers_t	mi_vers;	/* RPC program version number */
	char		**mi_rfsnames;	/* mapping to proc names */
	kstat_named_t	*mi_reqs;	/* count of requests */
	clock_t		mi_printftime;	/* last error printf time */
	nfs_rwlock_t	mi_recovlock;	/* separate ops from recovery (v4) */
	time_t		mi_grace_wait;	/* non-zero represents time to wait */
	/* when we switched nfs4_server_t - only for observability purposes */
	time_t		mi_srvsettime;
	nfs_rwlock_t	mi_rename_lock;	/* atomic volfh rename */
	struct nfs4_fname *mi_fname;	/* root fname */
	list_t		mi_lost_state;	/* resend list */
	list_t		mi_bseqid_list;	/* bad seqid list */
	/*
	 * Client Side Failover stats
	 */
	uint_t		mi_noresponse;	/* server not responding count */
	uint_t		mi_failover;	/* failover to new server count */
	uint_t		mi_remap;	/* remap to new server count */
	struct kstat	*mi_io_kstats;
	struct kstat	*mi_ro_kstats;
	kstat_t		*mi_recov_ksp;	/* ptr to the recovery kstat */
	/*
	 * Volatile fh flags (nfsv4)
	 */
	uint32_t	mi_fh_expire_type;
	struct mntinfo4	*mi_clientid_next;
	struct mntinfo4	*mi_clientid_prev;
	clientid4	mi_clientid;	/* redundant info found in nfs4_server */
	int		mi_open_files;	/* count of open files */
	int		mi_in_recovery;	/* count of recovery instances */
	kcondvar_t	mi_cv_in_recov;	/* cv for recovery threads */
	struct nfs4_oo_hash_bucket	mi_oo_list[NFS4_NUM_OO_BUCKETS];
	/*
	 * Shared filehandle pool.
	 */
	nfs_rwlock_t	mi_fh_lock;
	avl_tree_t	mi_filehandles;
	/*
	 * Debug message queue.
	 */
	time_t		mi_lease_period;
					/*
					 * not guaranteed to be accurate.
					 * only should be used by debug queue.
					 */
	kmutex_t	mi_msg_list_lock;
	struct zone	*mi_zone;	/* Zone in which FS is mounted */
	zone_ref_t	mi_zone_ref;	/* Reference to aforementioned zone */
	list_node_t	mi_zone_node;	/* linkage into per-zone mi list */
	/*
	 * Links for unmounting ephemeral mounts.
	 */
	struct nfs4_ephemeral		*mi_ephemeral;
	struct nfs4_ephemeral_tree	*mi_ephemeral_tree;
	uint_t		mi_srvset_cnt;	/* increment when changing the */
					/* nfs4_server_t */
	struct nfs4_server	*mi_srv; /* backpointer to nfs4_server_t */
	/*
	 * Referral related info.
	 */
	int		mi_vfs_referral_loop_cnt;
	/*
	 * List of rnode4_t structures that belong to this mntinfo4.
	 */
	kmutex_t	mi_rnodes_lock;	/* protects the mi_rnodes list */
	list_t		mi_rnodes;	/* the list */
} mntinfo4_t;
/*
 * The values for mi_flags.
 *
 *	MI4_HARD		hard or soft mount
 *	MI4_PRINTED		responding message printed
 *	MI4_INT			allow INTR on hard mount
 *	MI4_DOWN		server is down
 *	MI4_NOAC		don't cache attributes
 *	MI4_NOCTO		no close-to-open consistency
 *	MI4_LLOCK		local locking only (no lockmgr)
 *	MI4_GRPID		System V group id inheritance
 *	MI4_SHUTDOWN		System is rebooting or shutting down
 *	MI4_LINK		server supports link
 *	MI4_SYMLINK		server supports symlink
 *	MI4_EPHEMERAL_RECURSED	an ephemeral mount being unmounted
 *				due to a recursive call - no need
 *				for additional recursion
 *	MI4_ACL			server supports NFSv4 ACLs
 *	MI4_MIRRORMOUNT		is a mirrormount
 *	MI4_NOPRINT		don't print messages
 *	MI4_DIRECTIO		do direct I/O
 *	MI4_RECOV_ACTIV		filesystem has a recovery thread
 *	MI4_REMOVE_ON_LAST_CLOSE remove from server's list
 *	MI4_RECOV_FAIL		client recovery failed
 *	MI4_PUBLIC		public/url option used
 *	MI4_MOUNTING		mount in progress, don't failover
 *	MI4_POSIX_LOCK		if server is using POSIX locking
 *	MI4_LOCK_DEBUG		cmn_err'd posix lock err msg
 *	MI4_DEAD		zone has released it
 *	MI4_INACTIVE_IDLE	inactive thread idle
 *	MI4_BADOWNER_DEBUG	badowner error msg per mount
 *	MI4_ASYNC_MGR_STOP	tell async manager to die
 *	MI4_TIMEDOUT		saw a timeout during zone shutdown
 *	MI4_EPHEMERAL		is an ephemeral mount
 */
#define	MI4_HARD		0x1
#define	MI4_PRINTED		0x2
#define	MI4_INT			0x4
#define	MI4_DOWN		0x8
#define	MI4_NOAC		0x10
#define	MI4_NOCTO		0x20
/* 0x40 is available */
#define	MI4_LLOCK		0x80
#define	MI4_GRPID		0x100
#define	MI4_SHUTDOWN		0x200
#define	MI4_LINK		0x400
#define	MI4_SYMLINK		0x800
#define	MI4_EPHEMERAL_RECURSED	0x1000
#define	MI4_ACL			0x2000
/* MI4_MIRRORMOUNT is also defined in nfsstat.c */
#define	MI4_MIRRORMOUNT		0x4000
#define	MI4_REFERRAL		0x8000
/* 0x10000 is available */
#define	MI4_NOPRINT		0x20000
#define	MI4_DIRECTIO		0x40000
/* 0x80000 is available */
#define	MI4_RECOV_ACTIV		0x100000
#define	MI4_REMOVE_ON_LAST_CLOSE	0x200000
#define	MI4_RECOV_FAIL		0x400000
#define	MI4_PUBLIC		0x800000
#define	MI4_MOUNTING		0x1000000
#define	MI4_POSIX_LOCK		0x2000000
#define	MI4_LOCK_DEBUG		0x4000000
#define	MI4_DEAD		0x8000000
#define	MI4_INACTIVE_IDLE	0x10000000
#define	MI4_BADOWNER_DEBUG	0x20000000
#define	MI4_ASYNC_MGR_STOP	0x40000000
#define	MI4_TIMEDOUT		0x80000000

#define	MI4_EPHEMERAL		(MI4_MIRRORMOUNT | MI4_REFERRAL)
#define	INTR4(vp)	(VTOMI4(vp)->mi_flags & MI4_INT)

#define	FAILOVER_MOUNT4(mi)	(mi->mi_servers->sv_next)

/*
 * MI4R_NEED_CLIENTID is sort of redundant (it's the nfs4_server_t flag
 * that's important), but some flag is needed to indicate that recovery is
 * going on for the filesystem.
 */
#define	MI4R_NEED_CLIENTID	0x1
#define	MI4R_REOPEN_FILES	0x2
#define	MI4R_NEED_SECINFO	0x4
#define	MI4R_NEED_NEW_SERVER	0x8
#define	MI4R_REMAP_FILES	0x10
#define	MI4R_SRV_REBOOT		0x20	/* server has rebooted */
#define	MI4R_LOST_STATE		0x40
#define	MI4R_BAD_SEQID		0x80
#define	MI4R_MOVED		0x100

#define	MI4_HOLD(mi) {		\

#define	MI4_RELE(mi) {		\

/*
 * vfs pointer to mount info
 */
#define	VFTOMI4(vfsp)	((mntinfo4_t *)((vfsp)->vfs_data))

/*
 * vnode pointer to mount info
 */
#define	VTOMI4(vp)	((mntinfo4_t *)(((vp)->v_vfsp)->vfs_data))
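/*
 * Sketch (illustrative): these conversion macros are how most vnode-level
 * code reaches the per-mount state, e.g.:
 *
 *	mntinfo4_t *mi = VTOMI4(vp);
 *
 *	if (INTR4(vp))
 *		... allow the operation to be interrupted by signals ...
 */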
/*
 * lease_valid is initially set to NFS4_LEASE_NOT_STARTED. This is when the
 * nfs4_server is first created. lease_valid is then set to
 * NFS4_LEASE_UNINITIALIZED when the renew thread is started. The extra state
 * of NFS4_LEASE_NOT_STARTED is needed for client recovery (so we know if a
 * thread already exists when we do SETCLIENTID). lease_valid is then set to
 * NFS4_LEASE_VALID (if it is at NFS4_LEASE_UNINITIALIZED) when a state
 * creating operation (OPEN) is done. lease_valid stays at NFS4_LEASE_VALID
 * as long as the lease is renewed. It is set to NFS4_LEASE_INVALID when the
 * lease expires. Client recovery is needed to set the lease back to
 * NFS4_LEASE_VALID from NFS4_LEASE_INVALID.
 *
 * The s_cred is the credential used to mount the first file system for this
 * server. It is used as the credential for the renew thread's calls to the
 * server.
 *
 * The renew thread waits on the condition variable cv_thread_exit. If the cv
 * is signalled, then the thread knows it must check s_thread_exit to see if
 * it should exit. The cv is signaled when the last file system is unmounted
 * from a particular server. s_thread_exit is set to 0 upon thread startup,
 * and set to NFS4_THREAD_EXIT when the last file system is unmounted, thereby
 * telling the thread to exit. s_thread_exit is needed to avoid spurious
 * wakeups.
 *
 * state_ref_count is incremented every time a new file is opened and
 * decremented every time a file is closed otw. This keeps track of whether
 * the nfs4_server has state associated with it or not.
 *
 * s_refcnt is the reference count for storage management of the struct
 * itself.
 *
 * mntinfo4_list points to the doubly linked list of mntinfo4s that share
 * this nfs4_server (ie: <clientid, saddr> pair) in the current zone. This is
 * needed for a nfs4_server to get a mntinfo4 for use in rfs4call.
 *
 * s_recovlock is used to synchronize recovery operations. The thread
 * that is recovering the client must acquire it as a writer. If the
 * thread is using the clientid (including recovery operations on other
 * state), acquire it as a reader.
 *
 * The 's_otw_call_count' keeps track of the number of outstanding over the
 * wire requests for this structure. The struct will not go away as long
 * as this is non-zero (or s_refcnt is non-zero).
 *
 * The 's_cv_otw_count' is used in conjunction with the 's_otw_call_count'
 * variable to let the renew thread know when an outstanding otw request has
 * finished.
 *
 * 'zoneid' and 'zone_globals' are set at creation of this structure
 * and are read-only after that; no lock is required to read them.
 *
 * s_lock protects: everything except cv_thread_exit and s_recovlock.
 *
 * s_program is used as the index into the nfs4_callback_globals's
 * nfs4prog2server table. When a callback request comes in, we can
 * use that request's program number (minus NFS4_CALLBACK) as an index
 * into the nfs4prog2server. That entry will hold the nfs4_server_t ptr.
 * We can then access that nfs4_server_t and its 's_deleg_list' (its list of
 * delegated rnode4_ts).
 *
 * Lock ordering:
 *	nfs4_server::s_lock > mntinfo4::mi_lock
 *	nfs_rtable4_lock > s_lock
 *	nfs4_server_lst_lock > s_lock
 *	s_recovlock > s_lock
 */
struct nfs4_callback_globals;
typedef struct nfs4_server {
	struct nfs4_server	*forw;
	struct nfs4_server	*back;
	struct netbuf		saddr;
	uint_t			s_flags;	/* see below */
	clientid4		clientid;	/* what we get from server */
	nfs_client_id4		clidtosend;	/* what we send to server */
	mntinfo4_t		*mntinfo4_list;
	time_t			s_lease_time;
	time_t			last_renewal_time;
	timespec_t		propagation_delay;
	kcondvar_t		cv_thread_exit;
	int			state_ref_count;
	int			s_otw_call_count;
	kcondvar_t		s_cv_otw_count;
	kcondvar_t		s_clientid_pend;
	list_t			s_deleg_list;
	rpcprog_t		s_program;
	nfs_rwlock_t		s_recovlock;
	kcondvar_t		wait_cb_null;	/* used to wait for CB_NULL */
	zoneid_t		zoneid;	/* zone using this nfs4_server_t */
	struct nfs4_callback_globals *zone_globals;	/* globals */
} nfs4_server_t;

/* nfs4_server flags */
#define	N4S_CLIENTID_SET	1	/* server has our clientid */
#define	N4S_CLIENTID_PEND	0x2	/* server doesn't have clientid */
#define	N4S_CB_PINGED		0x4	/* server has sent us a CB_NULL */
#define	N4S_CB_WAITER		0x8	/* is/has wait{ing/ed} for cb_null */
#define	N4S_INSERTED		0x10	/* list has reference for server */
#define	N4S_BADOWNER_DEBUG	0x20	/* bad owner err msg per client */

#define	N4S_CB_PAUSE_TIME	10000	/* Amount of time to pause (10ms) */

struct lease_time_arg {

enum nfs4_delegreturn_policy {
/*
 * Operation hints for the recovery framework (mostly).
 *
 * OH_ACCESS, OH_GETACL, OH_GETATTR, OH_LOOKUP, OH_READDIR
 *	These hints exist to allow the user to visit/readdir a R4SRVSTUB dir.
 *	(dir represents the root of a server fs that has not yet been
 *	mounted at client)
 */
/*
 * This data structure is used to track ephemeral mounts for both
 * mirror mounts and referrals.
 *
 * Note that each nfs4_ephemeral can only have one other nfs4_ephemeral
 * pointing at it. So we don't need two backpointers to walk
 * back up the tree.
 *
 * An ephemeral tree is pointed to by an enclosing non-ephemeral
 * mntinfo4. The root is also pointed to by its ephemeral
 * mntinfo4. ne_child will get us back to it, while ne_prior
 * will get us back to the non-ephemeral mntinfo4. This is an
 * edge case we will need to be wary of when walking back up the
 * tree.
 *
 * The way we handle this edge case is to have ne_prior be NULL
 * for the root nfs4_ephemeral node.
 */
typedef struct nfs4_ephemeral {
	mntinfo4_t		*ne_mount;	/* who encloses us */
	struct nfs4_ephemeral	*ne_child;	/* first child node */
	struct nfs4_ephemeral	*ne_peer;	/* next sibling */
	struct nfs4_ephemeral	*ne_prior;	/* who points at us */
	time_t			ne_ref_time;	/* time last referenced */
	uint_t			ne_mount_to;	/* timeout at */
	int			ne_state;	/* used to traverse */
} nfs4_ephemeral_t;

/*
 * State for the node (set in ne_state):
 */
#define	NFS4_EPHEMERAL_OK		0x0
#define	NFS4_EPHEMERAL_VISIT_CHILD	0x1
#define	NFS4_EPHEMERAL_VISIT_SIBLING	0x2
#define	NFS4_EPHEMERAL_PROCESS_ME	0x4
#define	NFS4_EPHEMERAL_CHILD_ERROR	0x8
#define	NFS4_EPHEMERAL_PEER_ERROR	0x10
/*
 * These are the locks used in processing ephemeral data:
 *
 * net->net_tree_lock
 *	This lock is used to gate all tree operations.
 *	If it is held, then no other process may
 *	traverse the tree. This allows us to not
 *	throw a hold on each vfs_t in the tree.
 *	Can be held for a "long" time.
 *
 * net->net_cnt_lock
 *	Used to protect refcnt and status.
 *	Must be held for a really short time.
 *
 * nfs4_ephemeral_thread_lock
 *	Is only held to create the harvester for the zone.
 *	There is no ordering imposed on it.
 *	Held for a really short time.
 *
 * Some further detail on the interactions:
 *
 * net_tree_lock controls access to net_root. Access needs to first be
 * attempted in a non-blocking check.
 *
 * net_cnt_lock controls access to net_refcnt and net_status. It must only be
 * held for very short periods of time, unless the refcnt is 0 and the status
 * is invalid.
 *
 * Before a caller can grab net_tree_lock, it must first grab net_cnt_lock
 * to bump the net_refcnt. It then releases it and does the action specific
 * algorithm to get the net_tree_lock. Once it has that, then it is okay to
 * grab the net_cnt_lock and change the status. The status can only be
 * changed if the caller has the net_tree_lock held as well. A sketch of
 * this sequence follows the structure definition below.
 *
 * Note that the initial grab of net_cnt_lock must occur whilst
 * mi_lock is being held. This prevents stale data in that if the
 * ephemeral tree is non-NULL, then the harvester can not remove
 * the tree from the mntinfo node until it grabs that lock. I.e.,
 * we get the pointer to the tree and hold the lock atomically
 * with respect to being in mi_lock.
 *
 * When a caller is done with net_tree_lock, it can decrement the net_refcnt
 * either before it releases net_tree_lock or after.
 *
 * In either event, to decrement net_refcnt, it must hold net_cnt_lock.
 *
 * Note that the overall locking scheme for the nodes is to control access
 * via the tree. The current scheme could easily be extended such that
 * the enclosing root referenced a "forest" of trees. The underlying trees
 * would be autonomous with respect to locks.
 *
 * Note that net_next is controlled by external locks
 * particular to the data structure that the tree is being added to.
 */
typedef struct nfs4_ephemeral_tree {
	mntinfo4_t			*net_mount;
	nfs4_ephemeral_t		*net_root;
	struct nfs4_ephemeral_tree	*net_next;
	kmutex_t			net_tree_lock;
	kmutex_t			net_cnt_lock;
	uint_t				net_status;
	uint_t				net_refcnt;
} nfs4_ephemeral_tree_t;
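/*
 * Sketch of the acquisition order described above (illustrative only; the
 * particular status bit stored is an assumption):
 *
 *	mutex_enter(&mi->mi_lock);
 *	net = mi->mi_ephemeral_tree;
 *	mutex_enter(&net->net_cnt_lock);	(first grab: under mi_lock)
 *	net->net_refcnt++;
 *	mutex_exit(&net->net_cnt_lock);
 *	mutex_exit(&mi->mi_lock);
 *
 *	mutex_enter(&net->net_tree_lock);	(or a non-blocking attempt)
 *	mutex_enter(&net->net_cnt_lock);
 *	net->net_status |= NFS4_EPHEMERAL_TREE_LOCKED;
 *	mutex_exit(&net->net_cnt_lock);
 *	... walk or modify the tree ...
 *	mutex_exit(&net->net_tree_lock);
 */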
/*
 * State for the tree (set in net_status):
 */
#define	NFS4_EPHEMERAL_TREE_OK		0x0
#define	NFS4_EPHEMERAL_TREE_BUILDING	0x1
#define	NFS4_EPHEMERAL_TREE_DEROOTING	0x2
#define	NFS4_EPHEMERAL_TREE_INVALID	0x4
#define	NFS4_EPHEMERAL_TREE_MOUNTING	0x8
#define	NFS4_EPHEMERAL_TREE_UMOUNTING	0x10
#define	NFS4_EPHEMERAL_TREE_LOCKED	0x20

#define	NFS4_EPHEMERAL_TREE_PROCESSING	(NFS4_EPHEMERAL_TREE_DEROOTING | \
	NFS4_EPHEMERAL_TREE_INVALID | NFS4_EPHEMERAL_TREE_UMOUNTING | \
	NFS4_EPHEMERAL_TREE_LOCKED)

/*
 * This macro evaluates to non-zero if the given op releases state at the
 * server.
 */
#define	OH_IS_STATE_RELE(op)	((op) == OH_CLOSE || (op) == OH_LOCKU || \
	(op) == OH_DELEGRETURN)
#ifdef _KERNEL

extern void	nfs4_async_manager(struct vfs *);
extern void	nfs4_async_manager_stop(struct vfs *);
extern void	nfs4_async_stop(struct vfs *);
extern int	nfs4_async_stop_sig(struct vfs *);
extern int	nfs4_async_readahead(vnode_t *, uoff_t, caddr_t,
		    struct seg *, cred_t *,
		    void (*)(vnode_t *, uoff_t,
		    caddr_t, struct seg *, cred_t *));
extern int	nfs4_async_putapage(vnode_t *, page_t *, uoff_t, size_t,
		    int, cred_t *, int (*)(vnode_t *, page_t *,
		    uoff_t, size_t, int, cred_t *));
extern int	nfs4_async_pageio(vnode_t *, page_t *, uoff_t, size_t,
		    int, cred_t *, int (*)(vnode_t *, page_t *,
		    uoff_t, size_t, int, cred_t *));
extern void	nfs4_async_commit(vnode_t *, page_t *, offset3, count3,
		    cred_t *, void (*)(vnode_t *, page_t *,
		    offset3, count3, cred_t *));
extern void	nfs4_async_inactive(vnode_t *, cred_t *);
extern void	nfs4_inactive_thread(mntinfo4_t *mi);
extern void	nfs4_inactive_otw(vnode_t *, cred_t *);
extern int	nfs4_putpages(vnode_t *, uoff_t, size_t, int, cred_t *);

extern int	nfs4_setopts(vnode_t *, model_t, struct nfs_args *);
extern void	nfs4_mnt_kstat_init(struct vfs *);

extern void	rfs4call(struct mntinfo4 *, struct COMPOUND4args_clnt *,
		    struct COMPOUND4res_clnt *, cred_t *, int *, int,
		    nfs4_error_t *);
extern void	nfs4_acl_fill_cache(struct rnode4 *, vsecattr_t *);
extern int	nfs4_attr_otw(vnode_t *, nfs4_tag_type_t,
		    nfs4_ga_res_t *, bitmap4, cred_t *);
extern void	nfs4_attrcache_noinval(vnode_t *, nfs4_ga_res_t *, hrtime_t);
extern void	nfs4_attr_cache(vnode_t *, nfs4_ga_res_t *,
		    hrtime_t, cred_t *, int, change_info4 *);
extern void	nfs4_purge_rddir_cache(vnode_t *);
extern void	nfs4_invalidate_pages(vnode_t *, uoff_t, cred_t *);
extern void	nfs4_purge_caches(vnode_t *, int, cred_t *, int);
extern void	nfs4_purge_stale_fh(int, vnode_t *, cred_t *);
extern void	nfs4_flush_pages(vnode_t *vp, cred_t *cr);

extern void	nfs4rename_update(vnode_t *, vnode_t *, nfs_fh4 *, char *);
extern void	nfs4_update_paths(vnode_t *, char *, vnode_t *, char *,
		    servinfo4_t *);

extern void	nfs4args_lookup_free(nfs_argop4 *, int);
extern void	nfs4args_copen_free(OPEN4cargs *);

extern void	nfs4_printfhandle(nfs4_fhandle_t *);

extern void	nfs_free_mi4(mntinfo4_t *);
extern void	sv4_free(servinfo4_t *);
extern void	nfs4_mi_zonelist_add(mntinfo4_t *);
extern int	nfs4_mi_zonelist_remove(mntinfo4_t *);
extern int	nfs4_secinfo_recov(mntinfo4_t *, vnode_t *, vnode_t *);
extern void	nfs4_secinfo_init(void);
extern void	nfs4_secinfo_fini(void);
extern int	nfs4_secinfo_path(mntinfo4_t *, cred_t *, int);
extern int	nfs4_secinfo_vnode_otw(vnode_t *, char *, cred_t *);
extern void	secinfo_free(sv_secinfo_t *);
extern void	save_mnt_secinfo(servinfo4_t *);
extern void	check_mnt_secinfo(servinfo4_t *, vnode_t *);
extern int	vattr_to_fattr4(vattr_t *, vsecattr_t *, fattr4 *, int,
		    enum nfs_opnum4, bitmap4 supp_mask);
extern int	nfs4_putapage(vnode_t *, page_t *, uoff_t *, size_t *,
		    int, cred_t *);
extern void	nfs4_write_error(vnode_t *, int, cred_t *);
extern void	nfs4_lockcompletion(vnode_t *, int);
extern bool_t	nfs4_map_lost_lock_conflict(vnode_t *);
extern int	vtodv(vnode_t *, vnode_t **, cred_t *, bool_t);
extern int	vtoname(vnode_t *, char *, ssize_t);
extern void	nfs4open_confirm(vnode_t *, seqid4 *, stateid4 *, cred_t *,
		    bool_t, bool_t *, nfs4_open_owner_t *, bool_t,
		    nfs4_error_t *, int *);
extern void	nfs4_error_zinit(nfs4_error_t *);
extern void	nfs4_error_init(nfs4_error_t *, int);
extern void	nfs4_free_args(struct nfs_args *);

extern void	mi_hold(mntinfo4_t *);
extern void	mi_rele(mntinfo4_t *);

extern vnode_t	*find_referral_stubvp(vnode_t *, char *, cred_t *);
extern int	nfs4_setup_referral(vnode_t *, char *, vnode_t **, cred_t *);

extern sec_data_t	*copy_sec_data(sec_data_t *);
extern gss_clntdata_t	*copy_sec_data_gss(gss_clntdata_t *);

extern int	nfs4_consistent_type(vnode_t *);

extern void	nfs4_init_dot_entries(void);
extern void	nfs4_destroy_dot_entries(void);
extern struct nfs4_callback_globals	*nfs4_get_callback_globals(void);

extern struct nfs4_server nfs4_server_lst;

extern clock_t nfs_write_error_interval;

#endif /* _KERNEL */
/*
 * Flags for nfs4getfh_otw.
 */
#define	NFS4_GETFH_PUBLIC	0x01
#define	NFS4_GETFH_NEEDSOP	0x02
/*
 * Found through rnodes.
 *
 * The os_open_ref_count keeps track of the number of open file descriptor
 * references on this data structure. It will be bumped for any successful
 * OTW OPEN call and any OPEN call that determines the OTW call is not
 * necessary and the open stream hasn't just been created (see
 * nfs4_is_otw_open_necessary).
 *
 * os_mapcnt is a count of the number of mmapped pages for a particular
 * open stream; this in conjunction w/ os_open_ref_count is used to
 * determine when to do a close to the server. This is necessary because
 * of the semantics of doing open, mmap, close; the OTW close must wait
 * until all open and mmap references have vanished.
 *
 * 'os_valid' tells us whether this structure is about to be freed or not;
 * if it is, then don't return it in find_open_stream().
 *
 * 'os_final_close' is set when a CLOSE OTW was attempted. This is needed
 * so we can properly count the os_open_ref_count in cases where we fop_close
 * without a fop_open, and have nfs4_inactive() drive the OTW CLOSE. It
 * also helps differentiate the fop_open/VN_RELE case from the fop_close
 * that tried to close OTW but failed, and left the state cleanup to
 * nfs4_inactive/CLOSE_FORCE.
 *
 * 'os_force_close' is used to let us know if an intervening thread came
 * and reopened the open stream after we decided to issue a CLOSE_FORCE,
 * but before we could actually process the CLOSE_FORCE.
 *
 * 'os_pending_close' is set when an over-the-wire CLOSE is deferred to the
 * lost state queue.
 *
 * 'open_stateid' is set to the last open stateid returned by the server unless
 * 'os_delegation' is 1, in which case 'open_stateid' refers to the
 * delegation stateid returned by the server. This is used in cases where the
 * client tries to OPEN a file but already has a suitable delegation, so we
 * just stick the delegation stateid in the open stream.
 *
 * os_dc_openacc are open access bits which have been granted to the
 * open stream by virtue of a delegation, but which have not been seen
 * by the server. This applies even if the open stream does not have
 * os_delegation set. These bits are used when setting file locks to
 * determine whether an open with CLAIM_DELEGATE_CUR needs to be done
 * before the lock request can be sent to the server. See
 * nfs4frlock_check_deleg().
 *
 * 'os_mmap_read/write' keep track of the read and write access our memory
 * maps require. We need to keep track of this so we can provide the proper
 * access bits in the open/mmap/close/reboot/reopen case.
 *
 * 'os_failed_reopen' tells us that we failed to successfully reopen this
 * open stream; therefore, we should not use this open stateid as it is
 * not valid anymore. This flag is also used to indicate an unsuccessful
 * attempt to reopen a delegation open stream with CLAIM_DELEGATE_CUR.
 *
 * If 'os_orig_oo_name' is different from os_open_owner's oo_name,
 * then this tells us that this open stream's open owner used a
 * bad seqid (that is, got NFS4ERR_BAD_SEQID). If different, this open
 * stream will no longer be used for future OTW state releasing calls.
 *
 * Lock ordering:
 *	rnode4_t::r_os_lock > os_sync_lock
 *	os_sync_lock > rnode4_t::r_statelock
 *	os_sync_lock > rnode4_t::r_statev4_lock
 *	os_sync_lock > mntinfo4_t::mi_lock (via hold over rfs4call)
 *
 * The 'os_sync_lock' protects:
 *	os_share_acc_write
 *	os_share_deny_none
 *	os_share_deny_read
 *	os_share_deny_write
 *
 * The rnode4_t::r_os_lock protects:
 *
 * These fields are set at creation time and
 * read only after that:
 */
typedef struct nfs4_open_stream {
	uint64_t	os_share_acc_read;
	uint64_t	os_share_acc_write;
	uint64_t	os_mmap_read;
	uint64_t	os_mmap_write;
	uint32_t	os_share_deny_none;
	uint32_t	os_share_deny_read;
	uint32_t	os_share_deny_write;
	stateid4	open_stateid;
	unsigned	os_valid:1;
	unsigned	os_delegation:1;
	unsigned	os_final_close:1;
	unsigned	os_pending_close:1;
	unsigned	os_failed_reopen:1;
	unsigned	os_force_close:1;
	int		os_open_ref_count;
	list_node_t	os_node;
	struct nfs4_open_owner	*os_open_owner;
	uint64_t	os_orig_oo_name;
	kmutex_t	os_sync_lock;
} nfs4_open_stream_t;
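/*
 * Sketch (illustrative): open stream lookups are assumed to return a held
 * reference (or NULL), which callers must pair with a release:
 *
 *	nfs4_open_stream_t *osp;
 *
 *	osp = find_open_stream(oop, rp);
 *	if (osp != NULL) {
 *		... examine/update fields under os_sync_lock as noted above ...
 *		open_stream_rele(osp, rp);
 *	}
 */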
/*
 * This structure describes the format of the lock_owner_name
 * field of the lock owner.
 */
typedef struct nfs4_lo_name {
	uint64_t	ln_seq_num;
} nfs4_lo_name_t;

/*
 * Flags for lo_flags.
 */
#define	NFS4_LOCK_SEQID_INUSE	0x1
#define	NFS4_BAD_SEQID_LOCK	0x2
/*
 * The lo_prev_rnode and lo_next_rnode are for a circular list that hangs
 * off the rnode. If the links are NULL it means this object is not on the
 * list.
 *
 * 'lo_pending_rqsts' is non-zero if we ever tried to send a request and
 * didn't get a response back. This is used to figure out if we have
 * possible remote v4 locks, so that we can clean up at process exit. In
 * theory, the client should be able to figure out if the server received
 * the request (based on what seqid works), so maybe we can get rid of this
 * field someday.
 *
 * 'lo_ref_count' tells us how many processes/threads are using this data
 * structure. The rnode's list accounts for one reference.
 *
 * 'lo_just_created' is set to NFS4_JUST_CREATED when we first create the
 * data structure. It is then set to NFS4_PERM_CREATED when a lock request
 * is successful using this lock owner structure. We need to keep 'temporary'
 * lock owners around so we can properly keep the lock seqid synchronization
 * when multiple processes/threads are trying to create the lock owner for the
 * first time (especially with the DENIED error case). Once
 * 'lo_just_created' is set to NFS4_PERM_CREATED, it doesn't change.
 *
 * 'lo_valid' tells us whether this structure is about to be freed or not;
 * if it is, then don't return it from find_lock_owner().
 *
 * Retrieving and setting of 'lock_seqid' is protected by the
 * NFS4_LOCK_SEQID_INUSE flag. Waiters for NFS4_LOCK_SEQID_INUSE should
 * use 'lo_cv_seqid_sync'.
 *
 * The setting of 'lock_stateid' is protected by the
 * NFS4_LOCK_SEQID_INUSE flag and 'lo_lock'. The retrieving of the
 * 'lock_stateid' is protected by 'lo_lock', with the additional
 * requirement that the calling function can handle NFS4ERR_OLD_STATEID and
 * NFS4ERR_BAD_STATEID as appropriate.
 *
 * The setting of NFS4_BAD_SEQID_LOCK to lo_flags tells us whether this lock
 * owner used a bad seqid (that is, got NFS4ERR_BAD_SEQID). With this set,
 * this lock owner will no longer be used for future OTW calls. Once set,
 * it is never unset.
 *
 * Lock ordering:
 *	rnode4_t::r_statev4_lock > lo_lock
 */
typedef struct nfs4_lock_owner {
	struct nfs4_lock_owner	*lo_next_rnode;
	struct nfs4_lock_owner	*lo_prev_rnode;
	stateid4		lock_stateid;
	/*
	 * Fix this to always be 12 bytes
	 */
	nfs4_lo_name_t		lock_owner_name;
	int			lo_pending_rqsts;
	int			lo_just_created;
	kcondvar_t		lo_cv_seqid_sync;
	kthread_t		*lo_seqid_holder;	/* debugging aid */
} nfs4_lock_owner_t;

/* for nfs4_lock_owner_t lookups */
typedef enum {LOWN_ANY, LOWN_VALID_STATEID} lown_which_t;
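/*
 * Sketch (illustrative): the lookup flavors above let callers either take
 * any matching lock owner or insist on one with a valid stateid. A found
 * lock owner is assumed to be returned held:
 *
 *	nfs4_lock_owner_t *lop;
 *
 *	lop = find_lock_owner(rp, curproc->p_pid, LOWN_ANY);
 *	if (lop != NULL) {
 *		... use lop, then ...
 *		lock_owner_rele(lop);
 *	}
 */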
/* Number of times to retry a call that fails with a state-independent error */
#define	NFS4_NUM_RECOV_RETRIES	3

} nfs4_stateid_type_t;

typedef struct nfs4_stateid_types {
	nfs4_stateid_type_t cur_sid_type;
} nfs4_stateid_types_t;
/*
 * Per-zone data for dealing with callbacks. Included here solely for the
 * sake of mdb.
 */
struct nfs4_callback_stats {
	kstat_named_t	delegations;
	kstat_named_t	cb_getattr;
	kstat_named_t	cb_recall;
	kstat_named_t	cb_null;
	kstat_named_t	cb_dispatch;
	kstat_named_t	delegaccept_r;
	kstat_named_t	delegaccept_rw;
	kstat_named_t	delegreturn;
	kstat_named_t	callbacks;
	kstat_named_t	claim_cur;
	kstat_named_t	claim_cur_ok;
	kstat_named_t	recall_trunc;
	kstat_named_t	recall_failed;
	kstat_named_t	return_limit_write;
	kstat_named_t	return_limit_addmap;
	kstat_named_t	deleg_recover;
	kstat_named_t	cb_illegal;
};

struct nfs4_callback_globals {
	kmutex_t nfs4_cb_lock;
	kmutex_t nfs4_dlist_lock;
	int nfs4_program_hint;
	/* this table maps the program number to the nfs4_server structure */
	struct nfs4_server **nfs4prog2server;
	list_t nfs4_cb_ports;
	struct nfs4_callback_stats nfs4_callback_stats;
	int nfs4_dlistadd_c;
	int nfs4_dlistclean_c;
};

} nfs4_close_type_t;
/*
 * Structure to hold the bad seqid information that is passed
 * to the recovery framework.
 */
typedef struct nfs4_bseqid_entry {
	nfs4_open_owner_t	*bs_oop;
	nfs4_lock_owner_t	*bs_lop;
	nfs4_tag_type_t		bs_tag;
	list_node_t		bs_node;
} nfs4_bseqid_entry_t;
extern void	nfs4close_one(vnode_t *, nfs4_open_stream_t *, cred_t *, int,
		    nfs4_lost_rqst_t *, nfs4_error_t *, nfs4_close_type_t,
		    size_t, uint_t, uint_t);
extern void	nfs4close_notw(vnode_t *, nfs4_open_stream_t *, int *);
extern void	nfs4_set_lock_stateid(nfs4_lock_owner_t *, stateid4);
extern void	open_owner_hold(nfs4_open_owner_t *);
extern void	open_owner_rele(nfs4_open_owner_t *);
extern nfs4_open_stream_t *find_or_create_open_stream(nfs4_open_owner_t *,
		    struct rnode4 *, int *);
extern nfs4_open_stream_t *find_open_stream(nfs4_open_owner_t *,
		    struct rnode4 *);
extern nfs4_open_stream_t *create_open_stream(nfs4_open_owner_t *oop,
		    struct rnode4 *rp);
extern void	open_stream_hold(nfs4_open_stream_t *);
extern void	open_stream_rele(nfs4_open_stream_t *, struct rnode4 *);
extern int	nfs4close_all(vnode_t *, cred_t *);
extern void	lock_owner_hold(nfs4_lock_owner_t *);
extern void	lock_owner_rele(nfs4_lock_owner_t *);
extern nfs4_lock_owner_t *create_lock_owner(struct rnode4 *, pid_t);
extern nfs4_lock_owner_t *find_lock_owner(struct rnode4 *, pid_t,
		    lown_which_t);
extern void	nfs4_rnode_remove_lock_owner(struct rnode4 *,
		    nfs4_lock_owner_t *);
extern void	nfs4_flush_lock_owners(struct rnode4 *);
extern void	nfs4_setlockowner_args(lock_owner4 *, struct rnode4 *, pid_t);
extern void	nfs4_set_open_seqid(seqid4, nfs4_open_owner_t *,
		    nfs4_tag_type_t);
extern void	nfs4_set_lock_seqid(seqid4, nfs4_lock_owner_t *);
extern void	nfs4_get_and_set_next_open_seqid(nfs4_open_owner_t *,
		    nfs4_tag_type_t);
extern void	nfs4_end_open_seqid_sync(nfs4_open_owner_t *);
extern int	nfs4_start_open_seqid_sync(nfs4_open_owner_t *, mntinfo4_t *);
extern void	nfs4_end_lock_seqid_sync(nfs4_lock_owner_t *);
extern int	nfs4_start_lock_seqid_sync(nfs4_lock_owner_t *, mntinfo4_t *);
extern void	nfs4_setup_lock_args(nfs4_lock_owner_t *, nfs4_open_owner_t *,
		    nfs4_open_stream_t *, clientid4, locker4 *);
extern void	nfs4_destroy_open_owner(nfs4_open_owner_t *);

extern void	nfs4_renew_lease_thread(nfs4_server_t *);
extern nfs4_server_t *find_nfs4_server(mntinfo4_t *);
extern nfs4_server_t *find_nfs4_server_all(mntinfo4_t *, int all);
extern nfs4_server_t *new_nfs4_server(servinfo4_t *, cred_t *);
extern void	nfs4_mark_srv_dead(nfs4_server_t *);
extern nfs4_server_t *servinfo4_to_nfs4_server(servinfo4_t *);
extern void	nfs4_inc_state_ref_count(mntinfo4_t *);
extern void	nfs4_inc_state_ref_count_nolock(nfs4_server_t *,
		    mntinfo4_t *);
extern void	nfs4_dec_state_ref_count(mntinfo4_t *);
extern void	nfs4_dec_state_ref_count_nolock(nfs4_server_t *,
		    mntinfo4_t *);
extern clientid4 mi2clientid(mntinfo4_t *);
extern int	nfs4_server_in_recovery(nfs4_server_t *);
extern bool_t	nfs4_server_vlock(nfs4_server_t *, int);
extern nfs4_open_owner_t *create_open_owner(cred_t *, mntinfo4_t *);
extern uint64_t	nfs4_get_new_oo_name(void);
extern nfs4_open_owner_t *find_open_owner(cred_t *, int, mntinfo4_t *);
extern nfs4_open_owner_t *find_open_owner_nolock(cred_t *, int, mntinfo4_t *);
extern void	nfs4frlock(nfs4_lock_call_type_t, vnode_t *, int, flock64_t *,
		    int, uoff_t, cred_t *, nfs4_error_t *,
		    nfs4_lost_rqst_t *, int *);
extern void	nfs4open_dg_save_lost_rqst(int, nfs4_lost_rqst_t *,
		    nfs4_open_owner_t *, nfs4_open_stream_t *, cred_t *,
		    vnode_t *, int, int);
extern void	nfs4_open_downgrade(int, int, nfs4_open_owner_t *,
		    nfs4_open_stream_t *, vnode_t *, cred_t *,
		    nfs4_lost_rqst_t *, nfs4_error_t *, cred_t **, seqid4 *);
extern seqid4	nfs4_get_open_seqid(nfs4_open_owner_t *);
extern cred_t	*nfs4_get_otw_cred(cred_t *, mntinfo4_t *,
		    nfs4_open_owner_t *);
extern void	nfs4_init_stateid_types(nfs4_stateid_types_t *);
extern void	nfs4_save_stateid(stateid4 *, nfs4_stateid_types_t *);

extern kmutex_t	nfs4_server_lst_lock;

extern void	nfs4callback_destroy(nfs4_server_t *);
extern void	nfs4_callback_init(void);
extern void	nfs4_callback_fini(void);
extern void	nfs4_cb_args(nfs4_server_t *, struct knetconfig *,
		    SETCLIENTID4args *);
extern void	nfs4delegreturn_async(struct rnode4 *, int, bool_t);

extern enum nfs4_delegreturn_policy nfs4_delegreturn_policy;

extern void	nfs4_add_mi_to_server(nfs4_server_t *, mntinfo4_t *);
extern void	nfs4_remove_mi_from_server(mntinfo4_t *, nfs4_server_t *);
extern nfs4_server_t *nfs4_move_mi(mntinfo4_t *, servinfo4_t *,
		    servinfo4_t *);
extern bool_t	nfs4_fs_active(nfs4_server_t *);
extern void	nfs4_server_rele(nfs4_server_t *);
extern bool_t	inlease(nfs4_server_t *);
extern bool_t	nfs4_has_pages(vnode_t *);
extern void	nfs4_log_badowner(mntinfo4_t *, nfs_opnum4);

#endif /* _KERNEL */
/*
 * Client State Recovery
 *
 * The following defines are used for rs_flags in
 * a nfs4_recov_state_t structure.
 *
 * NFS4_RS_RENAME_HELD	Indicates that the mi_rename_lock was held.
 * NFS4_RS_GRACE_MSG	Set once we have uprintf'ed a grace message.
 * NFS4_RS_DELAY_MSG	Set once we have uprintf'ed a delay message.
 * NFS4_RS_RECALL_HELD1	r_deleg_recall_lock for vp1 was held.
 * NFS4_RS_RECALL_HELD2	r_deleg_recall_lock for vp2 was held.
 */
#define	NFS4_RS_RENAME_HELD	0x000000001
#define	NFS4_RS_GRACE_MSG	0x000000002
#define	NFS4_RS_DELAY_MSG	0x000000004
#define	NFS4_RS_RECALL_HELD1	0x000000008
#define	NFS4_RS_RECALL_HELD2	0x000000010
/*
 * Information that is retrieved from nfs4_start_op() and that is
 * passed into nfs4_end_op().
 *
 * rs_sp is a reference to the nfs4_server that was found, or NULL.
 *
 * rs_num_retry_despite_err is the number of times the client retried an
 * OTW op despite a recovery error. It is only incremented for hints
 * exempt from normal R4RECOVERR processing
 * (OH_CLOSE/OH_LOCKU/OH_DELEGRETURN). (XXX this special-case code
 * needs review for possible removal.)
 * It is initialized wherever nfs4_recov_state_t is declared -- usually
 * very near initialization of rs_flags.
 */
typedef struct {
	nfs4_server_t	*rs_sp;
	int		rs_flags;
	int		rs_num_retry_despite_err;
} nfs4_recov_state_t;
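
/*
 * A minimal usage sketch (not in the original header), assuming
 * hypothetical locals 'mi', 'vp', and 'needrecov': rs_flags and
 * rs_num_retry_despite_err are initialized alongside the declaration,
 * and the over-the-wire call is bracketed by nfs4_start_op() and
 * nfs4_end_op(), declared later in this header.
 */
#if 0
	nfs4_recov_state_t recov_state;
	int e;

	recov_state.rs_flags = 0;
	recov_state.rs_num_retry_despite_err = 0;
	e = nfs4_start_op(mi, vp, NULL, &recov_state);
	if (e)
		return (e);
	/* ... perform the over-the-wire operation ... */
	nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
#endif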
/*
 * Flags for nfs4_check_remap, nfs4_remap_file and nfs4_remap_root.
 */
#define	NFS4_REMAP_CKATTRS	1
#define	NFS4_REMAP_NEEDSOP	2

#ifdef _KERNEL
extern int	nfs4_is_otw_open_necessary(nfs4_open_owner_t *, int,
		    vnode_t *, int, int *, int, nfs4_recov_state_t *);
extern void	nfs4setclientid(struct mntinfo4 *, struct cred *, bool_t,
		    nfs4_error_t *);
extern void	nfs4_reopen(vnode_t *, nfs4_open_stream_t *, nfs4_error_t *,
		    open_claim_type4, bool_t, bool_t);
extern void	nfs4_remap_root(struct mntinfo4 *, nfs4_error_t *, int);
extern void	nfs4_check_remap(mntinfo4_t *mi, vnode_t *vp, int,
		    nfs4_error_t *);
extern void	nfs4_remap_file(mntinfo4_t *mi, vnode_t *vp, int,
		    nfs4_error_t *);
extern int	nfs4_make_dotdot(struct nfs4_sharedfh *, hrtime_t,
		    vnode_t *, cred_t *, vnode_t **, int);
extern void	nfs4_fail_recov(vnode_t *, char *, int, nfsstat4);

extern int	nfs4_needs_recovery(nfs4_error_t *, bool_t, vfs_t *);
extern int	nfs4_recov_marks_dead(nfsstat4);
extern bool_t	nfs4_start_recovery(nfs4_error_t *, struct mntinfo4 *,
		    vnode_t *, vnode_t *, stateid4 *,
		    nfs4_lost_rqst_t *, nfs_opnum4, nfs4_bseqid_entry_t *,
		    vnode_t *, char *);
extern int	nfs4_start_op(struct mntinfo4 *, vnode_t *, vnode_t *,
		    nfs4_recov_state_t *);
extern void	nfs4_end_op(struct mntinfo4 *, vnode_t *, vnode_t *,
		    nfs4_recov_state_t *, bool_t);
extern int	nfs4_start_fop(struct mntinfo4 *, vnode_t *, vnode_t *,
		    nfs4_op_hint_t, nfs4_recov_state_t *, bool_t *);
extern void	nfs4_end_fop(struct mntinfo4 *, vnode_t *, vnode_t *,
		    nfs4_op_hint_t, nfs4_recov_state_t *, bool_t);
extern char	*nfs4_recov_action_to_str(nfs4_recov_t);
/*
 * In sequence, code desiring to unmount an ephemeral tree must
 * call nfs4_ephemeral_umount, nfs4_ephemeral_umount_activate,
 * and nfs4_ephemeral_umount_unlock. The _unlock must also be
 * called on all error paths that occur before it would naturally
 * be reached.
 *
 * The caller must also provide a pointer to a boolean to keep track
 * of whether or not the code in _unlock is to be run. A usage sketch
 * follows the three declarations below.
 */
extern void	nfs4_ephemeral_umount_activate(mntinfo4_t *,
		    bool_t *, nfs4_ephemeral_tree_t **);
extern int	nfs4_ephemeral_umount(mntinfo4_t *, int, cred_t *,
		    bool_t *, nfs4_ephemeral_tree_t **);
extern void	nfs4_ephemeral_umount_unlock(bool_t *,
		    nfs4_ephemeral_tree_t **);
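
/*
 * A minimal sketch (not in the original header) of the sequence
 * described above, assuming hypothetical locals 'mi', 'flag', and 'cr':
 * _unlock runs on both the error path and the normal path, gated by the
 * caller-supplied boolean.
 */
#if 0
	bool_t must_unlock = FALSE;
	nfs4_ephemeral_tree_t *eph_tree = NULL;
	int error;

	error = nfs4_ephemeral_umount(mi, flag, cr, &must_unlock, &eph_tree);
	if (error)
		goto out;
	/* ... perform the unmount proper ... */
	nfs4_ephemeral_umount_activate(mi, &must_unlock, &eph_tree);
out:
	nfs4_ephemeral_umount_unlock(&must_unlock, &eph_tree);
#endif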
extern int	nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp);

extern int	nfs4_callmapid(utf8string *, struct nfs_fsl_info *);
extern int	nfs4_fetch_locations(mntinfo4_t *, struct nfs4_sharedfh *,
		    char *, cred_t *, nfs4_ga_res_t *, COMPOUND4res_clnt *,
		    bool_t);
extern int	wait_for_recall(vnode_t *, vnode_t *, nfs4_op_hint_t,
		    nfs4_recov_state_t *);
extern void	nfs4_end_op_recall(vnode_t *, vnode_t *,
		    nfs4_recov_state_t *);
extern void	nfs4_send_siglost(pid_t, mntinfo4_t *mi, vnode_t *vp, bool_t,
		    int, nfsstat4);

extern time_t	nfs4err_delay_time;
extern void	nfs4_set_grace_wait(mntinfo4_t *);
extern void	nfs4_set_delay_wait(vnode_t *);
extern int	nfs4_wait_for_grace(mntinfo4_t *, nfs4_recov_state_t *);
extern int	nfs4_wait_for_delay(vnode_t *, nfs4_recov_state_t *);
extern nfs4_bseqid_entry_t *nfs4_create_bseqid_entry(nfs4_open_owner_t *,
		    nfs4_lock_owner_t *, vnode_t *, pid_t, nfs4_tag_type_t,
		    seqid4);

extern void	nfs4_resend_open_otw(vnode_t **, nfs4_lost_rqst_t *,
		    nfs4_error_t *);
extern void	nfs4_resend_delegreturn(nfs4_lost_rqst_t *, nfs4_error_t *,
		    nfs4_server_t *);
extern int	nfs4_rpc_retry_error(int);
extern int	nfs4_try_failover(nfs4_error_t *);
extern void	nfs4_free_msg(nfs4_debug_msg_t *);
extern void	nfs4_mnt_recov_kstat_init(vfs_t *);
extern void	nfs4_mi_kstat_inc_delay(mntinfo4_t *);
extern void	nfs4_mi_kstat_inc_no_grace(mntinfo4_t *);
extern char	*nfs4_stat_to_str(nfsstat4);
extern char	*nfs4_op_to_str(nfs_opnum4);

extern void	nfs4_queue_event(nfs4_event_type_t, mntinfo4_t *, char *,
		    uint_t, vnode_t *, vnode_t *, nfsstat4, char *, pid_t,
		    nfs4_tag_type_t, nfs4_tag_type_t, seqid4, seqid4);
extern void	nfs4_queue_fact(nfs4_fact_type_t, mntinfo4_t *, nfsstat4,
		    nfs4_recov_t, nfs_opnum4, bool_t, char *, int, vnode_t *);
#pragma	rarely_called(nfs4_queue_event)
#pragma	rarely_called(nfs4_queue_fact)
/* Used for preformed "." and ".." dirents */
extern char	*nfs4_dot_entries;
extern char	*nfs4_dot_dot_entry;

extern uint_t	nfs4_tsd_key;

#endif /* _KERNEL */
/*
 * Filehandle management.
 *
 * Filehandles can change in v4, so rather than storing the filehandle
 * directly in the rnode, etc., we manage the filehandle through one of
 * these shared filehandle objects.
 *
 * Locking: sfh_fh and sfh_tree are protected by the filesystem's
 * mi_fh_lock. The reference count and flags are protected by sfh_lock.
 * sfh_mi is read-only.
 *
 * Lock order: mntinfo4_t::mi_fh_lock > sfh_lock.
 */

typedef struct nfs4_sharedfh {
	nfs_fh4		sfh_fh;		/* key and current filehandle */
	kmutex_t	sfh_lock;
	uint_t		sfh_refcnt;	/* reference count */
	uint_t		sfh_flags;
	mntinfo4_t	*sfh_mi;	/* backptr to filesystem */
	avl_node_t	sfh_tree;	/* used by avl package */
} nfs4_sharedfh_t;

#define	SFH4_SAME(sfh1, sfh2)	((sfh1) == (sfh2))

/* sfh_flags */
#define	SFH4_IN_TREE	0x1	/* currently in an AVL tree */
extern void	sfh4_createtab(avl_tree_t *);
extern nfs4_sharedfh_t *sfh4_get(const nfs_fh4 *, mntinfo4_t *);
extern nfs4_sharedfh_t *sfh4_put(const nfs_fh4 *, mntinfo4_t *,
		    nfs4_sharedfh_t *);
extern void	sfh4_update(nfs4_sharedfh_t *, const nfs_fh4 *);
extern void	sfh4_copyval(const nfs4_sharedfh_t *, nfs4_fhandle_t *);
extern void	sfh4_hold(nfs4_sharedfh_t *);
extern void	sfh4_rele(nfs4_sharedfh_t **);
extern void	sfh4_printfhandle(const nfs4_sharedfh_t *);
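
/*
 * A minimal usage sketch (not in the original header), assuming a
 * hypothetical filehandle 'fh' and mount 'mi': sfh4_get() looks up or
 * creates the shared object (assumed here to be returned held), and
 * sfh4_rele() drops the hold through a pointer so the caller's
 * reference is cleared.
 */
#if 0
	nfs_fh4 fh;			/* hypothetical: fh from OTW reply */
	mntinfo4_t *mi;			/* hypothetical */
	nfs4_sharedfh_t *sfh;

	sfh = sfh4_get(&fh, mi);	/* find or create, with one hold */
	/* ... reference sfh->sfh_fh for later OTW calls ... */
	sfh4_rele(&sfh);		/* drop hold; sfh no longer usable */
#endif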
/*
 * Path and file name management.
 *
 * This type stores the name of an entry in the filesystem and keeps enough
 * information that it can provide a complete path. All fields are
 * protected by fn_lock, except for the reference count, which is managed
 * using atomic add/subtract.
 *
 * Additionally, the shared filehandle for this fname is stored.
 * Normally, when fn_get() creates this fname, it stores the passed-in
 * shared fh in fn_sfh by doing sfh4_hold. Similarly, the path that
 * destroys this fname releases the reference on the fh by doing sfh4_rele.
 *
 * fn_get uses fn_sfh to refine the comparison in cases
 * where we have matched the name but have differing file handles;
 * this normally happens due to
 *
 * 1. A server-side rename of a file or directory.
 * 2. Another client renaming a file or directory on the server.
 *
 * Differing names with the same filehandle are possible, as with
 * hardlinks, but differing filehandles with the same name component
 * will later confuse the client and can cause various panics.
 *
 * Lock order: child and then parent.
 */

typedef struct nfs4_fname {
	struct nfs4_fname *fn_parent;	/* parent name; null if fs root */
	char		*fn_name;	/* the actual name */
	ssize_t		fn_len;		/* strlen(fn_name) */
	uint32_t	fn_refcnt;	/* reference count */
	kmutex_t	fn_lock;
	avl_node_t	fn_tree;
	avl_tree_t	fn_children;	/* children, if any */
	nfs4_sharedfh_t	*fn_sfh;	/* The fh for this fname */
} nfs4_fname_t;
extern vnode_t	nfs4_xattr_notsupp_vnode;
#define	NFS4_XATTR_DIR_NOTSUPP	&nfs4_xattr_notsupp_vnode

extern nfs4_fname_t *fn_get(nfs4_fname_t *, char *, nfs4_sharedfh_t *);
extern void	fn_hold(nfs4_fname_t *);
extern void	fn_rele(nfs4_fname_t **);
extern char	*fn_name(nfs4_fname_t *);
extern char	*fn_path(nfs4_fname_t *);
extern void	fn_move(nfs4_fname_t *, nfs4_fname_t *, char *);
extern nfs4_fname_t *fn_parent(nfs4_fname_t *);
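
/*
 * A minimal usage sketch (not in the original header), assuming
 * hypothetical 'parent' and 'sfh' pointers: fn_get() returns a held
 * fname for one name component, fn_path() builds the complete path from
 * the parent chain, and fn_rele() drops the reference. The assumption
 * here is that the caller must eventually free the returned path.
 */
#if 0
	nfs4_fname_t *parent;		/* hypothetical parent fname */
	nfs4_sharedfh_t *sfh;		/* hypothetical fh for the entry */
	nfs4_fname_t *fnp;
	char *path;

	fnp = fn_get(parent, "entry", sfh);
	path = fn_path(fnp);		/* complete path for diagnostics */
	/* ... use path; caller is assumed responsible for freeing it ... */
	fn_rele(&fnp);
#endif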
/* Referral Support */
extern int	nfs4_process_referral(mntinfo4_t *, nfs4_sharedfh_t *, char *,
		    cred_t *, nfs4_ga_res_t *, COMPOUND4res_clnt *,
		    struct nfs_fsl_info *);
/*
 * Per-zone data for managing client handles, included in this file for the
 * sake of MDB.
 */
struct nfs4_clnt {
	struct chhead	*nfscl_chtable4;
	kmutex_t	nfscl_chtable4_lock;
	zoneid_t	nfscl_zoneid;
	list_node_t	nfscl_node;
	struct clstat4	nfscl_stat;
};

#ifdef	__cplusplus
}
#endif

#endif /* _NFS4_CLNT_H */