kernel/fs/nfs/nfs4_rnode.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25
  26 /*
  27  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  28  *      All Rights Reserved
  29  */
  30
  31 /*
  32  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  33  * Copyright (c) 2017 by Delphix. All rights reserved.
  34  */
  35
  36 #include <sys/param.h>
  37 #include <sys/types.h>
  38 #include <sys/systm.h>
  39 #include <sys/cred.h>
  40 #include <sys/proc.h>
  41 #include <sys/user.h>
  42 #include <sys/time.h>
  43 #include <sys/buf.h>
  44 #include <sys/vfs.h>
  45 #include <sys/vnode.h>
  46 #include <sys/socket.h>
  47 #include <sys/uio.h>
  48 #include <sys/tiuser.h>
  49 #include <sys/swap.h>
  50 #include <sys/errno.h>
  51 #include <sys/debug.h>
  52 #include <sys/kmem.h>
  53 #include <sys/kstat.h>
  54 #include <sys/cmn_err.h>
  55 #include <sys/vtrace.h>
  56 #include <sys/session.h>
  57 #include <sys/dnlc.h>
  58 #include <sys/bitmap.h>
  59 #include <sys/acl.h>
  60 #include <sys/ddi.h>
  61 #include <sys/pathname.h>
  62 #include <sys/flock.h>
  63 #include <sys/dirent.h>
  64 #include <sys/flock.h>
  65 #include <sys/callb.h>
  66 #include <sys/sdt.h>
  67
  68 #include <vm/pvn.h>
  69
  70 #include <rpc/types.h>
  71 #include <rpc/xdr.h>
  72 #include <rpc/auth.h>
  73 #include <rpc/rpcsec_gss.h>
  74 #include <rpc/clnt.h>
  75
  76 #include <nfs/nfs.h>
  77 #include <nfs/nfs_clnt.h>
  78 #include <nfs/nfs_acl.h>
  79
  80 #include <nfs/nfs4.h>
  81 #include <nfs/rnode4.h>
  82 #include <nfs/nfs4_clnt.h>
  83
  84 /*
  85  * The hash queues for the access to active and cached rnodes
  86  * are organized as doubly linked lists.  A reader/writer lock
  87  * for each hash bucket is used to control access and to synchronize
  88  * lookups, additions, and deletions from the hash queue.
  89  *
  90  * The rnode freelist is organized as a doubly linked list with
  91  * a head pointer.  Additions and deletions are synchronized via
  92  * a single mutex.
  93  *
  94  * In order to add an rnode to the free list, it must be hashed into
  95  * a hash queue and the exclusive lock to the hash queue be held.
  96  * If an rnode is not hashed into a hash queue, then it is destroyed
  97  * because it represents no valuable information that can be reused
  98  * about the file.  The exclusive lock to the hash queue must be
  99  * held in order to prevent a lookup in the hash queue from finding
 100  * the rnode and using it and assuming that the rnode is not on the
 101  * freelist.  The lookup in the hash queue will have the hash queue
 102  * locked, either exclusive or shared.
 103  *
 104  * The vnode reference count for each rnode is not allowed to drop
 105  * below 1.  This prevents external entities, such as the VM
 106  * subsystem, from acquiring references to vnodes already on the
 107  * freelist and then trying to place them back on the freelist
  108  * when their reference is released.  This means that when an
 109  * rnode is looked up in the hash queues, then either the rnode
 110  * is removed from the freelist and that reference is transferred to
 111  * the new reference or the vnode reference count must be incremented
 112  * accordingly.  The mutex for the freelist must be held in order to
 113  * accurately test to see if the rnode is on the freelist or not.
 114  * The hash queue lock might be held shared and it is possible that
 115  * two different threads may race to remove the rnode from the
 116  * freelist.  This race can be resolved by holding the mutex for the
 117  * freelist.  Please note that the mutex for the freelist does not
 118  * need to be held if the rnode is not on the freelist.  It can not be
 119  * placed on the freelist due to the requirement that the thread
 120  * putting the rnode on the freelist must hold the exclusive lock
 121  * to the hash queue and the thread doing the lookup in the hash
 122  * queue is holding either a shared or exclusive lock to the hash
 123  * queue.
 124  *
 125  * The lock ordering is:
 126  *
 127  *      hash bucket lock -> vnode lock
 128  *      hash bucket lock -> freelist lock -> r_statelock
 129  */
 130 r4hashq_t *rtable4;
 131
 132 static kmutex_t rp4freelist_lock;
 133 static rnode4_t *rp4freelist = NULL;
 134 static long rnode4_new = 0;
 135 int rtable4size;
 136 static int rtable4mask;
 137 static struct kmem_cache *rnode4_cache;
 138 static int rnode4_hashlen = 4;
 139
 140 static void     r4inactive(rnode4_t *, cred_t *);
 141 static vnode_t  *make_rnode4(nfs4_sharedfh_t *, r4hashq_t *, struct vfs *,
 142                     const struct vnodeops *,
 143                     int (*)(vnode_t *, page_t *, uoff_t *, size_t *, int,
 144                     cred_t *),
 145                     int *, cred_t *);
 146 static void     rp4_rmfree(rnode4_t *);
 147 int             nfs4_free_data_reclaim(rnode4_t *);
 148 static int      nfs4_active_data_reclaim(rnode4_t *);
 149 static int      nfs4_free_reclaim(void);
 150 static int      nfs4_active_reclaim(void);
 151 static int      nfs4_rnode_reclaim(void);
 152 static void     nfs4_reclaim(void *);
 153 static int      isrootfh(nfs4_sharedfh_t *, rnode4_t *);
 154 static void     uninit_rnode4(rnode4_t *);
 155 static void     destroy_rnode4(rnode4_t *);
 156 static void     r4_stub_set(rnode4_t *, nfs4_stub_type_t);
 157
 158 #ifdef DEBUG
 159 static int r4_check_for_dups = 0; /* Flag to enable dup rnode detection. */
 160 static int nfs4_rnode_debug = 0;
 161 /* if nonzero, kmem_cache_free() rnodes rather than place on freelist */
 162 static int nfs4_rnode_nofreelist = 0;
 163 /* give messages on colliding shared filehandles */
 164 static void     r4_dup_check(rnode4_t *, vfs_t *);
 165 #endif
 166
 167 /*
 168  * If the vnode has pages, run the list and check for any that are
 169  * still dangling.  We call this routine before putting an rnode on
 170  * the free list.
 171  */
 172 static int
 173 nfs4_dross_pages(vnode_t *vp)
 174 {
 175         page_t *pp;
 176
 177         vmobject_lock(&vp->v_object);
 178         for (pp = vmobject_get_head(&vp->v_object);
 179              pp != NULL;
 180              pp = vmobject_get_next(&vp->v_object, pp)) {
 181                 if (PP_ISPVN_TAG(pp) &&
 182                     pp->p_fsdata != C_NOCOMMIT) {
 183                         vmobject_unlock(&vp->v_object);
 184                         return (1);
 185                 }
 186         }
 187         vmobject_unlock(&vp->v_object);
 188
 189         return (0);
 190 }
 191
 192 /*
 193  * Flush any pages left on this rnode.
 194  */
 195 static void
 196 r4flushpages(rnode4_t *rp, cred_t *cr)
 197 {
 198         vnode_t *vp;
 199         int error;
 200
 201         /*
 202          * Before freeing anything, wait until all asynchronous
 203          * activity is done on this rnode.  This will allow all
 204          * asynchronous read ahead and write behind i/o's to
 205          * finish.
 206          */
 207         mutex_enter(&rp->r_statelock);
 208         while (rp->r_count > 0)
 209                 cv_wait(&rp->r_cv, &rp->r_statelock);
 210         mutex_exit(&rp->r_statelock);
 211
 212         /*
 213          * Flush and invalidate all pages associated with the vnode.
 214          */
 215         vp = RTOV4(rp);
 216         if (nfs4_has_pages(vp)) {
 217                 ASSERT(vp->v_type != VCHR);
 218                 if ((rp->r_flags & R4DIRTY) && !rp->r_error) {
 219                         error = fop_putpage(vp, 0, 0, 0, cr, NULL);
 220                         if (error && (error == ENOSPC || error == EDQUOT)) {
 221                                 mutex_enter(&rp->r_statelock);
 222                                 if (!rp->r_error)
 223                                         rp->r_error = error;
 224                                 mutex_exit(&rp->r_statelock);
 225                         }
 226                 }
 227                 nfs4_invalidate_pages(vp, 0, cr);
 228         }
 229 }
 230
 231 /*
 232  * Free the resources associated with an rnode.
 233  */
 234 static void
 235 r4inactive(rnode4_t *rp, cred_t *cr)
 236 {
 237         vnode_t *vp;
 238         char *contents;
 239         int size;
 240         vsecattr_t *vsp;
 241         vnode_t *xattr;
 242
 243         r4flushpages(rp, cr);
 244
 245         vp = RTOV4(rp);
 246
 247         /*
 248          * Free any held caches which may be
 249          * associated with this rnode.
 250          */
 251         mutex_enter(&rp->r_statelock);
 252         contents = rp->r_symlink.contents;
 253         size = rp->r_symlink.size;
 254         rp->r_symlink.contents = NULL;
 255         vsp = rp->r_secattr;
 256         rp->r_secattr = NULL;
 257         xattr = rp->r_xattr_dir;
 258         rp->r_xattr_dir = NULL;
 259         mutex_exit(&rp->r_statelock);
 260
 261         /*
 262          * Free the access cache entries.
 263          */
 264         (void) nfs4_access_purge_rp(rp);
 265
 266         /*
 267          * Free the readdir cache entries.
 268          */
 269         nfs4_purge_rddir_cache(vp);
 270
 271         /*
 272          * Free the symbolic link cache.
 273          */
 274         if (contents != NULL) {
 275
 276                 kmem_free((void *)contents, size);
 277         }
 278
 279         /*
 280          * Free any cached ACL.
 281          */
 282         if (vsp != NULL)
 283                 nfs4_acl_free_cache(vsp);
 284
 285         /*
 286          * Release the cached xattr_dir
 287          */
 288         if (xattr != NULL)
 289                 VN_RELE(xattr);
 290 }
 291
 292 /*
 293  * We have seen a case that the fh passed in is for "." which
 294  * should be a VROOT node, however, the fh is different from the
 295  * root fh stored in the mntinfo4_t. The invalid fh might be
 296  * from a misbehaved server and will panic the client system at
 297  * a later time. To avoid the panic, we drop the bad fh, use
 298  * the root fh from mntinfo4_t, and print an error message
 299  * for attention.
 300  */
 301 nfs4_sharedfh_t *
 302 badrootfh_check(nfs4_sharedfh_t *fh, nfs4_fname_t *nm, mntinfo4_t *mi,
 303     int *wasbad)
 304 {
 305         char *s;
 306
 307         *wasbad = 0;
 308         s = fn_name(nm);
 309         ASSERT(strcmp(s, "..") != 0);
 310
 311         if ((s[0] == '.' && s[1] == '\0') && fh &&
 312             !SFH4_SAME(mi->mi_rootfh, fh)) {
 313 #ifdef DEBUG
 314                 nfs4_fhandle_t fhandle;
 315
 316                 zcmn_err(mi->mi_zone->zone_id, CE_WARN,
 317                     "Server %s returns a different "
 318                     "root filehandle for the path %s:",
 319                     mi->mi_curr_serv->sv_hostname,
 320                     mi->mi_curr_serv->sv_path);
 321
 322                 /* print the bad fh */
 323                 fhandle.fh_len = fh->sfh_fh.nfs_fh4_len;
 324                 bcopy(fh->sfh_fh.nfs_fh4_val, fhandle.fh_buf,
 325                     fhandle.fh_len);
 326                 nfs4_printfhandle(&fhandle);
 327
 328                 /* print mi_rootfh */
 329                 fhandle.fh_len = mi->mi_rootfh->sfh_fh.nfs_fh4_len;
 330                 bcopy(mi->mi_rootfh->sfh_fh.nfs_fh4_val, fhandle.fh_buf,
 331                     fhandle.fh_len);
 332                 nfs4_printfhandle(&fhandle);
 333 #endif
 334                 /* use mi_rootfh instead; fh will be rele by the caller */
 335                 fh = mi->mi_rootfh;
 336                 *wasbad = 1;
 337         }
 338
 339         kmem_free(s, MAXNAMELEN);
 340         return (fh);
 341 }
 342
 343 void
 344 r4_do_attrcache(vnode_t *vp, nfs4_ga_res_t *garp, int newnode,
 345     hrtime_t t, cred_t *cr, int index)
 346 {
 347         int is_stub;
 348         vattr_t *attr;
 349         /*
 350          * Don't add to attrcache if time overflow, but
 351          * no need to check because either attr is null or the time
 352          * values in it were processed by nfs4_time_ntov(), which checks
 353          * for time overflows.
 354          */
 355         attr = garp ? &garp->n4g_va : NULL;
 356
 357         if (attr) {
 358                 if (!newnode) {
 359                         rw_exit(&rtable4[index].r_lock);
 360 #ifdef DEBUG
 361                         if (vp->v_type != attr->va_type &&
 362                             vp->v_type != VNON && attr->va_type != VNON) {
 363                                 zcmn_err(VTOMI4(vp)->mi_zone->zone_id, CE_WARN,
 364                                     "makenfs4node: type (%d) doesn't "
 365                                     "match type of found node at %p (%d)",
 366                                     attr->va_type, (void *)vp, vp->v_type);
 367                         }
 368 #endif
 369                         nfs4_attr_cache(vp, garp, t, cr, TRUE, NULL);
 370                 } else {
 371                         rnode4_t *rp = VTOR4(vp);
 372
 373                         vp->v_type = attr->va_type;
 374                         vp->v_rdev = attr->va_rdev;
 375
 376                         /*
 377                          * Turn this object into a "stub" object if we
 378                          * crossed an underlying server fs boundary.
 379                          * To make this check, during mount we save the
 380                          * fsid of the server object being mounted.
 381                          * Here we compare this object's server fsid
 382                          * with the fsid we saved at mount.  If they
 383                          * are different, we crossed server fs boundary.
 384                          *
 385                          * The stub type is set (or not) at rnode
 386                          * creation time and it never changes for life
 387                          * of the rnode.
 388                          *
 389                          * This stub will be for a mirror-mount, rather than
 390                          * a referral (the latter also sets R4SRVSTUB).
 391                          *
 392                          * The stub type is also set during RO failover,
 393                          * nfs4_remap_file().
 394                          *
 395                          * We don't bother with taking r_state_lock to
 396                          * set the stub type because this is a new rnode
 397                          * and we're holding the hash bucket r_lock RW_WRITER.
 398                          * No other thread could have obtained access
 399                          * to this rnode.
 400                          */
 401                         is_stub = 0;
 402                         if (garp->n4g_fsid_valid) {
 403                                 fattr4_fsid ga_fsid = garp->n4g_fsid;
 404                                 servinfo4_t *svp = rp->r_server;
 405
 406                                 rp->r_srv_fsid = ga_fsid;
 407
 408                                 (void) nfs_rw_enter_sig(&svp->sv_lock,
 409                                     RW_READER, 0);
 410                                 if (!FATTR4_FSID_EQ(&ga_fsid, &svp->sv_fsid))
 411                                         is_stub = 1;
 412                                 nfs_rw_exit(&svp->sv_lock);
 413                         }
 414
 415                         if (is_stub)
 416                                 r4_stub_mirrormount(rp);
 417                         else
 418                                 r4_stub_none(rp);
 419
 420                         /* Can not cache partial attr */
 421                         if (attr->va_mask == AT_ALL)
 422                                 nfs4_attrcache_noinval(vp, garp, t);
 423                         else
 424                                 PURGE_ATTRCACHE4(vp);
 425
 426                         rw_exit(&rtable4[index].r_lock);
 427                 }
 428         } else {
 429                 if (newnode) {
 430                         PURGE_ATTRCACHE4(vp);
 431                 }
 432                 rw_exit(&rtable4[index].r_lock);
 433         }
 434 }
 435
 436 /*
 437  * Find or create an rnode based primarily on filehandle.  To be
 438  * used when dvp (vnode for parent directory) is not available;
 439  * otherwise, makenfs4node() should be used.
 440  *
 441  * The nfs4_fname_t argument *npp is consumed and nulled out.
 442  */
 443
 444 vnode_t *
 445 makenfs4node_by_fh(nfs4_sharedfh_t *sfh, nfs4_sharedfh_t *psfh,
 446     nfs4_fname_t **npp, nfs4_ga_res_t *garp,
 447     mntinfo4_t *mi, cred_t *cr, hrtime_t t)
 448 {
 449         vfs_t *vfsp = mi->mi_vfsp;
 450         int newnode = 0;
 451         vnode_t *vp;
 452         rnode4_t *rp;
 453         svnode_t *svp;
 454         nfs4_fname_t *name, *svpname;
 455         int index;
 456
 457         ASSERT(npp && *npp);
 458         name = *npp;
 459         *npp = NULL;
 460
 461         index = rtable4hash(sfh);
 462         rw_enter(&rtable4[index].r_lock, RW_READER);
 463
 464         vp = make_rnode4(sfh, &rtable4[index], vfsp,
 465             &nfs4_vnodeops, nfs4_putapage, &newnode, cr);
 466
 467         svp = VTOSV(vp);
 468         rp = VTOR4(vp);
 469         if (newnode) {
 470                 svp->sv_forw = svp->sv_back = svp;
 471                 svp->sv_name = name;
 472                 if (psfh != NULL)
 473                         sfh4_hold(psfh);
 474                 svp->sv_dfh = psfh;
 475         } else {
 476                 /*
 477                  * It is possible that due to a server
 478                  * side rename fnames have changed.
 479                  * update the fname here.
 480                  */
 481                 mutex_enter(&rp->r_svlock);
 482                 svpname = svp->sv_name;
 483                 if (svp->sv_name != name) {
 484                         svp->sv_name = name;
 485                         mutex_exit(&rp->r_svlock);
 486                         fn_rele(&svpname);
 487                 } else {
 488                         mutex_exit(&rp->r_svlock);
 489                         fn_rele(&name);
 490                 }
 491         }
 492
 493         ASSERT(RW_LOCK_HELD(&rtable4[index].r_lock));
 494         r4_do_attrcache(vp, garp, newnode, t, cr, index);
 495         ASSERT(rw_owner(&rtable4[index].r_lock) != curthread);
 496
 497         return (vp);
 498 }
 499
 500 /*
 501  * Find or create a vnode for the given filehandle, filesystem, parent, and
 502  * name.  The reference to nm is consumed, so the caller must first do an
 503  * fn_hold() if it wants to continue using nm after this call.
 504  */
 505 vnode_t *
 506 makenfs4node(nfs4_sharedfh_t *fh, nfs4_ga_res_t *garp, struct vfs *vfsp,
 507     hrtime_t t, cred_t *cr, vnode_t *dvp, nfs4_fname_t *nm)
 508 {
 509         vnode_t *vp;
 510         int newnode;
 511         int index;
 512         mntinfo4_t *mi = VFTOMI4(vfsp);
 513         int had_badfh = 0;
 514         rnode4_t *rp;
 515
 516         ASSERT(dvp != NULL);
 517
 518         fh = badrootfh_check(fh, nm, mi, &had_badfh);
 519
 520         index = rtable4hash(fh);
 521         rw_enter(&rtable4[index].r_lock, RW_READER);
 522
 523         /*
 524          * Note: make_rnode4() may upgrade the hash bucket lock to exclusive.
 525          */
 526         vp = make_rnode4(fh, &rtable4[index], vfsp, &nfs4_vnodeops,
 527             nfs4_putapage, &newnode, cr);
 528
 529         rp = VTOR4(vp);
 530         sv_activate(&vp, dvp, &nm, newnode);
 531         if (dvp->v_flag & V_XATTRDIR) {
 532                 mutex_enter(&rp->r_statelock);
 533                 rp->r_flags |= R4ISXATTR;
 534                 mutex_exit(&rp->r_statelock);
 535         }
 536
 537         /* if getting a bad file handle, do not cache the attributes. */
 538         if (had_badfh) {
 539                 rw_exit(&rtable4[index].r_lock);
 540                 return (vp);
 541         }
 542
 543         ASSERT(RW_LOCK_HELD(&rtable4[index].r_lock));
 544         r4_do_attrcache(vp, garp, newnode, t, cr, index);
 545         ASSERT(rw_owner(&rtable4[index].r_lock) != curthread);
 546
 547         return (vp);
 548 }
 549
 550 /*
 551  * Hash on address of filehandle object.
 552  * XXX totally untuned.
 553  */
 554
 555 int
 556 rtable4hash(nfs4_sharedfh_t *fh)
 557 {
 558         return (((uintptr_t)fh / sizeof (*fh)) & rtable4mask);
 559 }
 560
 561 /*
 562  * Find or create the vnode for the given filehandle and filesystem.
 563  * *newnode is set to zero if the vnode already existed; non-zero if it had
 564  * to be created.
 565  *
 566  * Note: make_rnode4() may upgrade the hash bucket lock to exclusive.
 567  */
 568
 569 static vnode_t *
 570 make_rnode4(nfs4_sharedfh_t *fh, r4hashq_t *rhtp, struct vfs *vfsp,
 571     const struct vnodeops *vops,
 572     int (*putapage)(vnode_t *, page_t *, uoff_t *, size_t *, int, cred_t *),
 573     int *newnode, cred_t *cr)
 574 {
 575         rnode4_t *rp;
 576         rnode4_t *trp;
 577         vnode_t *vp;
 578         mntinfo4_t *mi;
 579
 580         ASSERT(RW_READ_HELD(&rhtp->r_lock));
 581
 582         mi = VFTOMI4(vfsp);
 583
 584 start:
 585         if ((rp = r4find(rhtp, fh, vfsp)) != NULL) {
 586                 vp = RTOV4(rp);
 587                 *newnode = 0;
 588                 return (vp);
 589         }
 590         rw_exit(&rhtp->r_lock);
 591
 592         mutex_enter(&rp4freelist_lock);
 593
 594         if (rp4freelist != NULL && rnode4_new >= nrnode) {
 595                 rp = rp4freelist;
 596                 rp4_rmfree(rp);
 597                 mutex_exit(&rp4freelist_lock);
 598
 599                 vp = RTOV4(rp);
 600
 601                 if (rp->r_flags & R4HASHED) {
 602                         rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
 603                         mutex_enter(&vp->v_lock);
 604                         if (vp->v_count > 1) {
 605                                 VN_RELE_LOCKED(vp);
 606                                 mutex_exit(&vp->v_lock);
 607                                 rw_exit(&rp->r_hashq->r_lock);
 608                                 rw_enter(&rhtp->r_lock, RW_READER);
 609                                 goto start;
 610                         }
 611                         mutex_exit(&vp->v_lock);
 612                         rp4_rmhash_locked(rp);
 613                         rw_exit(&rp->r_hashq->r_lock);
 614                 }
 615
 616                 r4inactive(rp, cr);
 617
 618                 mutex_enter(&vp->v_lock);
 619                 if (vp->v_count > 1) {
 620                         VN_RELE_LOCKED(vp);
 621                         mutex_exit(&vp->v_lock);
 622                         rw_enter(&rhtp->r_lock, RW_READER);
 623                         goto start;
 624                 }
 625                 mutex_exit(&vp->v_lock);
 626                 vn_invalid(vp);
 627
 628                 /*
 629                  * destroy old locks before bzero'ing and
 630                  * recreating the locks below.
 631                  */
 632                 uninit_rnode4(rp);
 633
 634                 /*
 635                  * Make sure that if rnode is recycled then
 636                  * VFS count is decremented properly before
 637                  * reuse.
 638                  */
 639                 VFS_RELE(vp->v_vfsp);
 640                 vn_reinit(vp);
 641         } else {
 642                 vnode_t *new_vp;
 643
 644                 mutex_exit(&rp4freelist_lock);
 645
 646                 rp = kmem_cache_alloc(rnode4_cache, KM_SLEEP);
 647                 new_vp = vn_alloc(KM_SLEEP);
 648
 649                 atomic_inc_ulong((ulong_t *)&rnode4_new);
 650 #ifdef DEBUG
 651                 clstat4_debug.nrnode.value.ui64++;
 652 #endif
 653                 vp = new_vp;
 654         }
 655
 656         bzero(rp, sizeof (*rp));
 657         rp->r_vnode = vp;
 658         nfs_rw_init(&rp->r_rwlock, NULL, RW_DEFAULT, NULL);
 659         nfs_rw_init(&rp->r_lkserlock, NULL, RW_DEFAULT, NULL);
 660         mutex_init(&rp->r_svlock, NULL, MUTEX_DEFAULT, NULL);
 661         mutex_init(&rp->r_statelock, NULL, MUTEX_DEFAULT, NULL);
 662         mutex_init(&rp->r_statev4_lock, NULL, MUTEX_DEFAULT, NULL);
 663         mutex_init(&rp->r_os_lock, NULL, MUTEX_DEFAULT, NULL);
 664         rp->created_v4 = 0;
 665         list_create(&rp->r_open_streams, sizeof (nfs4_open_stream_t),
 666             offsetof(nfs4_open_stream_t, os_node));
 667         rp->r_lo_head.lo_prev_rnode = &rp->r_lo_head;
 668         rp->r_lo_head.lo_next_rnode = &rp->r_lo_head;
 669         cv_init(&rp->r_cv, NULL, CV_DEFAULT, NULL);
 670         cv_init(&rp->r_commit.c_cv, NULL, CV_DEFAULT, NULL);
 671         rp->r_flags = R4READDIRWATTR;
 672         rp->r_fh = fh;
 673         rp->r_hashq = rhtp;
 674         sfh4_hold(rp->r_fh);
 675         rp->r_server = mi->mi_curr_serv;
 676         rp->r_deleg_type = OPEN_DELEGATE_NONE;
 677         rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
 678         nfs_rw_init(&rp->r_deleg_recall_lock, NULL, RW_DEFAULT, NULL);
 679
 680         rddir4_cache_create(rp);
 681         rp->r_putapage = putapage;
 682         vn_setops(vp, vops);
 683         vp->v_data = (caddr_t)rp;
 684         vp->v_vfsp = vfsp;
 685         VFS_HOLD(vfsp);
 686         vp->v_type = VNON;
 687         vp->v_flag |= VMODSORT;
 688         if (isrootfh(fh, rp))
 689                 vp->v_flag = VROOT;
 690         vn_exists(vp);
 691
 692         /*
 693          * There is a race condition if someone else
 694          * alloc's the rnode while no locks are held, so we
 695          * check again and recover if found.
 696          */
 697         rw_enter(&rhtp->r_lock, RW_WRITER);
 698         if ((trp = r4find(rhtp, fh, vfsp)) != NULL) {
 699                 vp = RTOV4(trp);
 700                 *newnode = 0;
 701                 rw_exit(&rhtp->r_lock);
 702                 rp4_addfree(rp, cr);
 703                 rw_enter(&rhtp->r_lock, RW_READER);
 704                 return (vp);
 705         }
 706         rp4_addhash(rp);
 707         *newnode = 1;
 708         return (vp);
 709 }
 710
 711 static void
 712 uninit_rnode4(rnode4_t *rp)
 713 {
 714         vnode_t *vp = RTOV4(rp);
 715
 716         ASSERT(rp != NULL);
 717         ASSERT(vp != NULL);
 718         ASSERT(vp->v_count == 1);
 719         ASSERT(rp->r_count == 0);
 720         ASSERT(rp->r_mapcnt == 0);
 721         if (rp->r_flags & R4LODANGLERS) {
 722                 nfs4_flush_lock_owners(rp);
 723         }
 724         ASSERT(rp->r_lo_head.lo_next_rnode == &rp->r_lo_head);
 725         ASSERT(rp->r_lo_head.lo_prev_rnode == &rp->r_lo_head);
 726         ASSERT(!(rp->r_flags & R4HASHED));
 727         ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);
 728         nfs4_clear_open_streams(rp);
 729         list_destroy(&rp->r_open_streams);
 730
 731         /*
 732          * Destroy the rddir cache first since we need to grab the r_statelock.
 733          */
 734         mutex_enter(&rp->r_statelock);
 735         rddir4_cache_destroy(rp);
 736         mutex_exit(&rp->r_statelock);
 737         sv_uninit(&rp->r_svnode);
 738         sfh4_rele(&rp->r_fh);
 739         nfs_rw_destroy(&rp->r_rwlock);
 740         nfs_rw_destroy(&rp->r_lkserlock);
 741         mutex_destroy(&rp->r_statelock);
 742         mutex_destroy(&rp->r_statev4_lock);
 743         mutex_destroy(&rp->r_os_lock);
 744         cv_destroy(&rp->r_cv);
 745         cv_destroy(&rp->r_commit.c_cv);
 746         nfs_rw_destroy(&rp->r_deleg_recall_lock);
 747         if (rp->r_flags & R4DELMAPLIST)
 748                 list_destroy(&rp->r_indelmap);
 749 }
 750
 751 /*
 752  * Put an rnode on the free list.
 753  *
 754  * Rnodes which were allocated above and beyond the normal limit
 755  * are immediately freed.
 756  */
 757 void
 758 rp4_addfree(rnode4_t *rp, cred_t *cr)
 759 {
 760         vnode_t *vp;
 761         vnode_t *xattr;
 762         struct vfs *vfsp;
 763
 764         vp = RTOV4(rp);
 765         ASSERT(vp->v_count >= 1);
 766         ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);
 767
 768         /*
 769          * If we have too many rnodes allocated and there are no
 770          * references to this rnode, or if the rnode is no longer
 771          * accessible by it does not reside in the hash queues,
 772          * or if an i/o error occurred while writing to the file,
 773          * then just free it instead of putting it on the rnode
 774          * freelist.
 775          */
 776         vfsp = vp->v_vfsp;
 777         if (((rnode4_new > nrnode || !(rp->r_flags & R4HASHED) ||
 778 #ifdef DEBUG
 779             (nfs4_rnode_nofreelist != 0) ||
 780 #endif
 781             rp->r_error || (rp->r_flags & R4RECOVERR) ||
 782             (vfsp->vfs_flag & VFS_UNMOUNTED)) && rp->r_count == 0)) {
 783                 if (rp->r_flags & R4HASHED) {
 784                         rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
 785                         mutex_enter(&vp->v_lock);
 786                         if (vp->v_count > 1) {
 787                                 VN_RELE_LOCKED(vp);
 788                                 mutex_exit(&vp->v_lock);
 789                                 rw_exit(&rp->r_hashq->r_lock);
 790                                 return;
 791                         }
 792                         mutex_exit(&vp->v_lock);
 793                         rp4_rmhash_locked(rp);
 794                         rw_exit(&rp->r_hashq->r_lock);
 795                 }
 796
 797                 /*
 798                  * Make sure we don't have a delegation on this rnode
 799                  * before destroying it.
 800                  */
 801                 if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
 802                         (void) nfs4delegreturn(rp,
 803                             NFS4_DR_FORCE|NFS4_DR_PUSH|NFS4_DR_REOPEN);
 804                 }
 805
 806                 r4inactive(rp, cr);
 807
 808                 /*
 809                  * Recheck the vnode reference count.  We need to
 810                  * make sure that another reference has not been
 811                  * acquired while we were not holding v_lock.  The
 812                  * rnode is not in the rnode hash queues; one
 813                  * way for a reference to have been acquired
 814                  * is for a fop_putpage because the rnode was marked
 815                  * with R4DIRTY or for a modified page.  This
 816                  * reference may have been acquired before our call
 817                  * to r4inactive.  The i/o may have been completed,
 818                  * thus allowing r4inactive to complete, but the
 819                  * reference to the vnode may not have been released
 820                  * yet.  In any case, the rnode can not be destroyed
 821                  * until the other references to this vnode have been
 822                  * released.  The other references will take care of
 823                  * either destroying the rnode or placing it on the
 824                  * rnode freelist.  If there are no other references,
 825                  * then the rnode may be safely destroyed.
 826                  */
 827                 mutex_enter(&vp->v_lock);
 828                 if (vp->v_count > 1) {
 829                         VN_RELE_LOCKED(vp);
 830                         mutex_exit(&vp->v_lock);
 831                         return;
 832                 }
 833                 mutex_exit(&vp->v_lock);
 834
 835                 destroy_rnode4(rp);
 836                 return;
 837         }
 838
 839         /*
 840          * Lock the hash queue and then recheck the reference count
 841          * to ensure that no other threads have acquired a reference
 842          * to indicate that the rnode should not be placed on the
 843          * freelist.  If another reference has been acquired, then
 844          * just release this one and let the other thread complete
 845          * the processing of adding this rnode to the freelist.
 846          */
 847 again:
 848         rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
 849
 850         mutex_enter(&vp->v_lock);
 851         if (vp->v_count > 1) {
 852                 VN_RELE_LOCKED(vp);
 853                 mutex_exit(&vp->v_lock);
 854                 rw_exit(&rp->r_hashq->r_lock);
 855                 return;
 856         }
 857         mutex_exit(&vp->v_lock);
 858
 859         /*
 860          * Make sure we don't put an rnode with a delegation
 861          * on the free list.
 862          */
 863         if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
 864                 rw_exit(&rp->r_hashq->r_lock);
 865                 (void) nfs4delegreturn(rp,
 866                     NFS4_DR_FORCE|NFS4_DR_PUSH|NFS4_DR_REOPEN);
 867                 goto again;
 868         }
 869
 870         /*
 871          * Now that we have the hash queue lock, and we know there
 872          * are not anymore references on the vnode, check to make
 873          * sure there aren't any open streams still on the rnode.
 874          * If so, drop the hash queue lock, remove the open streams,
 875          * and recheck the v_count.
 876          */
 877         mutex_enter(&rp->r_os_lock);
 878         if (list_head(&rp->r_open_streams) != NULL) {
 879                 mutex_exit(&rp->r_os_lock);
 880                 rw_exit(&rp->r_hashq->r_lock);
 881                 if (nfs_zone() != VTOMI4(vp)->mi_zone)
 882                         nfs4_clear_open_streams(rp);
 883                 else
 884                         (void) nfs4close_all(vp, cr);
 885                 goto again;
 886         }
 887         mutex_exit(&rp->r_os_lock);
 888
 889         /*
 890          * Before we put it on the freelist, make sure there are no pages.
 891          * If there are, flush and commit of all of the dirty and
 892          * uncommitted pages, assuming the file system isn't read only.
 893          */
 894         if (!(vp->v_vfsp->vfs_flag & VFS_RDONLY) && nfs4_dross_pages(vp)) {
 895                 rw_exit(&rp->r_hashq->r_lock);
 896                 r4flushpages(rp, cr);
 897                 goto again;
 898         }
 899
 900         /*
 901          * Before we put it on the freelist, make sure there is no
 902          * active xattr directory cached, the freelist will not
 903          * have its entries r4inactive'd if there is still an active
 904          * rnode, thus nothing in the freelist can hold another
 905          * rnode active.
 906          */
 907         xattr = rp->r_xattr_dir;
 908         rp->r_xattr_dir = NULL;
 909
 910         /*
 911          * If there is no cached data or metadata for this file, then
 912          * put the rnode on the front of the freelist so that it will
 913          * be reused before other rnodes which may have cached data or
 914          * metadata associated with them.
 915          */
 916         mutex_enter(&rp4freelist_lock);
 917         if (rp4freelist == NULL) {
 918                 rp->r_freef = rp;
 919                 rp->r_freeb = rp;
 920                 rp4freelist = rp;
 921         } else {
 922                 rp->r_freef = rp4freelist;
 923                 rp->r_freeb = rp4freelist->r_freeb;
 924                 rp4freelist->r_freeb->r_freef = rp;
 925                 rp4freelist->r_freeb = rp;
 926                 if (!nfs4_has_pages(vp) && rp->r_dir == NULL &&
 927                     rp->r_symlink.contents == NULL && rp->r_secattr == NULL)
 928                         rp4freelist = rp;
 929         }
 930         mutex_exit(&rp4freelist_lock);
 931
 932         rw_exit(&rp->r_hashq->r_lock);
 933
 934         if (xattr)
 935                 VN_RELE(xattr);
 936 }
 937
 938 /*
 939  * Remove an rnode from the free list.
 940  *
 941  * The caller must be holding rp4freelist_lock and the rnode
 942  * must be on the freelist.
 943  */
 944 static void
 945 rp4_rmfree(rnode4_t *rp)
 946 {
 947
 948         ASSERT(MUTEX_HELD(&rp4freelist_lock));
 949         ASSERT(rp->r_freef != NULL && rp->r_freeb != NULL);
 950
 951         if (rp == rp4freelist) {
 952                 rp4freelist = rp->r_freef;
 953                 if (rp == rp4freelist)
 954                         rp4freelist = NULL;
 955         }
 956         rp->r_freeb->r_freef = rp->r_freef;
 957         rp->r_freef->r_freeb = rp->r_freeb;
 958
 959         rp->r_freef = rp->r_freeb = NULL;
 960 }
 961
 962 /*
 963  * Put a rnode in the hash table.
 964  *
 965  * The caller must be holding the exclusive hash queue lock
 966  */
 967 void
 968 rp4_addhash(rnode4_t *rp)
 969 {
 970         ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
 971         ASSERT(!(rp->r_flags & R4HASHED));
 972
 973 #ifdef DEBUG
 974         r4_dup_check(rp, RTOV4(rp)->v_vfsp);
 975 #endif
 976
 977         rp->r_hashf = rp->r_hashq->r_hashf;
 978         rp->r_hashq->r_hashf = rp;
 979         rp->r_hashb = (rnode4_t *)rp->r_hashq;
 980         rp->r_hashf->r_hashb = rp;
 981
 982         mutex_enter(&rp->r_statelock);
 983         rp->r_flags |= R4HASHED;
 984         mutex_exit(&rp->r_statelock);
 985 }
 986
 987 /*
 988  * Remove a rnode from the hash table.
 989  *
 990  * The caller must be holding the hash queue lock.
 991  */
 992 void
 993 rp4_rmhash_locked(rnode4_t *rp)
 994 {
 995         ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
 996         ASSERT(rp->r_flags & R4HASHED);
 997
 998         rp->r_hashb->r_hashf = rp->r_hashf;
 999         rp->r_hashf->r_hashb = rp->r_hashb;
1000
1001         mutex_enter(&rp->r_statelock);
1002         rp->r_flags &= ~R4HASHED;
1003         mutex_exit(&rp->r_statelock);
1004 }
1005
1006 /*
1007  * Remove a rnode from the hash table.
1008  *
1009  * The caller must not be holding the hash queue lock.
1010  */
1011 void
1012 rp4_rmhash(rnode4_t *rp)
1013 {
1014         rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
1015         rp4_rmhash_locked(rp);
1016         rw_exit(&rp->r_hashq->r_lock);
1017 }
1018
1019 /*
1020  * Lookup a rnode by fhandle.  Ignores rnodes that had failed recovery.
1021  * Returns NULL if no match.  If an rnode is returned, the reference count
1022  * on the master vnode is incremented.
1023  *
1024  * The caller must be holding the hash queue lock, either shared or exclusive.
1025  */
1026 rnode4_t *
1027 r4find(r4hashq_t *rhtp, nfs4_sharedfh_t *fh, struct vfs *vfsp)
1028 {
1029         rnode4_t *rp;
1030         vnode_t *vp;
1031
1032         ASSERT(RW_LOCK_HELD(&rhtp->r_lock));
1033
1034         for (rp = rhtp->r_hashf; rp != (rnode4_t *)rhtp; rp = rp->r_hashf) {
1035                 vp = RTOV4(rp);
1036                 if (vp->v_vfsp == vfsp && SFH4_SAME(rp->r_fh, fh)) {
1037
1038                         mutex_enter(&rp->r_statelock);
1039                         if (rp->r_flags & R4RECOVERR) {
1040                                 mutex_exit(&rp->r_statelock);
1041                                 continue;
1042                         }
1043                         mutex_exit(&rp->r_statelock);
1044 #ifdef DEBUG
1045                         r4_dup_check(rp, vfsp);
1046 #endif
1047                         if (rp->r_freef != NULL) {
1048                                 mutex_enter(&rp4freelist_lock);
1049                                 /*
1050                                  * If the rnode is on the freelist,
1051                                  * then remove it and use that reference
1052                                  * as the new reference.  Otherwise,
1053                                  * need to increment the reference count.
1054                                  */
1055                                 if (rp->r_freef != NULL) {
1056                                         rp4_rmfree(rp);
1057                                         mutex_exit(&rp4freelist_lock);
1058                                 } else {
1059                                         mutex_exit(&rp4freelist_lock);
1060                                         VN_HOLD(vp);
1061                                 }
1062                         } else
1063                                 VN_HOLD(vp);
1064
1065                         /*
1066                          * if root vnode, set v_flag to indicate that
1067                          */
1068                         if (isrootfh(fh, rp)) {
1069                                 if (!(vp->v_flag & VROOT)) {
1070                                         mutex_enter(&vp->v_lock);
1071                                         vp->v_flag |= VROOT;
1072                                         mutex_exit(&vp->v_lock);
1073                                 }
1074                         }
1075                         return (rp);
1076                 }
1077         }
1078         return (NULL);
1079 }
1080
1081 /*
1082  * Lookup an rnode by fhandle. Just a wrapper for r4find()
1083  * that assumes the caller hasn't already got the lock
1084  * on the hash bucket.
1085  */
1086 rnode4_t *
1087 r4find_unlocked(nfs4_sharedfh_t *fh, struct vfs *vfsp)
1088 {
1089         rnode4_t *rp;
1090         int index;
1091
1092         index = rtable4hash(fh);
1093         rw_enter(&rtable4[index].r_lock, RW_READER);
1094         rp = r4find(&rtable4[index], fh, vfsp);
1095         rw_exit(&rtable4[index].r_lock);
1096
1097         return (rp);
1098 }
1099
1100 /*
1101  * Return >0 if there is a active vnode belonging to this vfs in the
1102  * rtable4 cache.
1103  *
1104  * Several of these checks are done without holding the usual
1105  * locks.  This is safe because destroy_rtable(), rp_addfree(),
1106  * etc. will redo the necessary checks before actually destroying
1107  * any rnodes.
1108  */
1109 int
1110 check_rtable4(struct vfs *vfsp)
1111 {
1112         rnode4_t *rp;
1113         vnode_t *vp;
1114         int busy = NFSV4_RTABLE4_OK;
1115         int index;
1116
1117         for (index = 0; index < rtable4size; index++) {
1118                 rw_enter(&rtable4[index].r_lock, RW_READER);
1119
1120                 for (rp = rtable4[index].r_hashf;
1121                     rp != (rnode4_t *)(&rtable4[index]);
1122                     rp = rp->r_hashf) {
1123
1124                         vp = RTOV4(rp);
1125                         if (vp->v_vfsp == vfsp) {
1126                                 if (rp->r_freef == NULL) {
1127                                         busy = NFSV4_RTABLE4_NOT_FREE_LIST;
1128                                 } else if (nfs4_has_pages(vp) &&
1129                                     (rp->r_flags & R4DIRTY)) {
1130                                         busy = NFSV4_RTABLE4_DIRTY_PAGES;
1131                                 } else if (rp->r_count > 0) {
1132                                         busy = NFSV4_RTABLE4_POS_R_COUNT;
1133                                 }
1134
1135                                 if (busy != NFSV4_RTABLE4_OK) {
1136 #ifdef DEBUG
1137                                         char *path;
1138
1139                                         path = fn_path(rp->r_svnode.sv_name);
1140                                         DTRACE_NFSV4_3(rnode__e__debug,
1141                                             int, busy, char *, path,
1142                                             rnode4_t *, rp);
1143                                         kmem_free(path, strlen(path)+1);
1144 #endif
1145                                         rw_exit(&rtable4[index].r_lock);
1146                                         return (busy);
1147                                 }
1148                         }
1149                 }
1150                 rw_exit(&rtable4[index].r_lock);
1151         }
1152         return (busy);
1153 }
1154
1155 /*
1156  * Destroy inactive vnodes from the hash queues which
1157  * belong to this vfs. All of the vnodes should be inactive.
1158  * It is essential that we destroy all rnodes in case of
1159  * forced unmount as well as in normal unmount case.
1160  */
1161
1162 void
1163 destroy_rtable4(struct vfs *vfsp, cred_t *cr)
1164 {
1165         int index;
1166         vnode_t *vp;
1167         rnode4_t *rp, *r_hashf, *rlist;
1168
1169         rlist = NULL;
1170
1171         for (index = 0; index < rtable4size; index++) {
1172                 rw_enter(&rtable4[index].r_lock, RW_WRITER);
1173                 for (rp = rtable4[index].r_hashf;
1174                     rp != (rnode4_t *)(&rtable4[index]);
1175                     rp = r_hashf) {
1176                         /* save the hash pointer before destroying */
1177                         r_hashf = rp->r_hashf;
1178
1179                         vp = RTOV4(rp);
1180                         if (vp->v_vfsp == vfsp) {
1181                                 mutex_enter(&rp4freelist_lock);
1182                                 if (rp->r_freef != NULL) {
1183                                         rp4_rmfree(rp);
1184                                         mutex_exit(&rp4freelist_lock);
1185                                         rp4_rmhash_locked(rp);
1186                                         rp->r_hashf = rlist;
1187                                         rlist = rp;
1188                                 } else
1189                                         mutex_exit(&rp4freelist_lock);
1190                         }
1191                 }
1192                 rw_exit(&rtable4[index].r_lock);
1193         }
1194
1195         for (rp = rlist; rp != NULL; rp = r_hashf) {
1196                 r_hashf = rp->r_hashf;
1197                 /*
1198                  * This call to rp4_addfree will end up destroying the
1199                  * rnode, but in a safe way with the appropriate set
1200                  * of checks done.
1201                  */
1202                 rp4_addfree(rp, cr);
1203         }
1204 }
1205
1206 /*
1207  * This routine destroys all the resources of an rnode
1208  * and finally the rnode itself.
1209  */
1210 static void
1211 destroy_rnode4(rnode4_t *rp)
1212 {
1213         vnode_t *vp;
1214         vfs_t *vfsp;
1215
1216         ASSERT(rp->r_deleg_type == OPEN_DELEGATE_NONE);
1217
1218         vp = RTOV4(rp);
1219         vfsp = vp->v_vfsp;
1220
1221         uninit_rnode4(rp);
1222         atomic_dec_ulong((ulong_t *)&rnode4_new);
1223 #ifdef DEBUG
1224         clstat4_debug.nrnode.value.ui64--;
1225 #endif
1226         kmem_cache_free(rnode4_cache, rp);
1227         vn_invalid(vp);
1228         vn_free(vp);
1229         VFS_RELE(vfsp);
1230 }
1231
1232 /*
1233  * Invalidate the attributes on all rnodes forcing the next getattr
1234  * to go over the wire.  Used to flush stale uid and gid mappings.
1235  * Maybe done on a per vfsp, or all rnodes (vfsp == NULL)
1236  */
1237 void
1238 nfs4_rnode_invalidate(struct vfs *vfsp)
1239 {
1240         int index;
1241         rnode4_t *rp;
1242         vnode_t *vp;
1243
1244         /*
1245          * Walk the hash queues looking for rnodes.
1246          */
1247         for (index = 0; index < rtable4size; index++) {
1248                 rw_enter(&rtable4[index].r_lock, RW_READER);
1249                 for (rp = rtable4[index].r_hashf;
1250                     rp != (rnode4_t *)(&rtable4[index]);
1251                     rp = rp->r_hashf) {
1252                         vp = RTOV4(rp);
1253                         if (vfsp != NULL && vp->v_vfsp != vfsp)
1254                                 continue;
1255
1256                         if (!mutex_tryenter(&rp->r_statelock))
1257                                 continue;
1258
1259                         /*
1260                          * Expire the attributes by resetting the change
1261                          * and attr timeout.
1262                          */
1263                         rp->r_change = 0;
1264                         PURGE_ATTRCACHE4_LOCKED(rp);
1265                         mutex_exit(&rp->r_statelock);
1266                 }
1267                 rw_exit(&rtable4[index].r_lock);
1268         }
1269 }
1270
1271 /*
1272  * Flush all vnodes in this (or every) vfs.
1273  * Used by nfs_sync and by nfs_unmount.
1274  */
1275 void
1276 r4flush(struct vfs *vfsp, cred_t *cr)
1277 {
1278         int index;
1279         rnode4_t *rp;
1280         vnode_t *vp, **vplist;
1281         long num, cnt;
1282
1283         /*
1284          * Check to see whether there is anything to do.
1285          */
1286         num = rnode4_new;
1287         if (num == 0)
1288                 return;
1289
1290         /*
1291          * Allocate a slot for all currently active rnodes on the
1292          * supposition that they all may need flushing.
1293          */
1294         vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
1295         cnt = 0;
1296
1297         /*
1298          * Walk the hash queues looking for rnodes with page
1299          * lists associated with them.  Make a list of these
1300          * files.
1301          */
1302         for (index = 0; index < rtable4size; index++) {
1303                 rw_enter(&rtable4[index].r_lock, RW_READER);
1304                 for (rp = rtable4[index].r_hashf;
1305                     rp != (rnode4_t *)(&rtable4[index]);
1306                     rp = rp->r_hashf) {
1307                         vp = RTOV4(rp);
1308                         /*
1309                          * Don't bother sync'ing a vp if it
1310                          * is part of virtual swap device or
1311                          * if VFS is read-only
1312                          */
1313                         if (IS_SWAPVP(vp) || vn_is_readonly(vp))
1314                                 continue;
1315                         /*
1316                          * If flushing all mounted file systems or
1317                          * the vnode belongs to this vfs, has pages
1318                          * and is marked as either dirty or mmap'd,
1319                          * hold and add this vnode to the list of
1320                          * vnodes to flush.
1321                          */
1322                         if ((vfsp == NULL || vp->v_vfsp == vfsp) &&
1323                             nfs4_has_pages(vp) &&
1324                             ((rp->r_flags & R4DIRTY) || rp->r_mapcnt > 0)) {
1325                                 VN_HOLD(vp);
1326                                 vplist[cnt++] = vp;
1327                                 if (cnt == num) {
1328                                         rw_exit(&rtable4[index].r_lock);
1329                                         goto toomany;
1330                                 }
1331                         }
1332                 }
1333                 rw_exit(&rtable4[index].r_lock);
1334         }
1335 toomany:
1336
1337         /*
1338          * Flush and release all of the files on the list.
1339          */
1340         while (cnt-- > 0) {
1341                 vp = vplist[cnt];
1342                 (void) fop_putpage(vp, 0, 0, B_ASYNC, cr, NULL);
1343                 VN_RELE(vp);
1344         }
1345
1346         /*
1347          * Free the space allocated to hold the list.
1348          */
1349         kmem_free(vplist, num * sizeof (*vplist));
1350 }
1351
1352 int
1353 nfs4_free_data_reclaim(rnode4_t *rp)
1354 {
1355         char *contents;
1356         vnode_t *xattr;
1357         int size;
1358         vsecattr_t *vsp;
1359         int freed;
1360         bool_t rdc = FALSE;
1361
1362         /*
1363          * Free any held caches which may
1364          * be associated with this rnode.
1365          */
1366         mutex_enter(&rp->r_statelock);
1367         if (rp->r_dir != NULL)
1368                 rdc = TRUE;
1369         contents = rp->r_symlink.contents;
1370         size = rp->r_symlink.size;
1371         rp->r_symlink.contents = NULL;
1372         vsp = rp->r_secattr;
1373         rp->r_secattr = NULL;
1374         xattr = rp->r_xattr_dir;
1375         rp->r_xattr_dir = NULL;
1376         mutex_exit(&rp->r_statelock);
1377
1378         /*
1379          * Free the access cache entries.
1380          */
1381         freed = nfs4_access_purge_rp(rp);
1382
1383         if (rdc == FALSE && contents == NULL && vsp == NULL && xattr == NULL)
1384                 return (freed);
1385
1386         /*
1387          * Free the readdir cache entries, incompletely if we can't block.
1388          */
1389         nfs4_purge_rddir_cache(RTOV4(rp));
1390
1391         /*
1392          * Free the symbolic link cache.
1393          */
1394         if (contents != NULL) {
1395
1396                 kmem_free((void *)contents, size);
1397         }
1398
1399         /*
1400          * Free any cached ACL.
1401          */
1402         if (vsp != NULL)
1403                 nfs4_acl_free_cache(vsp);
1404
1405         /*
1406          * Release the xattr directory vnode
1407          */
1408         if (xattr != NULL)
1409                 VN_RELE(xattr);
1410
1411         return (1);
1412 }
1413
1414 static int
1415 nfs4_active_data_reclaim(rnode4_t *rp)
1416 {
1417         char *contents;
1418         vnode_t *xattr = NULL;
1419         int size;
1420         vsecattr_t *vsp;
1421         int freed;
1422         bool_t rdc = FALSE;
1423
1424         /*
1425          * Free any held credentials and caches which
1426          * may be associated with this rnode.
1427          */
1428         if (!mutex_tryenter(&rp->r_statelock))
1429                 return (0);
1430         contents = rp->r_symlink.contents;
1431         size = rp->r_symlink.size;
1432         rp->r_symlink.contents = NULL;
1433         vsp = rp->r_secattr;
1434         rp->r_secattr = NULL;
1435         if (rp->r_dir != NULL)
1436                 rdc = TRUE;
1437         /*
1438          * To avoid a deadlock, do not free r_xattr_dir cache if it is hashed
1439          * on the same r_hashq queue. We are not mandated to free all caches.
1440          * VN_RELE(rp->r_xattr_dir) will be done sometime later - e.g. when the
1441          * rnode 'rp' is freed or put on the free list.
1442          *
1443          * We will retain NFS4_XATTR_DIR_NOTSUPP because:
1444          * - it has no associated rnode4_t (its v_data is NULL),
1445          * - it is preallocated statically and will never go away,
1446          * so we cannot save anything by releasing it.
1447          */
1448         if (rp->r_xattr_dir && rp->r_xattr_dir != NFS4_XATTR_DIR_NOTSUPP &&
1449             VTOR4(rp->r_xattr_dir)->r_hashq != rp->r_hashq) {
1450                 xattr = rp->r_xattr_dir;
1451                 rp->r_xattr_dir = NULL;
1452         }
1453         mutex_exit(&rp->r_statelock);
1454
1455         /*
1456          * Free the access cache entries.
1457          */
1458         freed = nfs4_access_purge_rp(rp);
1459
1460         if (contents == NULL && vsp == NULL && rdc == FALSE && xattr == NULL)
1461                 return (freed);
1462
1463         /*
1464          * Free the symbolic link cache.
1465          */
1466         if (contents != NULL) {
1467
1468                 kmem_free((void *)contents, size);
1469         }
1470
1471         /*
1472          * Free any cached ACL.
1473          */
1474         if (vsp != NULL)
1475                 nfs4_acl_free_cache(vsp);
1476
1477         nfs4_purge_rddir_cache(RTOV4(rp));
1478
1479         /*
1480          * Release the xattr directory vnode
1481          */
1482         if (xattr != NULL)
1483                 VN_RELE(xattr);
1484
1485         return (1);
1486 }
1487
1488 static int
1489 nfs4_free_reclaim(void)
1490 {
1491         int freed;
1492         rnode4_t *rp;
1493
1494 #ifdef DEBUG
1495         clstat4_debug.f_reclaim.value.ui64++;
1496 #endif
1497         freed = 0;
1498         mutex_enter(&rp4freelist_lock);
1499         rp = rp4freelist;
1500         if (rp != NULL) {
1501                 do {
1502                         if (nfs4_free_data_reclaim(rp))
1503                                 freed = 1;
1504                 } while ((rp = rp->r_freef) != rp4freelist);
1505         }
1506         mutex_exit(&rp4freelist_lock);
1507         return (freed);
1508 }
1509
1510 static int
1511 nfs4_active_reclaim(void)
1512 {
1513         int freed;
1514         int index;
1515         rnode4_t *rp;
1516
1517 #ifdef DEBUG
1518         clstat4_debug.a_reclaim.value.ui64++;
1519 #endif
1520         freed = 0;
1521         for (index = 0; index < rtable4size; index++) {
1522                 rw_enter(&rtable4[index].r_lock, RW_READER);
1523                 for (rp = rtable4[index].r_hashf;
1524                     rp != (rnode4_t *)(&rtable4[index]);
1525                     rp = rp->r_hashf) {
1526                         if (nfs4_active_data_reclaim(rp))
1527                                 freed = 1;
1528                 }
1529                 rw_exit(&rtable4[index].r_lock);
1530         }
1531         return (freed);
1532 }
1533
1534 static int
1535 nfs4_rnode_reclaim(void)
1536 {
1537         int freed;
1538         rnode4_t *rp;
1539         vnode_t *vp;
1540
1541 #ifdef DEBUG
1542         clstat4_debug.r_reclaim.value.ui64++;
1543 #endif
1544         freed = 0;
1545         mutex_enter(&rp4freelist_lock);
1546         while ((rp = rp4freelist) != NULL) {
1547                 rp4_rmfree(rp);
1548                 mutex_exit(&rp4freelist_lock);
1549                 if (rp->r_flags & R4HASHED) {
1550                         vp = RTOV4(rp);
1551                         rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
1552                         mutex_enter(&vp->v_lock);
1553                         if (vp->v_count > 1) {
1554                                 VN_RELE_LOCKED(vp);
1555                                 mutex_exit(&vp->v_lock);
1556                                 rw_exit(&rp->r_hashq->r_lock);
1557                                 mutex_enter(&rp4freelist_lock);
1558                                 continue;
1559                         }
1560                         mutex_exit(&vp->v_lock);
1561                         rp4_rmhash_locked(rp);
1562                         rw_exit(&rp->r_hashq->r_lock);
1563                 }
1564                 /*
1565                  * This call to rp_addfree will end up destroying the
1566                  * rnode, but in a safe way with the appropriate set
1567                  * of checks done.
1568                  */
1569                 rp4_addfree(rp, CRED());
1570                 mutex_enter(&rp4freelist_lock);
1571         }
1572         mutex_exit(&rp4freelist_lock);
1573         return (freed);
1574 }
1575
/*
 * Reclaim entry point.  The three reclaim stages are tried in order and
 * the first one that reports having freed something ends the pass:
 *      1. caches of the rnodes on the freelist,
 *      2. caches of the rnodes in the hash table,
 *      3. the freelist rnodes themselves.
 *
 * The cdrarg argument is not used.
 */
/*ARGSUSED*/
static void
nfs4_reclaim(void *cdrarg)
{
#ifdef DEBUG
        clstat4_debug.reclaim.value.ui64++;
#endif
        if (!nfs4_free_reclaim() && !nfs4_active_reclaim())
                (void) nfs4_rnode_reclaim();
}
1591
1592 /*
1593  * Returns the clientid4 to use for the given mntinfo4.  Note that the
1594  * clientid can change if the caller drops mi_recovlock.
1595  */
1596
1597 clientid4
1598 mi2clientid(mntinfo4_t *mi)
1599 {
1600         nfs4_server_t   *sp;
1601         clientid4       clientid = 0;
1602
1603         /* this locks down sp if it is found */
1604         sp = find_nfs4_server(mi);
1605         if (sp != NULL) {
1606                 clientid = sp->clientid;
1607                 mutex_exit(&sp->s_lock);
1608                 nfs4_server_rele(sp);
1609         }
1610         return (clientid);
1611 }
1612
1613 /*
1614  * Return the current lease time for the server associated with the given
1615  * file.  Note that the lease time could change immediately after this
1616  * call.
1617  */
1618
1619 time_t
1620 r2lease_time(rnode4_t *rp)
1621 {
1622         nfs4_server_t   *sp;
1623         time_t          lease_time;
1624         mntinfo4_t      *mi = VTOMI4(RTOV4(rp));
1625
1626         (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
1627
1628         /* this locks down sp if it is found */
1629         sp = find_nfs4_server(VTOMI4(RTOV4(rp)));
1630
1631         if (VTOMI4(RTOV4(rp))->mi_vfsp->vfs_flag & VFS_UNMOUNTED) {
1632                 if (sp != NULL) {
1633                         mutex_exit(&sp->s_lock);
1634                         nfs4_server_rele(sp);
1635                 }
1636                 nfs_rw_exit(&mi->mi_recovlock);
1637                 return (1);             /* 1 second */
1638         }
1639
1640         ASSERT(sp != NULL);
1641
1642         lease_time = sp->s_lease_time;
1643
1644         mutex_exit(&sp->s_lock);
1645         nfs4_server_rele(sp);
1646         nfs_rw_exit(&mi->mi_recovlock);
1647
1648         return (lease_time);
1649 }
1650
1651 /*
1652  * Return a list with information about all the known open instances for
1653  * a filesystem. The caller must call r4releopenlist() when done with the
1654  * list.
1655  *
1656  * We are safe at looking at os_valid and os_pending_close across dropping
1657  * the 'os_sync_lock' to count up the number of open streams and then
1658  * allocate memory for the osp list due to:
1659  *      -Looking at os_pending_close is safe since this routine is
1660  *      only called via recovery, and os_pending_close can only be set via
1661  *      a non-recovery operation (which are all blocked when recovery
1662  *      is active).
1663  *
1664  *      -Examining os_valid is safe since non-recovery operations, which
1665  *      could potentially switch os_valid to 0, are blocked (via
1666  *      nfs4_start_fop) and recovery is single-threaded per mntinfo4_t
1667  *      (which means we are the only recovery thread potentially acting
1668  *      on this open stream).
1669  */
1670
1671 nfs4_opinst_t *
1672 r4mkopenlist(mntinfo4_t *mi)
1673 {
1674         nfs4_opinst_t *reopenlist, *rep;
1675         rnode4_t *rp;
1676         vnode_t *vp;
1677         vfs_t *vfsp = mi->mi_vfsp;
1678         int numosp;
1679         nfs4_open_stream_t *osp;
1680         int index;
1681         open_delegation_type4 dtype;
1682         int hold_vnode;
1683
1684         reopenlist = NULL;
1685
1686         for (index = 0; index < rtable4size; index++) {
1687                 rw_enter(&rtable4[index].r_lock, RW_READER);
1688                 for (rp = rtable4[index].r_hashf;
1689                     rp != (rnode4_t *)(&rtable4[index]);
1690                     rp = rp->r_hashf) {
1691
1692                         vp = RTOV4(rp);
1693                         if (vp->v_vfsp != vfsp)
1694                                 continue;
1695                         hold_vnode = 0;
1696
1697                         mutex_enter(&rp->r_os_lock);
1698
1699                         /* Count the number of valid open_streams of the file */
1700                         numosp = 0;
1701                         for (osp = list_head(&rp->r_open_streams); osp != NULL;
1702                             osp = list_next(&rp->r_open_streams, osp)) {
1703                                 mutex_enter(&osp->os_sync_lock);
1704                                 if (osp->os_valid && !osp->os_pending_close)
1705                                         numosp++;
1706                                 mutex_exit(&osp->os_sync_lock);
1707                         }
1708
1709                         /* Fill in the valid open streams per vp */
1710                         if (numosp > 0) {
1711                                 int j;
1712
1713                                 hold_vnode = 1;
1714
1715                                 /*
1716                                  * Add a new open instance to the list
1717                                  */
1718                                 rep = kmem_zalloc(sizeof (*reopenlist),
1719                                     KM_SLEEP);
1720                                 rep->re_next = reopenlist;
1721                                 reopenlist = rep;
1722
1723                                 rep->re_vp = vp;
1724                                 rep->re_osp = kmem_zalloc(
1725                                     numosp * sizeof (*(rep->re_osp)),
1726                                     KM_SLEEP);
1727                                 rep->re_numosp = numosp;
1728
1729                                 j = 0;
1730                                 for (osp = list_head(&rp->r_open_streams);
1731                                     osp != NULL;
1732                                     osp = list_next(&rp->r_open_streams, osp)) {
1733
1734                                         mutex_enter(&osp->os_sync_lock);
1735                                         if (osp->os_valid &&
1736                                             !osp->os_pending_close) {
1737                                                 osp->os_ref_count++;
1738                                                 rep->re_osp[j] = osp;
1739                                                 j++;
1740                                         }
1741                                         mutex_exit(&osp->os_sync_lock);
1742                                 }
1743                                 /*
1744                                  * Assuming valid osp(s) stays valid between
1745                                  * the time obtaining j and numosp.
1746                                  */
1747                                 ASSERT(j == numosp);
1748                         }
1749
1750                         mutex_exit(&rp->r_os_lock);
1751                         /* do this here to keep v_lock > r_os_lock */
1752                         if (hold_vnode)
1753                                 VN_HOLD(vp);
1754                         mutex_enter(&rp->r_statev4_lock);
1755                         if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
1756                                 /*
1757                                  * If this rnode holds a delegation,
1758                                  * but if there are no valid open streams,
1759                                  * then just discard the delegation
1760                                  * without doing delegreturn.
1761                                  */
1762                                 if (numosp > 0)
1763                                         rp->r_deleg_needs_recovery =
1764                                             rp->r_deleg_type;
1765                         }
1766                         /* Save the delegation type for use outside the lock */
1767                         dtype = rp->r_deleg_type;
1768                         mutex_exit(&rp->r_statev4_lock);
1769
1770                         /*
1771                          * If we have a delegation then get rid of it.
1772                          * We've set rp->r_deleg_needs_recovery so we have
1773                          * enough information to recover.
1774                          */
1775                         if (dtype != OPEN_DELEGATE_NONE) {
1776                                 (void) nfs4delegreturn(rp, NFS4_DR_DISCARD);
1777                         }
1778                 }
1779                 rw_exit(&rtable4[index].r_lock);
1780         }
1781         return (reopenlist);
1782 }
1783
1784 /*
1785  * Given a filesystem id, check to see if any rnodes
1786  * within this fsid reside in the rnode cache, other
1787  * than one we know about.
1788  *
1789  * Return 1 if an rnode is found, 0 otherwise
1790  */
1791 int
1792 r4find_by_fsid(mntinfo4_t *mi, fattr4_fsid *moved_fsid)
1793 {
1794         rnode4_t *rp;
1795         vnode_t *vp;
1796         vfs_t *vfsp = mi->mi_vfsp;
1797         fattr4_fsid *fsid;
1798         int index, found = 0;
1799
1800         for (index = 0; index < rtable4size; index++) {
1801                 rw_enter(&rtable4[index].r_lock, RW_READER);
1802                 for (rp = rtable4[index].r_hashf;
1803                     rp != (rnode4_t *)(&rtable4[index]);
1804                     rp = rp->r_hashf) {
1805
1806                         vp = RTOV4(rp);
1807                         if (vp->v_vfsp != vfsp)
1808                                 continue;
1809
1810                         /*
1811                          * XXX there might be a case where a
1812                          * replicated fs may have the same fsid
1813                          * across two different servers. This
1814                          * check isn't good enough in that case
1815                          */
1816                         fsid = &rp->r_srv_fsid;
1817                         if (FATTR4_FSID_EQ(moved_fsid, fsid)) {
1818                                 found = 1;
1819                                 break;
1820                         }
1821                 }
1822                 rw_exit(&rtable4[index].r_lock);
1823
1824                 if (found)
1825                         break;
1826         }
1827         return (found);
1828 }
1829
1830 /*
1831  * Release the list of open instance references.
1832  */
1833
1834 void
1835 r4releopenlist(nfs4_opinst_t *reopenp)
1836 {
1837         nfs4_opinst_t *rep, *next;
1838         int i;
1839
1840         for (rep = reopenp; rep; rep = next) {
1841                 next = rep->re_next;
1842
1843                 for (i = 0; i < rep->re_numosp; i++)
1844                         open_stream_rele(rep->re_osp[i], VTOR4(rep->re_vp));
1845
1846                 VN_RELE(rep->re_vp);
1847                 kmem_free(rep->re_osp,
1848                     rep->re_numosp * sizeof (*(rep->re_osp)));
1849
1850                 kmem_free(rep, sizeof (*rep));
1851         }
1852 }
1853
1854 int
1855 nfs4_rnode_init(void)
1856 {
1857         ulong_t nrnode4_max;
1858         int i;
1859
1860         /*
1861          * Compute the size of the rnode4 hash table
1862          */
1863         if (nrnode <= 0)
1864                 nrnode = ncsize;
1865         nrnode4_max =
1866             (ulong_t)((kmem_maxavail() >> 2) / sizeof (struct rnode4));
1867         if (nrnode > nrnode4_max || (nrnode == 0 && ncsize == 0)) {
1868                 zcmn_err(GLOBAL_ZONEID, CE_NOTE,
1869                     "!setting nrnode to max value of %ld", nrnode4_max);
1870                 nrnode = nrnode4_max;
1871         }
1872         rtable4size = 1 << highbit(nrnode / rnode4_hashlen);
1873         rtable4mask = rtable4size - 1;
1874
1875         /*
1876          * Allocate and initialize the hash buckets
1877          */
1878         rtable4 = kmem_alloc(rtable4size * sizeof (*rtable4), KM_SLEEP);
1879         for (i = 0; i < rtable4size; i++) {
1880                 rtable4[i].r_hashf = (rnode4_t *)(&rtable4[i]);
1881                 rtable4[i].r_hashb = (rnode4_t *)(&rtable4[i]);
1882                 rw_init(&rtable4[i].r_lock, NULL, RW_DEFAULT, NULL);
1883         }
1884
1885         rnode4_cache = kmem_cache_create("rnode4_cache", sizeof (rnode4_t),
1886             0, NULL, NULL, nfs4_reclaim, NULL, NULL, 0);
1887
1888         return (0);
1889 }
1890
1891 int
1892 nfs4_rnode_fini(void)
1893 {
1894         int i;
1895
1896         /*
1897          * Deallocate the rnode hash queues
1898          */
1899         kmem_cache_destroy(rnode4_cache);
1900
1901         for (i = 0; i < rtable4size; i++)
1902                 rw_destroy(&rtable4[i].r_lock);
1903
1904         kmem_free(rtable4, rtable4size * sizeof (*rtable4));
1905
1906         return (0);
1907 }
1908
1909 /*
1910  * Return non-zero if the given filehandle refers to the root filehandle
1911  * for the given rnode.
1912  */
1913
1914 static int
1915 isrootfh(nfs4_sharedfh_t *fh, rnode4_t *rp)
1916 {
1917         int isroot;
1918
1919         isroot = 0;
1920         if (SFH4_SAME(VTOMI4(RTOV4(rp))->mi_rootfh, fh))
1921                 isroot = 1;
1922
1923         return (isroot);
1924 }
1925
1926 /*
1927  * The r4_stub_* routines assume that the rnode is newly activated, and
1928  * that the caller either holds the hash bucket r_lock for this rnode as
1929  * RW_WRITER, or holds r_statelock.
1930  */
1931 static void
1932 r4_stub_set(rnode4_t *rp, nfs4_stub_type_t type)
1933 {
1934         vnode_t *vp = RTOV4(rp);
1935         krwlock_t *hash_lock = &rp->r_hashq->r_lock;
1936
1937         ASSERT(RW_WRITE_HELD(hash_lock) || MUTEX_HELD(&rp->r_statelock));
1938
1939         rp->r_stub_type = type;
1940
1941         /*
1942          * Safely switch this vnode to the trigger vnodeops.
1943          *
1944          * Currently, we don't ever switch a trigger vnode back to using
1945          * "regular" v4 vnodeops. NFS4_STUB_NONE is only used to note that
1946          * a new v4 object is not a trigger, and it will already have the
1947          * correct v4 vnodeops by default. So, no "else" case required here.
1948          */
1949         if (type != NFS4_STUB_NONE)
1950                 vn_setops(vp, &nfs4_trigger_vnodeops);
1951 }
1952
1953 void
1954 r4_stub_mirrormount(rnode4_t *rp)
1955 {
1956         r4_stub_set(rp, NFS4_STUB_MIRRORMOUNT);
1957 }
1958
1959 void
1960 r4_stub_referral(rnode4_t *rp)
1961 {
1962         DTRACE_PROBE1(nfs4clnt__func__referral__moved,
1963             vnode_t *, RTOV4(rp));
1964         r4_stub_set(rp, NFS4_STUB_REFERRAL);
1965 }
1966
1967 void
1968 r4_stub_none(rnode4_t *rp)
1969 {
1970         r4_stub_set(rp, NFS4_STUB_NONE);
1971 }
1972
1973 #ifdef DEBUG
1974
1975 /*
1976  * Look in the rnode table for other rnodes that have the same filehandle.
1977  * Assume the lock is held for the hash chain of checkrp
1978  */
1979
1980 static void
1981 r4_dup_check(rnode4_t *checkrp, vfs_t *vfsp)
1982 {
1983         rnode4_t *rp;
1984         vnode_t *tvp;
1985         nfs4_fhandle_t fh, fh2;
1986         int index;
1987
1988         if (!r4_check_for_dups)
1989                 return;
1990
1991         ASSERT(RW_LOCK_HELD(&checkrp->r_hashq->r_lock));
1992
1993         sfh4_copyval(checkrp->r_fh, &fh);
1994
1995         for (index = 0; index < rtable4size; index++) {
1996
1997                 if (&rtable4[index] != checkrp->r_hashq)
1998                         rw_enter(&rtable4[index].r_lock, RW_READER);
1999
2000                 for (rp = rtable4[index].r_hashf;
2001                     rp != (rnode4_t *)(&rtable4[index]);
2002                     rp = rp->r_hashf) {
2003
2004                         if (rp == checkrp)
2005                                 continue;
2006
2007                         tvp = RTOV4(rp);
2008                         if (tvp->v_vfsp != vfsp)
2009                                 continue;
2010
2011                         sfh4_copyval(rp->r_fh, &fh2);
2012                         if (nfs4cmpfhandle(&fh, &fh2) == 0) {
2013                                 cmn_err(CE_PANIC, "rnodes with same fs, fh "
2014                                     "(%p, %p)", (void *)checkrp, (void *)rp);
2015                         }
2016                 }
2017
2018                 if (&rtable4[index] != checkrp->r_hashq)
2019                         rw_exit(&rtable4[index].r_lock);
2020         }
2021 }
2022
2023 #endif /* DEBUG */