kernel/fs/nfs/nfs4_srv.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  24  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  25  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  26  */
  27
  28 /*
  29  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  30  *      All Rights Reserved
  31  */
  32
  33 #include <sys/param.h>
  34 #include <sys/types.h>
  35 #include <sys/systm.h>
  36 #include <sys/cred.h>
  37 #include <sys/buf.h>
  38 #include <sys/vfs.h>
  39 #include <sys/vnode.h>
  40 #include <sys/uio.h>
  41 #include <sys/errno.h>
  42 #include <sys/sysmacros.h>
  43 #include <sys/statvfs.h>
  44 #include <sys/kmem.h>
  45 #include <sys/dirent.h>
  46 #include <sys/cmn_err.h>
  47 #include <sys/debug.h>
  48 #include <sys/systeminfo.h>
  49 #include <sys/flock.h>
  50 #include <sys/pathname.h>
  51 #include <sys/nbmlock.h>
  52 #include <sys/share.h>
  53 #include <sys/atomic.h>
  54 #include <sys/policy.h>
  55 #include <sys/fem.h>
  56 #include <sys/sdt.h>
  57 #include <sys/ddi.h>
  58 #include <sys/zone.h>
  59
  60 #include <sys/fs_reparse.h>
  61
  62 #include <rpc/types.h>
  63 #include <rpc/auth.h>
  64 #include <rpc/rpcsec_gss.h>
  65 #include <rpc/svc.h>
  66
  67 #include <nfs/nfs.h>
  68 #include <nfs/export.h>
  69 #include <nfs/nfs_cmd.h>
  70 #include <nfs/lm.h>
  71 #include <nfs/nfs4.h>
  72
  73 #include <sys/strsubr.h>
  74 #include <sys/strsun.h>
  75
  76 #include <inet/common.h>
  77 #include <inet/ip.h>
  78 #include <inet/ip6.h>
  79
  80 #define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
  81 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
  82 #define RFS4_LOCK_DELAY 10      /* Milliseconds */
  83 static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
  84 extern struct svc_ops rdma_svc_ops;
  85 extern int nfs_loaned_buffers;
  86 /* End of Tunables */
  87
  88 static int rdma_setup_read_data4(READ4args *, READ4res *);
  89
  90 /*
  91  * Used to bump the stateid4.seqid value and show changes in the stateid
  92  */
  93 #define next_stateid(sp) (++(sp)->bits.chgseq)
  94
  95 /*
  96  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
  97  *      This is used to return NFS4ERR_TOOSMALL when clients specify
  98  *      maxcount that isn't large enough to hold the smallest possible
  99  *      XDR encoded dirent.
 100  *
 101  *          sizeof cookie (8 bytes) +
 102  *          sizeof name_len (4 bytes) +
 103  *          sizeof smallest (padded) name (4 bytes) +
 104  *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 105  *          sizeof attrlist4_len (4 bytes) +
 106  *          sizeof next boolean (4 bytes)
 107  *
 108  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 109  * the smallest possible entry4 (assumes no attrs requested).
 110  *      sizeof nfsstat4 (4 bytes) +
 111  *      sizeof verifier4 (8 bytes) +
 112  *      sizeof entry4list bool (4 bytes) +
 113  *      sizeof entry4   (36 bytes) +
 114  *      sizeof eof bool  (4 bytes)
 115  *
 116  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 117  *      fop_readdir.  Its value is the size of the maximum possible dirent
 118  *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 119  *      required for a given name length.  MAXNAMELEN is the maximum
 120  *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 121  *      macros are to allow for . and .. entries -- just a minor tweak to try
 122  *      and guarantee that buffer we give to fop_readdir will be large enough
 123  *      to hold ., .., and the largest possible solaris dirent64.
 124  */
 125 #define RFS4_MINLEN_ENTRY4 36
 126 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
 127 #define RFS4_MINLEN_RDDIR_BUF \
 128         (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 129
 130 /*
 131  * It would be better to pad to 4 bytes since that's what XDR would do,
 132  * but the dirents UFS gives us are already padded to 8, so just take
 133  * what we're given.  Dircount is only a hint anyway.  Currently the
 134  * solaris kernel is ASCII only, so there's no point in calling the
 135  * UTF8 functions.
 136  *
  137  * dirent64: name padded to provide 8 byte struct alignment
 138  *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 139  *
 140  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 141  *
 142  */
 143 #define DIRENT64_TO_DIRCOUNT(dp) \
 144         (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 145
 146 time_t rfs4_start_time;                 /* Initialized in rfs4_srvrinit */
 147
 148 static sysid_t lockt_sysid;             /* dummy sysid for all LOCKT calls */
 149
 150 u_longlong_t    nfs4_srv_caller_id;
 151 uint_t          nfs4_srv_vkey = 0;
 152
 153 verifier4       Write4verf;
 154 verifier4       Readdir4verf;
 155
 156 void    rfs4_init_compound_state(struct compound_state *);
 157
 158 static void     nullfree(caddr_t);
 159 static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 160                         struct compound_state *);
 161 static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 162                         struct compound_state *);
 163 static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 164                         struct compound_state *);
 165 static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 166                         struct compound_state *);
 167 static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 168                         struct compound_state *);
 169 static void     rfs4_op_create_free(nfs_resop4 *resop);
 170 static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 171                         struct svc_req *, struct compound_state *);
 172 static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 173                         struct svc_req *, struct compound_state *);
 174 static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 175                         struct compound_state *);
 176 static void     rfs4_op_getattr_free(nfs_resop4 *);
 177 static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 178                         struct compound_state *);
 179 static void     rfs4_op_getfh_free(nfs_resop4 *);
 180 static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 181                         struct compound_state *);
 182 static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 183                         struct compound_state *);
 184 static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 185                         struct compound_state *);
 186 static void     lock_denied_free(nfs_resop4 *);
 187 static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 188                         struct compound_state *);
 189 static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 190                         struct compound_state *);
 191 static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 192                         struct compound_state *);
 193 static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 194                         struct compound_state *);
 195 static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
 196                                 struct svc_req *req, struct compound_state *cs);
 197 static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 198                         struct compound_state *);
 199 static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 200                         struct compound_state *);
 201 static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
 202                         struct svc_req *, struct compound_state *);
 203 static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
 204                         struct svc_req *, struct compound_state *);
 205 static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 206                         struct compound_state *);
 207 static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 208                         struct compound_state *);
 209 static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 210                         struct compound_state *);
 211 static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 212                         struct compound_state *);
 213 static void     rfs4_op_read_free(nfs_resop4 *);
 214 static void     rfs4_op_readdir_free(nfs_resop4 *resop);
 215 static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 216                         struct compound_state *);
 217 static void     rfs4_op_readlink_free(nfs_resop4 *);
 218 static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
 219                         struct svc_req *, struct compound_state *);
 220 static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 221                         struct compound_state *);
 222 static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 223                         struct compound_state *);
 224 static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 225                         struct compound_state *);
 226 static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 227                         struct compound_state *);
 228 static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 229                         struct compound_state *);
 230 static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 231                         struct compound_state *);
 232 static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 233                         struct compound_state *);
 234 static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 235                         struct compound_state *);
 236 static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 237                         struct svc_req *, struct compound_state *);
 238 static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 239                         struct svc_req *req, struct compound_state *);
 240 static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 241                         struct compound_state *);
 242 static void     rfs4_op_secinfo_free(nfs_resop4 *);
 243
 244 static nfsstat4 check_open_access(uint32_t,
 245                                 struct compound_state *, struct svc_req *);
 246 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 247 void rfs4_ss_clid(rfs4_client_t *);
 248
/*
 * translation table for attrs
 *
 * Working table handed to nfs4_ntov_table_init(), nfs4_ntov_table_free()
 * and do_rfs4_set_attrs() (see their prototypes in this file).
 */
struct nfs4_ntov_table {
        /* attribute value storage -- presumably an array; confirm in init */
        union nfs4_attr_u *na;
        /* one byte per slot, NFS4_MAXNUM_ATTRS slots; meaning not shown here */
        uint8_t amap[NFS4_MAXNUM_ATTRS];
        /* NOTE(review): presumably the number of entries in use -- confirm */
        int attrcnt;
        /* NOTE(review): presumably flags that vfs statistics are needed */
        bool_t vfsstat;
};
 258
 259 static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 260 static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 261                                     struct nfs4_svgetit_arg *sargp);
 262
 263 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 264                     struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 265                     struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 266
 267 rfs4_servinst_t *rfs4_cur_servinst = NULL;      /* current server instance */
 268 kmutex_t        rfs4_servinst_lock;     /* protects linked list */
 269 int             rfs4_seen_first_compound;       /* set first time we see one */
 270
 271 /*
 272  * NFS4 op dispatch table
 273  */
 274
/*
 * One slot of the NFSv4 operation dispatch table (rfsv4disptab[]).
 * Both function pointers are declared without a prototype; the table
 * stores handlers and result-free routines of differing signatures in them.
 */
struct rfsv4disp {
        void    (*dis_proc)();          /* proc to call */
        void    (*dis_resfree)();       /* frees space allocated by proc */
        int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
};
 280
 281 static struct rfsv4disp rfsv4disptab[] = {
 282         /*
 283          * NFS VERSION 4
 284          */
 285
 286         /* RFS_NULL = 0 */
 287         {rfs4_op_illegal, nullfree, 0},
 288
 289         /* UNUSED = 1 */
 290         {rfs4_op_illegal, nullfree, 0},
 291
 292         /* UNUSED = 2 */
 293         {rfs4_op_illegal, nullfree, 0},
 294
 295         /* OP_ACCESS = 3 */
 296         {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
 297
 298         /* OP_CLOSE = 4 */
 299         {rfs4_op_close, nullfree, 0},
 300
 301         /* OP_COMMIT = 5 */
 302         {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
 303
 304         /* OP_CREATE = 6 */
 305         {rfs4_op_create, nullfree, 0},
 306
 307         /* OP_DELEGPURGE = 7 */
 308         {rfs4_op_delegpurge, nullfree, 0},
 309
 310         /* OP_DELEGRETURN = 8 */
 311         {rfs4_op_delegreturn, nullfree, 0},
 312
 313         /* OP_GETATTR = 9 */
 314         {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
 315
 316         /* OP_GETFH = 10 */
 317         {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
 318
 319         /* OP_LINK = 11 */
 320         {rfs4_op_link, nullfree, 0},
 321
 322         /* OP_LOCK = 12 */
 323         {rfs4_op_lock, lock_denied_free, 0},
 324
 325         /* OP_LOCKT = 13 */
 326         {rfs4_op_lockt, lock_denied_free, 0},
 327
 328         /* OP_LOCKU = 14 */
 329         {rfs4_op_locku, nullfree, 0},
 330
 331         /* OP_LOOKUP = 15 */
 332         {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 333
 334         /* OP_LOOKUPP = 16 */
 335         {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 336
 337         /* OP_NVERIFY = 17 */
 338         {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
 339
 340         /* OP_OPEN = 18 */
 341         {rfs4_op_open, rfs4_free_reply, 0},
 342
 343         /* OP_OPENATTR = 19 */
 344         {rfs4_op_openattr, nullfree, 0},
 345
 346         /* OP_OPEN_CONFIRM = 20 */
 347         {rfs4_op_open_confirm, nullfree, 0},
 348
 349         /* OP_OPEN_DOWNGRADE = 21 */
 350         {rfs4_op_open_downgrade, nullfree, 0},
 351
 352         /* OP_OPEN_PUTFH = 22 */
 353         {rfs4_op_putfh, nullfree, RPC_ALL},
 354
 355         /* OP_PUTPUBFH = 23 */
 356         {rfs4_op_putpubfh, nullfree, RPC_ALL},
 357
 358         /* OP_PUTROOTFH = 24 */
 359         {rfs4_op_putrootfh, nullfree, RPC_ALL},
 360
 361         /* OP_READ = 25 */
 362         {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
 363
 364         /* OP_READDIR = 26 */
 365         {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
 366
 367         /* OP_READLINK = 27 */
 368         {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
 369
 370         /* OP_REMOVE = 28 */
 371         {rfs4_op_remove, nullfree, 0},
 372
 373         /* OP_RENAME = 29 */
 374         {rfs4_op_rename, nullfree, 0},
 375
 376         /* OP_RENEW = 30 */
 377         {rfs4_op_renew, nullfree, 0},
 378
 379         /* OP_RESTOREFH = 31 */
 380         {rfs4_op_restorefh, nullfree, RPC_ALL},
 381
 382         /* OP_SAVEFH = 32 */
 383         {rfs4_op_savefh, nullfree, RPC_ALL},
 384
 385         /* OP_SECINFO = 33 */
 386         {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
 387
 388         /* OP_SETATTR = 34 */
 389         {rfs4_op_setattr, nullfree, 0},
 390
 391         /* OP_SETCLIENTID = 35 */
 392         {rfs4_op_setclientid, nullfree, 0},
 393
 394         /* OP_SETCLIENTID_CONFIRM = 36 */
 395         {rfs4_op_setclientid_confirm, nullfree, 0},
 396
 397         /* OP_VERIFY = 37 */
 398         {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
 399
 400         /* OP_WRITE = 38 */
 401         {rfs4_op_write, nullfree, 0},
 402
 403         /* OP_RELEASE_LOCKOWNER = 39 */
 404         {rfs4_op_release_lockowner, nullfree, 0},
 405 };
 406
/* Number of slots in rfsv4disptab[]. */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);

/*
 * Index one past the last dispatch slot.  It is NOT a valid index into
 * rfsv4disptab[]; it lines up with the trailing "rfs4_op_illegal" entry of
 * the DEBUG-only rfs4_op_string[] table.
 */
#define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 410
#ifdef DEBUG

/*
 * Debug-only knobs.  Their consumers are not in this part of the file;
 * presumably they gate extra diagnostics -- confirm before relying on them.
 */
int             rfs4_fillone_debug = 0;
int             rfs4_no_stub_access = 1;
int             rfs4_rddir_debug = 0;

/*
 * Printable operation names, listed in the same order as rfsv4disptab[]
 * and followed by one extra "rfs4_op_illegal" entry (index OP_ILLEGAL_IDX).
 *
 * NOTE(review): the entry at index 36 reads "rfs4_op_setclient_confirm"
 * while the handler in that dispatch slot is rfs4_op_setclientid_confirm.
 */
static char    *rfs4_op_string[] = {
        "rfs4_op_null",
        "rfs4_op_1 unused",
        "rfs4_op_2 unused",
        "rfs4_op_access",
        "rfs4_op_close",
        "rfs4_op_commit",
        "rfs4_op_create",
        "rfs4_op_delegpurge",
        "rfs4_op_delegreturn",
        "rfs4_op_getattr",
        "rfs4_op_getfh",
        "rfs4_op_link",
        "rfs4_op_lock",
        "rfs4_op_lockt",
        "rfs4_op_locku",
        "rfs4_op_lookup",
        "rfs4_op_lookupp",
        "rfs4_op_nverify",
        "rfs4_op_open",
        "rfs4_op_openattr",
        "rfs4_op_open_confirm",
        "rfs4_op_open_downgrade",
        "rfs4_op_putfh",
        "rfs4_op_putpubfh",
        "rfs4_op_putrootfh",
        "rfs4_op_read",
        "rfs4_op_readdir",
        "rfs4_op_readlink",
        "rfs4_op_remove",
        "rfs4_op_rename",
        "rfs4_op_renew",
        "rfs4_op_restorefh",
        "rfs4_op_savefh",
        "rfs4_op_secinfo",
        "rfs4_op_setattr",
        "rfs4_op_setclientid",
        "rfs4_op_setclient_confirm",
        "rfs4_op_verify",
        "rfs4_op_write",
        "rfs4_op_release_lockowner",
        "rfs4_op_illegal"
};
#endif
 461
 462 void    rfs4_ss_chkclid(rfs4_client_t *);
 463
 464 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 465
 466 extern void     rfs4_free_fs_locations4(fs_locations4 *);
 467
 468 #ifdef  nextdp
 469 #undef nextdp
 470 #endif
 471 #define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 472
/*
 * Operations vector named "deleg_rdops": wires the deleg_rd_* handlers to
 * the open, write, setattr, rwlock, space, setsecattr and vnevent slots.
 * Slots not listed here are left zero-initialised.
 */
fem_t deleg_rdops = {
        .name = "deleg_rdops",
        .femop_open = deleg_rd_open,
        .femop_write = deleg_rd_write,
        .femop_setattr = deleg_rd_setattr,
        .femop_rwlock = deleg_rd_rwlock,
        .femop_space = deleg_rd_space,
        .femop_setsecattr = deleg_rd_setsecattr,
        .femop_vnevent = deleg_rd_vnevent,
};
 483
/*
 * Operations vector named "deleg_wrops": wires the deleg_wr_* handlers to
 * the open, read, write, setattr, rwlock, space, setsecattr and vnevent
 * slots.  Unlike deleg_rdops it also supplies a read handler.
 */
fem_t deleg_wrops = {
        .name = "deleg_wrops",
        .femop_open = deleg_wr_open,
        .femop_read = deleg_wr_read,
        .femop_write = deleg_wr_write,
        .femop_setattr = deleg_wr_setattr,
        .femop_rwlock = deleg_wr_rwlock,
        .femop_space = deleg_wr_space,
        .femop_setsecattr = deleg_wr_setsecattr,
        .femop_vnevent = deleg_wr_vnevent,
};
 495
 496 int
 497 rfs4_srvrinit(void)
 498 {
 499         timespec32_t verf;
 500         int error;
 501         extern void rfs4_attr_init();
 502         extern krwlock_t rfs4_deleg_policy_lock;
 503
 504         /*
 505          * The following algorithm attempts to find a unique verifier
 506          * to be used as the write verifier returned from the server
 507          * to the client.  It is important that this verifier change
 508          * whenever the server reboots.  Of secondary importance, it
 509          * is important for the verifier to be unique between two
 510          * different servers.
 511          *
 512          * Thus, an attempt is made to use the system hostid and the
 513          * current time in seconds when the nfssrv kernel module is
 514          * loaded.  It is assumed that an NFS server will not be able
 515          * to boot and then to reboot in less than a second.  If the
 516          * hostid has not been set, then the current high resolution
 517          * time is used.  This will ensure different verifiers each
 518          * time the server reboots and minimize the chances that two
 519          * different servers will have the same verifier.
 520          * XXX - this is broken on LP64 kernels.
 521          */
 522         verf.tv_sec = (time_t)zone_get_hostid(NULL);
 523         if (verf.tv_sec != 0) {
 524                 verf.tv_nsec = gethrestime_sec();
 525         } else {
 526                 timespec_t tverf;
 527
 528                 gethrestime(&tverf);
 529                 verf.tv_sec = (time_t)tverf.tv_sec;
 530                 verf.tv_nsec = tverf.tv_nsec;
 531         }
 532
 533         Write4verf = *(uint64_t *)&verf;
 534
 535         rfs4_attr_init();
 536         mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 537
 538         /* Used to manage create/destroy of server state */
 539         mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
 540
 541         /* Used to manage access to server instance linked list */
 542         mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 543
 544         /* Used to manage access to rfs4_deleg_policy */
 545         rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 546
 547         nfs4_srv_caller_id = fs_new_caller_id();
 548
 549         lockt_sysid = lm_alloc_sysidt();
 550
 551         vsd_create(&nfs4_srv_vkey, NULL);
 552
 553         return (0);
 554 }
 555
 556 void
 557 rfs4_srvrfini(void)
 558 {
 559         extern krwlock_t rfs4_deleg_policy_lock;
 560
 561         if (lockt_sysid != LM_NOSYSID) {
 562                 lm_free_sysidt(lockt_sysid);
 563                 lockt_sysid = LM_NOSYSID;
 564         }
 565
 566         mutex_destroy(&rfs4_deleg_lock);
 567         mutex_destroy(&rfs4_state_lock);
 568         rw_destroy(&rfs4_deleg_policy_lock);
 569 }
 570
 571 void
 572 rfs4_init_compound_state(struct compound_state *cs)
 573 {
 574         bzero(cs, sizeof (*cs));
 575         cs->cont = TRUE;
 576         cs->access = CS_ACCESS_DENIED;
 577         cs->deleg = FALSE;
 578         cs->mandlock = FALSE;
 579         cs->fh.nfs_fh4_val = cs->fhbuf;
 580 }
 581
 582 void
 583 rfs4_grace_start(rfs4_servinst_t *sip)
 584 {
 585         rw_enter(&sip->rwlock, RW_WRITER);
 586         sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 587         sip->grace_period = rfs4_grace_period;
 588         rw_exit(&sip->rwlock);
 589 }
 590
 591 /*
 592  * returns true if the instance's grace period has never been started
 593  */
 594 int
 595 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 596 {
 597         time_t start_time;
 598
 599         rw_enter(&sip->rwlock, RW_READER);
 600         start_time = sip->start_time;
 601         rw_exit(&sip->rwlock);
 602
 603         return (start_time == 0);
 604 }
 605
 606 /*
 607  * Indicates if server instance is within the
 608  * grace period.
 609  */
 610 int
 611 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 612 {
 613         time_t grace_expiry;
 614
 615         rw_enter(&sip->rwlock, RW_READER);
 616         grace_expiry = sip->start_time + sip->grace_period;
 617         rw_exit(&sip->rwlock);
 618
 619         return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 620 }
 621
 622 int
 623 rfs4_clnt_in_grace(rfs4_client_t *cp)
 624 {
 625         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 626
 627         return (rfs4_servinst_in_grace(cp->rc_server_instance));
 628 }
 629
 630 /*
 631  * reset all currently active grace periods
 632  */
 633 void
 634 rfs4_grace_reset_all(void)
 635 {
 636         rfs4_servinst_t *sip;
 637
 638         mutex_enter(&rfs4_servinst_lock);
 639         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 640                 if (rfs4_servinst_in_grace(sip))
 641                         rfs4_grace_start(sip);
 642         mutex_exit(&rfs4_servinst_lock);
 643 }
 644
 645 /*
 646  * start any new instances' grace periods
 647  */
 648 void
 649 rfs4_grace_start_new(void)
 650 {
 651         rfs4_servinst_t *sip;
 652
 653         mutex_enter(&rfs4_servinst_lock);
 654         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 655                 if (rfs4_servinst_grace_new(sip))
 656                         rfs4_grace_start(sip);
 657         mutex_exit(&rfs4_servinst_lock);
 658 }
 659
 660 static rfs4_dss_path_t *
 661 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
 662 {
 663         size_t len;
 664         rfs4_dss_path_t *dss_path;
 665
 666         dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 667
 668         /*
 669          * Take a copy of the string, since the original may be overwritten.
 670          * Sadly, no strdup() in the kernel.
 671          */
 672         /* allow for NUL */
 673         len = strlen(path) + 1;
 674         dss_path->path = kmem_alloc(len, KM_SLEEP);
 675         (void) strlcpy(dss_path->path, path, len);
 676
 677         /* associate with servinst */
 678         dss_path->sip = sip;
 679         dss_path->index = index;
 680
 681         /*
 682          * Add to list of served paths.
 683          * No locking required, as we're only ever called at startup.
 684          */
 685         if (rfs4_dss_pathlist == NULL) {
 686                 /* this is the first dss_path_t */
 687
 688                 /* needed for insque/remque */
 689                 dss_path->next = dss_path->prev = dss_path;
 690
 691                 rfs4_dss_pathlist = dss_path;
 692         } else {
 693                 insque(dss_path, rfs4_dss_pathlist);
 694         }
 695
 696         return (dss_path);
 697 }
 698
 699 /*
 700  * Create a new server instance, and make it the currently active instance.
 701  * Note that starting the grace period too early will reduce the clients'
 702  * recovery window.
 703  */
 704 void
 705 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
 706 {
 707         unsigned i;
 708         rfs4_servinst_t *sip;
 709         rfs4_oldstate_t *oldstate;
 710
 711         sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 712         rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 713
 714         sip->start_time = (time_t)0;
 715         sip->grace_period = (time_t)0;
 716         sip->next = NULL;
 717         sip->prev = NULL;
 718
 719         rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 720         /*
 721          * This initial dummy entry is required to setup for insque/remque.
 722          * It must be skipped over whenever the list is traversed.
 723          */
 724         oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 725         /* insque/remque require initial list entry to be self-terminated */
 726         oldstate->next = oldstate;
 727         oldstate->prev = oldstate;
 728         sip->oldstate = oldstate;
 729
 730
 731         sip->dss_npaths = dss_npaths;
 732         sip->dss_paths = kmem_alloc(dss_npaths *
 733             sizeof (rfs4_dss_path_t *), KM_SLEEP);
 734
 735         for (i = 0; i < dss_npaths; i++) {
 736                 sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
 737         }
 738
 739         mutex_enter(&rfs4_servinst_lock);
 740         if (rfs4_cur_servinst != NULL) {
 741                 /* add to linked list */
 742                 sip->prev = rfs4_cur_servinst;
 743                 rfs4_cur_servinst->next = sip;
 744         }
 745         if (start_grace)
 746                 rfs4_grace_start(sip);
 747         /* make the new instance "current" */
 748         rfs4_cur_servinst = sip;
 749
 750         mutex_exit(&rfs4_servinst_lock);
 751 }
 752
 753 /*
 754  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 755  * all instances directly.
 756  */
 757 void
 758 rfs4_servinst_destroy_all(void)
 759 {
 760         rfs4_servinst_t *sip, *prev, *current;
 761 #ifdef DEBUG
 762         int n = 0;
 763 #endif
 764
 765         mutex_enter(&rfs4_servinst_lock);
 766         ASSERT(rfs4_cur_servinst != NULL);
 767         current = rfs4_cur_servinst;
 768         rfs4_cur_servinst = NULL;
 769         for (sip = current; sip != NULL; sip = prev) {
 770                 prev = sip->prev;
 771                 rw_destroy(&sip->rwlock);
 772                 if (sip->oldstate)
 773                         kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 774                 if (sip->dss_paths)
 775                         kmem_free(sip->dss_paths,
 776                             sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 777                 kmem_free(sip, sizeof (rfs4_servinst_t));
 778 #ifdef DEBUG
 779                 n++;
 780 #endif
 781         }
 782         mutex_exit(&rfs4_servinst_lock);
 783 }
 784
 785 /*
 786  * Assign the current server instance to a client_t.
 787  * Should be called with cp->rc_dbe held.
 788  */
 789 void
 790 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
 791 {
 792         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 793
 794         /*
 795          * The lock ensures that if the current instance is in the process
 796          * of changing, we will see the new one.
 797          */
 798         mutex_enter(&rfs4_servinst_lock);
 799         cp->rc_server_instance = sip;
 800         mutex_exit(&rfs4_servinst_lock);
 801 }
 802
 803 rfs4_servinst_t *
 804 rfs4_servinst(rfs4_client_t *cp)
 805 {
 806         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 807
 808         return (cp->rc_server_instance);
 809 }
 810
 811 /* ARGSUSED */
 812 static void
 813 nullfree(caddr_t resop)
 814 {
 815 }
 816
 817 /*
 818  * This is a fall-through for invalid or not implemented (yet) ops
 819  */
 820 /* ARGSUSED */
 821 static void
 822 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 823     struct compound_state *cs)
 824 {
 825         *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 826 }
 827
 828 /*
 829  * Check if the security flavor, nfsnum, is in the flavor_list.
 830  */
 831 bool_t
 832 in_flavor_list(int nfsnum, int *flavor_list, int count)
 833 {
 834         int i;
 835
 836         for (i = 0; i < count; i++) {
 837                 if (nfsnum == flavor_list[i])
 838                         return (TRUE);
 839         }
 840         return (FALSE);
 841 }
 842
 843 /*
 844  * Used by rfs4_op_secinfo to get the security information from the
 845  * export structure associated with the component.
 846  */
 847 /* ARGSUSED */
 848 static nfsstat4
 849 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 850 {
 851         int error, different_export = 0;
 852         vnode_t *dvp, *vp;
 853         struct exportinfo *exi = NULL;
 854         fid_t fid;
 855         uint_t count, i;
 856         secinfo4 *resok_val;
 857         struct secinfo *secp;
 858         seconfig_t *si;
 859         bool_t did_traverse = FALSE;
 860         int dotdot, walk;
 861
 862         dvp = cs->vp;
 863         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 864
 865         /*
 866          * If dotdotting, then need to check whether it's above the
 867          * root of a filesystem, or above an export point.
 868          */
 869         if (dotdot) {
 870
 871                 /*
 872                  * If dotdotting at the root of a filesystem, then
 873                  * need to traverse back to the mounted-on filesystem
 874                  * and do the dotdot lookup there.
 875                  */
 876                 if (cs->vp->v_flag & VROOT) {
 877
 878                         /*
 879                          * If at the system root, then can
 880                          * go up no further.
 881                          */
 882                         if (VN_CMP(dvp, rootdir))
 883                                 return (puterrno4(ENOENT));
 884
 885                         /*
 886                          * Traverse back to the mounted-on filesystem
 887                          */
 888                         dvp = untraverse(cs->vp);
 889
 890                         /*
 891                          * Set the different_export flag so we remember
 892                          * to pick up a new exportinfo entry for
 893                          * this new filesystem.
 894                          */
 895                         different_export = 1;
 896                 } else {
 897
 898                         /*
 899                          * If dotdotting above an export point then set
 900                          * the different_export to get new export info.
 901                          */
 902                         different_export = nfs_exported(cs->exi, cs->vp);
 903                 }
 904         }
 905
 906         /*
 907          * Get the vnode for the component "nm".
 908          */
 909         error = fop_lookup(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
 910             NULL, NULL, NULL);
 911         if (error)
 912                 return (puterrno4(error));
 913
 914         /*
 915          * If the vnode is in a pseudo filesystem, or if the security flavor
 916          * used in the request is valid but not an explicitly shared flavor,
 917          * or the access bit indicates that this is a limited access,
 918          * check whether this vnode is visible.
 919          */
 920         if (!different_export &&
 921             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
 922             cs->access & CS_ACCESS_LIMITED)) {
 923                 if (! nfs_visible(cs->exi, vp, &different_export)) {
 924                         VN_RELE(vp);
 925                         return (puterrno4(ENOENT));
 926                 }
 927         }
 928
 929         /*
 930          * If it's a mountpoint, then traverse it.
 931          */
 932         if (vn_ismntpt(vp)) {
 933                 if ((error = traverse(&vp)) != 0) {
 934                         VN_RELE(vp);
 935                         return (puterrno4(error));
 936                 }
 937                 /* remember that we had to traverse mountpoint */
 938                 did_traverse = TRUE;
 939                 different_export = 1;
 940         } else if (vp->v_vfsp != dvp->v_vfsp) {
 941                 /*
 942                  * If vp isn't a mountpoint and the vfs ptrs aren't the same,
 943                  * then vp is probably an LOFS object.  We don't need the
 944                  * realvp, we just need to know that we might have crossed
 945                  * a server fs boundary and need to call checkexport4.
 946                  * (LOFS lookup hides server fs mountpoints, and actually calls
 947                  * traverse)
 948                  */
 949                 different_export = 1;
 950         }
 951
 952         /*
 953          * Get the export information for it.
 954          */
 955         if (different_export) {
 956
 957                 bzero(&fid, sizeof (fid));
 958                 fid.fid_len = MAXFIDSZ;
 959                 error = vop_fid_pseudo(vp, &fid);
 960                 if (error) {
 961                         VN_RELE(vp);
 962                         return (puterrno4(error));
 963                 }
 964
 965                 if (dotdot)
 966                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
 967                 else
 968                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
 969
 970                 if (exi == NULL) {
 971                         if (did_traverse == TRUE) {
 972                                 /*
 973                                  * If this vnode is a mounted-on vnode,
 974                                  * but the mounted-on file system is not
 975                                  * exported, send back the secinfo for
 976                                  * the exported node that the mounted-on
 977                                  * vnode lives in.
 978                                  */
 979                                 exi = cs->exi;
 980                         } else {
 981                                 VN_RELE(vp);
 982                                 return (puterrno4(EACCES));
 983                         }
 984                 }
 985         } else {
 986                 exi = cs->exi;
 987         }
 988         ASSERT(exi != NULL);
 989
 990
 991         /*
 992          * Create the secinfo result based on the security information
 993          * from the exportinfo structure (exi).
 994          *
 995          * Return all flavors for a pseudo node.
 996          * For a real export node, return the flavor that the client
 997          * has access with.
 998          */
 999         ASSERT(RW_LOCK_HELD(&exported_lock));
1000         if (PSEUDO(exi)) {
1001                 count = exi->exi_export.ex_seccnt; /* total sec count */
1002                 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1003                 secp = exi->exi_export.ex_secinfo;
1004
1005                 for (i = 0; i < count; i++) {
1006                         si = &secp[i].s_secinfo;
1007                         resok_val[i].flavor = si->sc_rpcnum;
1008                         if (resok_val[i].flavor == RPCSEC_GSS) {
1009                                 rpcsec_gss_info *info;
1010
1011                                 info = &resok_val[i].flavor_info;
1012                                 info->qop = si->sc_qop;
1013                                 info->service = (rpc_gss_svc_t)si->sc_service;
1014
1015                                 /* get oid opaque data */
1016                                 info->oid.sec_oid4_len =
1017                                     si->sc_gss_mech_type->length;
1018                                 info->oid.sec_oid4_val = kmem_alloc(
1019                                     si->sc_gss_mech_type->length, KM_SLEEP);
1020                                 bcopy(
1021                                     si->sc_gss_mech_type->elements,
1022                                     info->oid.sec_oid4_val,
1023                                     info->oid.sec_oid4_len);
1024                         }
1025                 }
1026                 resp->SECINFO4resok_len = count;
1027                 resp->SECINFO4resok_val = resok_val;
1028         } else {
1029                 int ret_cnt = 0, k = 0;
1030                 int *flavor_list;
1031
1032                 count = exi->exi_export.ex_seccnt; /* total sec count */
1033                 secp = exi->exi_export.ex_secinfo;
1034
1035                 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1036                 /* find out which flavors to return */
1037                 for (i = 0; i < count; i ++) {
1038                         int access, flavor, perm;
1039
1040                         flavor = secp[i].s_secinfo.sc_nfsnum;
1041                         perm = secp[i].s_flags;
1042
1043                         access = nfsauth4_secinfo_access(exi, cs->req,
1044                             flavor, perm, cs->basecr);
1045
1046                         if (! (access & NFSAUTH_DENIED) &&
1047                             ! (access & NFSAUTH_WRONGSEC)) {
1048                                 flavor_list[ret_cnt] = flavor;
1049                                 ret_cnt++;
1050                         }
1051                 }
1052
1053                 /* Create the returning SECINFO value */
1054                 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1055
1056                 for (i = 0; i < count; i++) {
1057                         /*
1058                          * If the flavor is in the flavor list,
1059                          * fill in resok_val.
1060                          */
1061                         si = &secp[i].s_secinfo;
1062                         if (in_flavor_list(si->sc_nfsnum,
1063                             flavor_list, ret_cnt)) {
1064                                 resok_val[k].flavor = si->sc_rpcnum;
1065                                 if (resok_val[k].flavor == RPCSEC_GSS) {
1066                                         rpcsec_gss_info *info;
1067
1068                                         info = &resok_val[k].flavor_info;
1069                                         info->qop = si->sc_qop;
1070                                         info->service = (rpc_gss_svc_t)
1071                                             si->sc_service;
1072
1073                                         /* get oid opaque data */
1074                                         info->oid.sec_oid4_len =
1075                                             si->sc_gss_mech_type->length;
1076                                         info->oid.sec_oid4_val = kmem_alloc(
1077                                             si->sc_gss_mech_type->length,
1078                                             KM_SLEEP);
1079                                         bcopy(si->sc_gss_mech_type->elements,
1080                                             info->oid.sec_oid4_val,
1081                                             info->oid.sec_oid4_len);
1082                                 }
1083                                 k++;
1084                         }
1085                         if (k >= ret_cnt)
1086                                 break;
1087                 }
1088                 resp->SECINFO4resok_len = ret_cnt;
1089                 resp->SECINFO4resok_val = resok_val;
1090                 kmem_free(flavor_list, count * sizeof (int));
1091         }
1092
1093         VN_RELE(vp);
1094         return (NFS4_OK);
1095 }
1096
1097 /*
1098  * SECINFO (Operation 33): Obtain required security information on
1099  * the component name in the format of (security-mechanism-oid, qop, service)
1100  * triplets.
1101  */
1102 /* ARGSUSED */
1103 static void
1104 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1105     struct compound_state *cs)
1106 {
1107         SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1108         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1109         utf8string *utfnm = &args->name;
1110         uint_t len;
1111         char *nm;
1112         struct sockaddr *ca;
1113         char *name = NULL;
1114         nfsstat4 status = NFS4_OK;
1115
1116         DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1117             SECINFO4args *, args);
1118
1119         /*
1120          * Current file handle (cfh) should have been set before getting
1121          * into this function. If not, return error.
1122          */
1123         if (cs->vp == NULL) {
1124                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1125                 goto out;
1126         }
1127
1128         if (cs->vp->v_type != VDIR) {
1129                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1130                 goto out;
1131         }
1132
1133         /*
1134          * Verify the component name. If failed, error out, but
1135          * do not error out if the component name is a "..".
1136          * SECINFO will return its parents secinfo data for SECINFO "..".
1137          */
1138         status = utf8_dir_verify(utfnm);
1139         if (status != NFS4_OK) {
1140                 if (utfnm->utf8string_len != 2 ||
1141                     utfnm->utf8string_val[0] != '.' ||
1142                     utfnm->utf8string_val[1] != '.') {
1143                         *cs->statusp = resp->status = status;
1144                         goto out;
1145                 }
1146         }
1147
1148         nm = utf8_to_str(utfnm, &len, NULL);
1149         if (nm == NULL) {
1150                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1151                 goto out;
1152         }
1153
1154         if (len > MAXNAMELEN) {
1155                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1156                 kmem_free(nm, len);
1157                 goto out;
1158         }
1159
1160         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1161         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1162             MAXPATHLEN  + 1);
1163
1164         if (name == NULL) {
1165                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1166                 kmem_free(nm, len);
1167                 goto out;
1168         }
1169
1170
1171         *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1172
1173         if (name != nm)
1174                 kmem_free(name, MAXPATHLEN + 1);
1175         kmem_free(nm, len);
1176
1177 out:
1178         DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1179             SECINFO4res *, resp);
1180 }
1181
1182 /*
1183  * Free SECINFO result.
1184  */
1185 /* ARGSUSED */
1186 static void
1187 rfs4_op_secinfo_free(nfs_resop4 *resop)
1188 {
1189         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1190         int count, i;
1191         secinfo4 *resok_val;
1192
1193         /* If this is not an Ok result, nothing to free. */
1194         if (resp->status != NFS4_OK) {
1195                 return;
1196         }
1197
1198         count = resp->SECINFO4resok_len;
1199         resok_val = resp->SECINFO4resok_val;
1200
1201         for (i = 0; i < count; i++) {
1202                 if (resok_val[i].flavor == RPCSEC_GSS) {
1203                         rpcsec_gss_info *info;
1204
1205                         info = &resok_val[i].flavor_info;
1206                         kmem_free(info->oid.sec_oid4_val,
1207                             info->oid.sec_oid4_len);
1208                 }
1209         }
1210         kmem_free(resok_val, count * sizeof (secinfo4));
1211         resp->SECINFO4resok_len = 0;
1212         resp->SECINFO4resok_val = NULL;
1213 }
1214
1215 /* ARGSUSED */
1216 static void
1217 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1218     struct compound_state *cs)
1219 {
1220         ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1221         ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1222         int error;
1223         vnode_t *vp;
1224         struct vattr va;
1225         int checkwriteperm;
1226         cred_t *cr = cs->cr;
1227
1228         DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1229             ACCESS4args *, args);
1230
1231 #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1232         if (cs->access == CS_ACCESS_DENIED) {
1233                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1234                 goto out;
1235         }
1236 #endif
1237         if (cs->vp == NULL) {
1238                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1239                 goto out;
1240         }
1241
1242         ASSERT(cr != NULL);
1243
1244         vp = cs->vp;
1245
1246         /*
1247          * If the file system is exported read only, it is not appropriate
1248          * to check write permissions for regular files and directories.
1249          * Special files are interpreted by the client, so the underlying
1250          * permissions are sent back to the client for interpretation.
1251          */
1252         if (rdonly4(req, cs) &&
1253             (vp->v_type == VREG || vp->v_type == VDIR))
1254                 checkwriteperm = 0;
1255         else
1256                 checkwriteperm = 1;
1257
1258         /*
1259          * XXX
1260          * We need the mode so that we can correctly determine access
1261          * permissions relative to a mandatory lock file.  Access to
1262          * mandatory lock files is denied on the server, so it might
1263          * as well be reflected to the server during the open.
1264          */
1265         va.va_mask = AT_MODE;
1266         error = fop_getattr(vp, &va, 0, cr, NULL);
1267         if (error) {
1268                 *cs->statusp = resp->status = puterrno4(error);
1269                 goto out;
1270         }
1271         resp->access = 0;
1272         resp->supported = 0;
1273
1274         if (args->access & ACCESS4_READ) {
1275                 error = fop_access(vp, VREAD, 0, cr, NULL);
1276                 if (!error && !MANDLOCK(vp, va.va_mode))
1277                         resp->access |= ACCESS4_READ;
1278                 resp->supported |= ACCESS4_READ;
1279         }
1280         if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1281                 error = fop_access(vp, VEXEC, 0, cr, NULL);
1282                 if (!error)
1283                         resp->access |= ACCESS4_LOOKUP;
1284                 resp->supported |= ACCESS4_LOOKUP;
1285         }
1286         if (checkwriteperm &&
1287             (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1288                 error = fop_access(vp, VWRITE, 0, cr, NULL);
1289                 if (!error && !MANDLOCK(vp, va.va_mode))
1290                         resp->access |=
1291                             (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1292                 resp->supported |=
1293                     resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1294         }
1295
1296         if (checkwriteperm &&
1297             (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1298                 error = fop_access(vp, VWRITE, 0, cr, NULL);
1299                 if (!error)
1300                         resp->access |= ACCESS4_DELETE;
1301                 resp->supported |= ACCESS4_DELETE;
1302         }
1303         if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1304                 error = fop_access(vp, VEXEC, 0, cr, NULL);
1305                 if (!error && !MANDLOCK(vp, va.va_mode))
1306                         resp->access |= ACCESS4_EXECUTE;
1307                 resp->supported |= ACCESS4_EXECUTE;
1308         }
1309
1310         *cs->statusp = resp->status = NFS4_OK;
1311 out:
1312         DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1313             ACCESS4res *, resp);
1314 }
1315
1316 /* ARGSUSED */
1317 static void
1318 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1319     struct compound_state *cs)
1320 {
1321         COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1322         COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1323         int error;
1324         vnode_t *vp = cs->vp;
1325         cred_t *cr = cs->cr;
1326         vattr_t va;
1327
1328         DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1329             COMMIT4args *, args);
1330
1331         if (vp == NULL) {
1332                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1333                 goto out;
1334         }
1335         if (cs->access == CS_ACCESS_DENIED) {
1336                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1337                 goto out;
1338         }
1339
1340         if (args->offset + args->count < args->offset) {
1341                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1342                 goto out;
1343         }
1344
1345         va.va_mask = AT_UID;
1346         error = fop_getattr(vp, &va, 0, cr, NULL);
1347
1348         /*
1349          * If we can't get the attributes, then we can't do the
1350          * right access checking.  So, we'll fail the request.
1351          */
1352         if (error) {
1353                 *cs->statusp = resp->status = puterrno4(error);
1354                 goto out;
1355         }
1356         if (rdonly4(req, cs)) {
1357                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1358                 goto out;
1359         }
1360
1361         if (vp->v_type != VREG) {
1362                 if (vp->v_type == VDIR)
1363                         resp->status = NFS4ERR_ISDIR;
1364                 else
1365                         resp->status = NFS4ERR_INVAL;
1366                 *cs->statusp = resp->status;
1367                 goto out;
1368         }
1369
1370         if (crgetuid(cr) != va.va_uid &&
1371             (error = fop_access(vp, VWRITE, 0, cs->cr, NULL))) {
1372                 *cs->statusp = resp->status = puterrno4(error);
1373                 goto out;
1374         }
1375
1376         error = fop_fsync(vp, FSYNC, cr, NULL);
1377
1378         if (error) {
1379                 *cs->statusp = resp->status = puterrno4(error);
1380                 goto out;
1381         }
1382
1383         *cs->statusp = resp->status = NFS4_OK;
1384         resp->writeverf = Write4verf;
1385 out:
1386         DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1387             COMMIT4res *, resp);
1388 }
1389
1390 /*
1391  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1392  * was completed. It does the nfsv4 create for special files.
1393  */
1394 /* ARGSUSED */
1395 static vnode_t *
1396 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1397     struct compound_state *cs, vattr_t *vap, char *nm)
1398 {
1399         int error;
1400         cred_t *cr = cs->cr;
1401         vnode_t *dvp = cs->vp;
1402         vnode_t *vp = NULL;
1403         int mode;
1404         enum vcexcl excl;
1405
1406         switch (args->type) {
1407         case NF4CHR:
1408         case NF4BLK:
1409                 if (secpolicy_sys_devices(cr) != 0) {
1410                         *cs->statusp = resp->status = NFS4ERR_PERM;
1411                         return (NULL);
1412                 }
1413                 if (args->type == NF4CHR)
1414                         vap->va_type = VCHR;
1415                 else
1416                         vap->va_type = VBLK;
1417                 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1418                     args->ftype4_u.devdata.specdata2);
1419                 vap->va_mask |= AT_RDEV;
1420                 break;
1421         case NF4SOCK:
1422                 vap->va_type = VSOCK;
1423                 break;
1424         case NF4FIFO:
1425                 vap->va_type = VFIFO;
1426                 break;
1427         default:
1428                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1429                 return (NULL);
1430         }
1431
1432         /*
1433          * Must specify the mode.
1434          */
1435         if (!(vap->va_mask & AT_MODE)) {
1436                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1437                 return (NULL);
1438         }
1439
1440         excl = EXCL;
1441
1442         mode = 0;
1443
1444         error = fop_create(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1445         if (error) {
1446                 *cs->statusp = resp->status = puterrno4(error);
1447                 return (NULL);
1448         }
1449         return (vp);
1450 }
1451
1452 /*
1453  * nfsv4 create is used to create non-regular files. For regular files,
1454  * use nfsv4 open.
1455  */
1456 /* ARGSUSED */
1457 static void
1458 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1459     struct compound_state *cs)
1460 {
1461         CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1462         CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1463         int error;
1464         struct vattr bva, iva, iva2, ava, *vap;
1465         cred_t *cr = cs->cr;
1466         vnode_t *dvp = cs->vp;
1467         vnode_t *vp = NULL;
1468         vnode_t *realvp;
1469         char *nm, *lnm;
1470         uint_t len, llen;
1471         int syncval = 0;
1472         struct nfs4_svgetit_arg sarg;
1473         struct nfs4_ntov_table ntov;
1474         struct statvfs64 sb;
1475         nfsstat4 status;
1476         struct sockaddr *ca;
1477         char *name = NULL;
1478         char *lname = NULL;
1479
1480         DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1481             CREATE4args *, args);
1482
1483         resp->attrset = 0;
1484
1485         if (dvp == NULL) {
1486                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1487                 goto out;
1488         }
1489
1490         /*
1491          * If there is an unshared filesystem mounted on this vnode,
1492          * do not allow to create an object in this directory.
1493          */
1494         if (vn_ismntpt(dvp)) {
1495                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1496                 goto out;
1497         }
1498
1499         /* Verify that type is correct */
1500         switch (args->type) {
1501         case NF4LNK:
1502         case NF4BLK:
1503         case NF4CHR:
1504         case NF4SOCK:
1505         case NF4FIFO:
1506         case NF4DIR:
1507                 break;
1508         default:
1509                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1510                 goto out;
1511         };
1512
1513         if (cs->access == CS_ACCESS_DENIED) {
1514                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1515                 goto out;
1516         }
1517         if (dvp->v_type != VDIR) {
1518                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1519                 goto out;
1520         }
1521         status = utf8_dir_verify(&args->objname);
1522         if (status != NFS4_OK) {
1523                 *cs->statusp = resp->status = status;
1524                 goto out;
1525         }
1526
1527         if (rdonly4(req, cs)) {
1528                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1529                 goto out;
1530         }
1531
1532         /*
1533          * Name of newly created object
1534          */
1535         nm = utf8_to_fn(&args->objname, &len, NULL);
1536         if (nm == NULL) {
1537                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1538                 goto out;
1539         }
1540
1541         if (len > MAXNAMELEN) {
1542                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1543                 kmem_free(nm, len);
1544                 goto out;
1545         }
1546
1547         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1548         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1549             MAXPATHLEN  + 1);
1550
1551         if (name == NULL) {
1552                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1553                 kmem_free(nm, len);
1554                 goto out;
1555         }
1556
1557         resp->attrset = 0;
1558
1559         sarg.sbp = &sb;
1560         sarg.is_referral = B_FALSE;
1561         nfs4_ntov_table_init(&ntov);
1562
1563         status = do_rfs4_set_attrs(&resp->attrset,
1564             &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1565
1566         if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1567                 status = NFS4ERR_INVAL;
1568
1569         if (status != NFS4_OK) {
1570                 *cs->statusp = resp->status = status;
1571                 if (name != nm)
1572                         kmem_free(name, MAXPATHLEN + 1);
1573                 kmem_free(nm, len);
1574                 nfs4_ntov_table_free(&ntov, &sarg);
1575                 resp->attrset = 0;
1576                 goto out;
1577         }
1578
1579         /* Get "before" change value */
1580         bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1581         error = fop_getattr(dvp, &bva, 0, cr, NULL);
1582         if (error) {
1583                 *cs->statusp = resp->status = puterrno4(error);
1584                 if (name != nm)
1585                         kmem_free(name, MAXPATHLEN + 1);
1586                 kmem_free(nm, len);
1587                 nfs4_ntov_table_free(&ntov, &sarg);
1588                 resp->attrset = 0;
1589                 goto out;
1590         }
1591         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1592
1593         vap = sarg.vap;
1594
1595         /*
1596          * Set the default initial values for attributes when the parent
1597          * directory does not have the VSUID/VSGID bit set and they have
1598          * not been specified in createattrs.
1599          */
1600         if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1601                 vap->va_uid = crgetuid(cr);
1602                 vap->va_mask |= AT_UID;
1603         }
1604         if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1605                 vap->va_gid = crgetgid(cr);
1606                 vap->va_mask |= AT_GID;
1607         }
1608
1609         vap->va_mask |= AT_TYPE;
1610         switch (args->type) {
1611         case NF4DIR:
1612                 vap->va_type = VDIR;
1613                 if ((vap->va_mask & AT_MODE) == 0) {
1614                         vap->va_mode = 0700;    /* default: owner rwx only */
1615                         vap->va_mask |= AT_MODE;
1616                 }
1617                 error = fop_mkdir(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1618                 if (error)
1619                         break;
1620
1621                 /*
1622                  * Get the initial "after" sequence number, if it fails,
1623                  * set to zero
1624                  */
1625                 iva.va_mask = AT_SEQ;
1626                 if (fop_getattr(dvp, &iva, 0, cs->cr, NULL))
1627                         iva.va_seq = 0;
1628                 break;
1629         case NF4LNK:
1630                 vap->va_type = VLNK;
1631                 if ((vap->va_mask & AT_MODE) == 0) {
1632                         vap->va_mode = 0700;    /* default: owner rwx only */
1633                         vap->va_mask |= AT_MODE;
1634                 }
1635
1636                 /*
1637                  * symlink names must be treated as data
1638                  */
1639                 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1640                     &llen, NULL);
1641
1642                 if (lnm == NULL) {
1643                         *cs->statusp = resp->status = NFS4ERR_INVAL;
1644                         if (name != nm)
1645                                 kmem_free(name, MAXPATHLEN + 1);
1646                         kmem_free(nm, len);
1647                         nfs4_ntov_table_free(&ntov, &sarg);
1648                         resp->attrset = 0;
1649                         goto out;
1650                 }
1651
1652                 if (llen > MAXPATHLEN) {
1653                         *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1654                         if (name != nm)
1655                                 kmem_free(name, MAXPATHLEN + 1);
1656                         kmem_free(nm, len);
1657                         kmem_free(lnm, llen);
1658                         nfs4_ntov_table_free(&ntov, &sarg);
1659                         resp->attrset = 0;
1660                         goto out;
1661                 }
1662
1663                 lname = nfscmd_convname(ca, cs->exi, lnm,
1664                     NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1665
1666                 if (lname == NULL) {
1667                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1668                         if (name != nm)
1669                                 kmem_free(name, MAXPATHLEN + 1);
1670                         kmem_free(nm, len);
1671                         kmem_free(lnm, llen);
1672                         nfs4_ntov_table_free(&ntov, &sarg);
1673                         resp->attrset = 0;
1674                         goto out;
1675                 }
1676
1677                 error = fop_symlink(dvp, name, vap, lname, cr, NULL, 0);
1678                 if (lname != lnm)
1679                         kmem_free(lname, MAXPATHLEN + 1);
1680                 kmem_free(lnm, llen);
1681                 if (error)
1682                         break;
1683
1684                 /*
1685                  * Get the initial "after" sequence number, if it fails,
1686                  * set to zero
1687                  */
1688                 iva.va_mask = AT_SEQ;
1689                 if (fop_getattr(dvp, &iva, 0, cs->cr, NULL))
1690                         iva.va_seq = 0;
1691
1692                 error = fop_lookup(dvp, name, &vp, NULL, 0, NULL, cr,
1693                     NULL, NULL, NULL);
1694                 if (error)
1695                         break;
1696
1697                 /*
1698                  * va_seq is not safe over VOP calls, check it again
1699                  * if it has changed zero out iva to force atomic = FALSE.
1700                  */
1701                 iva2.va_mask = AT_SEQ;
1702                 if (fop_getattr(dvp, &iva2, 0, cs->cr, NULL) ||
1703                     iva2.va_seq != iva.va_seq)
1704                         iva.va_seq = 0;
1705                 break;
1706         default:
1707                 /*
1708                  * probably a special file.
1709                  */
1710                 if ((vap->va_mask & AT_MODE) == 0) {
1711                         vap->va_mode = 0600;    /* default: owner rw only */
1712                         vap->va_mask |= AT_MODE;
1713                 }
1714                 syncval = FNODSYNC;
1715                 /*
1716                  * We know this will only generate one VOP call
1717                  */
1718                 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1719
1720                 if (vp == NULL) {
1721                         if (name != nm)
1722                                 kmem_free(name, MAXPATHLEN + 1);
1723                         kmem_free(nm, len);
1724                         nfs4_ntov_table_free(&ntov, &sarg);
1725                         resp->attrset = 0;
1726                         goto out;
1727                 }
1728
1729                 /*
1730                  * Get the initial "after" sequence number, if it fails,
1731                  * set to zero
1732                  */
1733                 iva.va_mask = AT_SEQ;
1734                 if (fop_getattr(dvp, &iva, 0, cs->cr, NULL))
1735                         iva.va_seq = 0;
1736
1737                 break;
1738         }
1739         if (name != nm)
1740                 kmem_free(name, MAXPATHLEN + 1);
1741         kmem_free(nm, len);
1742
1743         if (error) {
1744                 *cs->statusp = resp->status = puterrno4(error);
1745         }
1746
1747         /*
1748          * Force modified data and metadata out to stable storage.
1749          */
1750         (void) fop_fsync(dvp, 0, cr, NULL);
1751
1752         if (resp->status != NFS4_OK) {
1753                 if (vp != NULL)
1754                         VN_RELE(vp);
1755                 nfs4_ntov_table_free(&ntov, &sarg);
1756                 resp->attrset = 0;
1757                 goto out;
1758         }
1759
1760         /*
1761          * Finish setup of cinfo response, "before" value already set.
1762          * Get "after" change value, if it fails, simply return the
1763          * before value.
1764          */
1765         ava.va_mask = AT_CTIME|AT_SEQ;
1766         if (fop_getattr(dvp, &ava, 0, cr, NULL)) {
1767                 ava.va_ctime = bva.va_ctime;
1768                 ava.va_seq = 0;
1769         }
1770         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1771
1772         /*
1773          * True verification that object was created with correct
1774          * attrs is impossible.  The attrs could have been changed
1775          * immediately after object creation.  If attributes did
1776          * not verify, the only recourse for the server is to
1777          * destroy the object.  Maybe if some attrs (like gid)
1778          * are set incorrectly, the object should be destroyed;
1779          * however, seems bad as a default policy.  Do we really
1780          * want to destroy an object over one of the times not
1781          * verifying correctly?  For these reasons, the server
1782          * currently sets bits in attrset for createattrs
1783          * that were set; however, no verification is done.
1784          *
1785          * vmask_to_nmask accounts for vattr bits set on create
1786          *      [do_rfs4_set_attrs() only sets resp bits for
1787          *       non-vattr/vfs bits.]
1788          * Mask off any bits set by default so as not to return
1789          * more attrset bits than were requested in createattrs
1790          */
1791         nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1792         resp->attrset &= args->createattrs.attrmask;
1793         nfs4_ntov_table_free(&ntov, &sarg);
1794
1795         error = makefh4(&cs->fh, vp, cs->exi);
1796         if (error) {
1797                 *cs->statusp = resp->status = puterrno4(error);
1798         }
1799
1800         /*
1801          * The cinfo.atomic = TRUE only if we got no errors, we have
1802          * non-zero va_seq's, and it has incremented by exactly one
1803          * during the creation and it didn't change during the fop_lookup
1804          * or fop_fsync.
1805          */
1806         if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1807             iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1808                 resp->cinfo.atomic = TRUE;
1809         else
1810                 resp->cinfo.atomic = FALSE;
1811
1812         /*
1813          * Force modified metadata out to stable storage.
1814          *
1815          * if a underlying vp exists, pass it to fop_fsync
1816          */
1817         if (fop_realvp(vp, &realvp, NULL) == 0)
1818                 (void) fop_fsync(realvp, syncval, cr, NULL);
1819         else
1820                 (void) fop_fsync(vp, syncval, cr, NULL);
1821
1822         if (resp->status != NFS4_OK) {
1823                 VN_RELE(vp);
1824                 goto out;
1825         }
1826         if (cs->vp)
1827                 VN_RELE(cs->vp);
1828
1829         cs->vp = vp;
1830         *cs->statusp = resp->status = NFS4_OK;
1831 out:
1832         DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1833             CREATE4res *, resp);
1834 }
1835
1836 /*ARGSUSED*/
1837 static void
1838 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1839     struct compound_state *cs)
1840 {
1841         DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1842             DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1843
1844         rfs4_op_inval(argop, resop, req, cs);
1845
1846         DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1847             DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1848 }
1849
1850 /*ARGSUSED*/
1851 static void
1852 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1853     struct compound_state *cs)
1854 {
1855         DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1856         DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1857         rfs4_deleg_state_t *dsp;
1858         nfsstat4 status;
1859
1860         DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1861             DELEGRETURN4args *, args);
1862
1863         status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1864         resp->status = *cs->statusp = status;
1865         if (status != NFS4_OK)
1866                 goto out;
1867
1868         /* Ensure specified filehandle matches */
1869         if (cs->vp != dsp->rds_finfo->rf_vp) {
1870                 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1871         } else
1872                 rfs4_return_deleg(dsp, FALSE);
1873
1874         rfs4_update_lease(dsp->rds_client);
1875
1876         rfs4_deleg_state_rele(dsp);
1877 out:
1878         DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
1879             DELEGRETURN4res *, resp);
1880 }
1881
1882 /*
1883  * Check to see if a given "flavor" is an explicitly shared flavor.
1884  * The assumption of this routine is the "flavor" is already a valid
1885  * flavor in the secinfo list of "exi".
1886  *
1887  *      e.g.
1888  *              # share -o sec=flavor1 /export
1889  *              # share -o sec=flavor2 /export/home
1890  *
1891  *              flavor2 is not an explicitly shared flavor for /export,
1892  *              however it is in the secinfo list for /export thru the
1893  *              server namespace setup.
1894  */
1895 int
1896 is_exported_sec(int flavor, struct exportinfo *exi)
1897 {
1898         int     i;
1899         struct secinfo *sp;
1900
1901         sp = exi->exi_export.ex_secinfo;
1902         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1903                 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1904                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1905                         return (SEC_REF_EXPORTED(&sp[i]));
1906                 }
1907         }
1908
1909         /* Should not reach this point based on the assumption */
1910         return (0);
1911 }
1912
1913 /*
1914  * Check if the security flavor used in the request matches what is
1915  * required at the export point or at the root pseudo node (exi_root).
1916  *
1917  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1918  *
1919  */
1920 static int
1921 secinfo_match_or_authnone(struct compound_state *cs)
1922 {
1923         int     i;
1924         struct secinfo *sp;
1925
1926         /*
1927          * Check cs->nfsflavor (from the request) against
1928          * the current export data in cs->exi.
1929          */
1930         sp = cs->exi->exi_export.ex_secinfo;
1931         for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1932                 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1933                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1934                         return (1);
1935         }
1936
1937         return (0);
1938 }
1939
1940 /*
1941  * Check the access authority for the client and return the correct error.
1942  */
1943 nfsstat4
1944 call_checkauth4(struct compound_state *cs, struct svc_req *req)
1945 {
1946         int     authres;
1947
1948         /*
1949          * First, check if the security flavor used in the request
1950          * are among the flavors set in the server namespace.
1951          */
1952         if (!secinfo_match_or_authnone(cs)) {
1953                 *cs->statusp = NFS4ERR_WRONGSEC;
1954                 return (*cs->statusp);
1955         }
1956
1957         authres = checkauth4(cs, req);
1958
1959         if (authres > 0) {
1960                 *cs->statusp = NFS4_OK;
1961                 if (! (cs->access & CS_ACCESS_LIMITED))
1962                         cs->access = CS_ACCESS_OK;
1963         } else if (authres == 0) {
1964                 *cs->statusp = NFS4ERR_ACCESS;
1965         } else if (authres == -2) {
1966                 *cs->statusp = NFS4ERR_WRONGSEC;
1967         } else {
1968                 *cs->statusp = NFS4ERR_DELAY;
1969         }
1970         return (*cs->statusp);
1971 }
1972
1973 /*
1974  * bitmap4_to_attrmask is called by getattr and readdir.
1975  * It sets up the vattr mask and determines whether vfsstat call is needed
1976  * based on the input bitmap.
1977  * Returns nfsv4 status.
1978  */
1979 static nfsstat4
1980 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
1981 {
1982         int i;
1983         uint_t  va_mask;
1984         struct statvfs64 *sbp = sargp->sbp;
1985
1986         sargp->sbp = NULL;
1987         sargp->flag = 0;
1988         sargp->rdattr_error = NFS4_OK;
1989         sargp->mntdfid_set = FALSE;
1990         if (sargp->cs->vp)
1991                 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
1992                     FH4_ATTRDIR | FH4_NAMEDATTR);
1993         else
1994                 sargp->xattr = 0;
1995
1996         /*
1997          * Set rdattr_error_req to true if return error per
1998          * failed entry rather than fail the readdir.
1999          */
2000         if (breq & FATTR4_RDATTR_ERROR_MASK)
2001                 sargp->rdattr_error_req = 1;
2002         else
2003                 sargp->rdattr_error_req = 0;
2004
2005         /*
2006          * generate the va_mask
2007          * Handle the easy cases first
2008          */
2009         switch (breq) {
2010         case NFS4_NTOV_ATTR_MASK:
2011                 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2012                 return (NFS4_OK);
2013
2014         case NFS4_FS_ATTR_MASK:
2015                 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2016                 sargp->sbp = sbp;
2017                 return (NFS4_OK);
2018
2019         case NFS4_NTOV_ATTR_CACHE_MASK:
2020                 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2021                 return (NFS4_OK);
2022
2023         case FATTR4_LEASE_TIME_MASK:
2024                 sargp->vap->va_mask = 0;
2025                 return (NFS4_OK);
2026
2027         default:
2028                 va_mask = 0;
2029                 for (i = 0; i < nfs4_ntov_map_size; i++) {
2030                         if ((breq & nfs4_ntov_map[i].fbit) &&
2031                             nfs4_ntov_map[i].vbit)
2032                                 va_mask |= nfs4_ntov_map[i].vbit;
2033                 }
2034
2035                 /*
2036                  * Check is vfsstat is needed
2037                  */
2038                 if (breq & NFS4_FS_ATTR_MASK)
2039                         sargp->sbp = sbp;
2040
2041                 sargp->vap->va_mask = va_mask;
2042                 return (NFS4_OK);
2043         }
2044         /* NOTREACHED */
2045 }
2046
2047 /*
2048  * bitmap4_get_sysattrs is called by getattr and readdir.
2049  * It calls both fop_getattr and VFS_STATVFS calls to get the attrs.
2050  * Returns nfsv4 status.
2051  */
2052 static nfsstat4
2053 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2054 {
2055         int error;
2056         struct compound_state *cs = sargp->cs;
2057         vnode_t *vp = cs->vp;
2058
2059         if (sargp->sbp != NULL) {
2060                 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2061                         sargp->sbp = NULL;      /* to identify error */
2062                         return (puterrno4(error));
2063                 }
2064         }
2065
2066         return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2067 }
2068
2069 static void
2070 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2071 {
2072         ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2073             KM_SLEEP);
2074         ntovp->attrcnt = 0;
2075         ntovp->vfsstat = FALSE;
2076 }
2077
2078 static void
2079 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2080     struct nfs4_svgetit_arg *sargp)
2081 {
2082         int i;
2083         union nfs4_attr_u *na;
2084         uint8_t *amap;
2085
2086         /*
2087          * XXX Should do the same checks for whether the bit is set
2088          */
2089         for (i = 0, na = ntovp->na, amap = ntovp->amap;
2090             i < ntovp->attrcnt; i++, na++, amap++) {
2091                 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2092                     NFS4ATTR_FREEIT, sargp, na);
2093         }
2094         if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2095                 /*
2096                  * xdr_free for getattr will be done later
2097                  */
2098                 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2099                     i < ntovp->attrcnt; i++, na++, amap++) {
2100                         xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2101                 }
2102         }
2103         kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2104 }
2105
2106 /*
2107  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2108  */
2109 static nfsstat4
2110 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2111     struct nfs4_svgetit_arg *sargp)
2112 {
2113         int error = 0;
2114         int i, k;
2115         struct nfs4_ntov_table ntov;
2116         XDR xdr;
2117         ulong_t xdr_size;
2118         char *xdr_attrs;
2119         nfsstat4 status = NFS4_OK;
2120         nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2121         union nfs4_attr_u *na;
2122         uint8_t *amap;
2123
2124         sargp->op = NFS4ATTR_GETIT;
2125         sargp->flag = 0;
2126
2127         fattrp->attrmask = 0;
2128         /* if no bits requested, then return empty fattr4 */
2129         if (breq == 0) {
2130                 fattrp->attrlist4_len = 0;
2131                 fattrp->attrlist4 = NULL;
2132                 return (NFS4_OK);
2133         }
2134
2135         /*
2136          * return NFS4ERR_INVAL when client requests write-only attrs
2137          */
2138         if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2139                 return (NFS4ERR_INVAL);
2140
2141         nfs4_ntov_table_init(&ntov);
2142         na = ntov.na;
2143         amap = ntov.amap;
2144
2145         /*
2146          * Now loop to get or verify the attrs
2147          */
2148         for (i = 0; i < nfs4_ntov_map_size; i++) {
2149                 if (breq & nfs4_ntov_map[i].fbit) {
2150                         if ((*nfs4_ntov_map[i].sv_getit)(
2151                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2152
2153                                 error = (*nfs4_ntov_map[i].sv_getit)(
2154                                     NFS4ATTR_GETIT, sargp, na);
2155
2156                                 /*
2157                                  * Possible error values:
2158                                  * >0 if sv_getit failed to
2159                                  * get the attr; 0 if succeeded;
2160                                  * <0 if rdattr_error and the
2161                                  * attribute cannot be returned.
2162                                  */
2163                                 if (error && !(sargp->rdattr_error_req))
2164                                         goto done;
2165                                 /*
2166                                  * If error then just for entry
2167                                  */
2168                                 if (error == 0) {
2169                                         fattrp->attrmask |=
2170                                             nfs4_ntov_map[i].fbit;
2171                                         *amap++ =
2172                                             (uint8_t)nfs4_ntov_map[i].nval;
2173                                         na++;
2174                                         (ntov.attrcnt)++;
2175                                 } else if ((error > 0) &&
2176                                     (sargp->rdattr_error == NFS4_OK)) {
2177                                         sargp->rdattr_error = puterrno4(error);
2178                                 }
2179                                 error = 0;
2180                         }
2181                 }
2182         }
2183
2184         /*
2185          * If rdattr_error was set after the return value for it was assigned,
2186          * update it.
2187          */
2188         if (prev_rdattr_error != sargp->rdattr_error) {
2189                 na = ntov.na;
2190                 amap = ntov.amap;
2191                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2192                         k = *amap;
2193                         if (k < FATTR4_RDATTR_ERROR) {
2194                                 continue;
2195                         }
2196                         if ((k == FATTR4_RDATTR_ERROR) &&
2197                             ((*nfs4_ntov_map[k].sv_getit)(
2198                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2199
2200                                 (void) (*nfs4_ntov_map[k].sv_getit)(
2201                                     NFS4ATTR_GETIT, sargp, na);
2202                         }
2203                         break;
2204                 }
2205         }
2206
2207         xdr_size = 0;
2208         na = ntov.na;
2209         amap = ntov.amap;
2210         for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2211                 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2212         }
2213
2214         fattrp->attrlist4_len = xdr_size;
2215         if (xdr_size) {
2216                 /* freed by rfs4_op_getattr_free() */
2217                 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2218
2219                 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2220
2221                 na = ntov.na;
2222                 amap = ntov.amap;
2223                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2224                         if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2225                                 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2226                                     int, *amap);
2227                                 status = NFS4ERR_SERVERFAULT;
2228                                 break;
2229                         }
2230                 }
2231                 /* xdrmem_destroy(&xdrs); */    /* NO-OP */
2232         } else {
2233                 fattrp->attrlist4 = NULL;
2234         }
2235 done:
2236
2237         nfs4_ntov_table_free(&ntov, sargp);
2238
2239         if (error != 0)
2240                 status = puterrno4(error);
2241
2242         return (status);
2243 }
2244
2245 /* ARGSUSED */
2246 static void
2247 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2248     struct compound_state *cs)
2249 {
2250         GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2251         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2252         struct nfs4_svgetit_arg sarg;
2253         struct statvfs64 sb;
2254         nfsstat4 status;
2255
2256         DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2257             GETATTR4args *, args);
2258
2259         if (cs->vp == NULL) {
2260                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2261                 goto out;
2262         }
2263
2264         if (cs->access == CS_ACCESS_DENIED) {
2265                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2266                 goto out;
2267         }
2268
2269         sarg.sbp = &sb;
2270         sarg.cs = cs;
2271         sarg.is_referral = B_FALSE;
2272
2273         status = bitmap4_to_attrmask(args->attr_request, &sarg);
2274         if (status == NFS4_OK) {
2275
2276                 status = bitmap4_get_sysattrs(&sarg);
2277                 if (status == NFS4_OK) {
2278
2279                         /* Is this a referral? */
2280                         if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2281                                 /* Older V4 Solaris client sees a link */
2282                                 if (client_is_downrev(req))
2283                                         sarg.vap->va_type = VLNK;
2284                                 else
2285                                         sarg.is_referral = B_TRUE;
2286                         }
2287
2288                         status = do_rfs4_op_getattr(args->attr_request,
2289                             &resp->obj_attributes, &sarg);
2290                 }
2291         }
2292         *cs->statusp = resp->status = status;
2293 out:
2294         DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2295             GETATTR4res *, resp);
2296 }
2297
2298 static void
2299 rfs4_op_getattr_free(nfs_resop4 *resop)
2300 {
2301         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2302
2303         nfs4_fattr4_free(&resp->obj_attributes);
2304 }
2305
2306 /* ARGSUSED */
2307 static void
2308 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2309     struct compound_state *cs)
2310 {
2311         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2312
2313         DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2314
2315         if (cs->vp == NULL) {
2316                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2317                 goto out;
2318         }
2319         if (cs->access == CS_ACCESS_DENIED) {
2320                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2321                 goto out;
2322         }
2323
2324         /* check for reparse point at the share point */
2325         if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2326                 /* it's all bad */
2327                 cs->exi->exi_moved = 1;
2328                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2329                 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2330                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2331                 return;
2332         }
2333
2334         /* check for reparse point at vp */
2335         if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2336                 /* it's not all bad */
2337                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2338                 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2339                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2340                 return;
2341         }
2342
2343         resp->object.nfs_fh4_val =
2344             kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2345         nfs_fh4_copy(&cs->fh, &resp->object);
2346         *cs->statusp = resp->status = NFS4_OK;
2347 out:
2348         DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2349             GETFH4res *, resp);
2350 }
2351
2352 static void
2353 rfs4_op_getfh_free(nfs_resop4 *resop)
2354 {
2355         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2356
2357         if (resp->status == NFS4_OK &&
2358             resp->object.nfs_fh4_val != NULL) {
2359                 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2360                 resp->object.nfs_fh4_val = NULL;
2361                 resp->object.nfs_fh4_len = 0;
2362         }
2363 }
2364
2365 /*
2366  * illegal: args: void
2367  *          res : status (NFS4ERR_OP_ILLEGAL)
2368  */
2369 /* ARGSUSED */
2370 static void
2371 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2372     struct svc_req *req, struct compound_state *cs)
2373 {
2374         ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2375
2376         resop->resop = OP_ILLEGAL;
2377         *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2378 }
2379
2380 /*
2381  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2382  *       res: status. If success - CURRENT_FH unchanged, return change_info
2383  */
2384 /* ARGSUSED */
2385 static void
2386 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2387     struct compound_state *cs)
2388 {
2389         LINK4args *args = &argop->nfs_argop4_u.oplink;
2390         LINK4res *resp = &resop->nfs_resop4_u.oplink;
2391         int error;
2392         vnode_t *vp;
2393         vnode_t *dvp;
2394         struct vattr bdva, idva, adva;
2395         char *nm;
2396         uint_t  len;
2397         struct sockaddr *ca;
2398         char *name = NULL;
2399         nfsstat4 status;
2400
2401         DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2402             LINK4args *, args);
2403
2404         /* SAVED_FH: source object */
2405         vp = cs->saved_vp;
2406         if (vp == NULL) {
2407                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2408                 goto out;
2409         }
2410
2411         /* CURRENT_FH: target directory */
2412         dvp = cs->vp;
2413         if (dvp == NULL) {
2414                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2415                 goto out;
2416         }
2417
2418         /*
2419          * If there is a non-shared filesystem mounted on this vnode,
2420          * do not allow to link any file in this directory.
2421          */
2422         if (vn_ismntpt(dvp)) {
2423                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2424                 goto out;
2425         }
2426
2427         if (cs->access == CS_ACCESS_DENIED) {
2428                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2429                 goto out;
2430         }
2431
2432         /* Check source object's type validity */
2433         if (vp->v_type == VDIR) {
2434                 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2435                 goto out;
2436         }
2437
2438         /* Check target directory's type */
2439         if (dvp->v_type != VDIR) {
2440                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2441                 goto out;
2442         }
2443
2444         if (cs->saved_exi != cs->exi) {
2445                 *cs->statusp = resp->status = NFS4ERR_XDEV;
2446                 goto out;
2447         }
2448
2449         status = utf8_dir_verify(&args->newname);
2450         if (status != NFS4_OK) {
2451                 *cs->statusp = resp->status = status;
2452                 goto out;
2453         }
2454
2455         nm = utf8_to_fn(&args->newname, &len, NULL);
2456         if (nm == NULL) {
2457                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2458                 goto out;
2459         }
2460
2461         if (len > MAXNAMELEN) {
2462                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2463                 kmem_free(nm, len);
2464                 goto out;
2465         }
2466
2467         if (rdonly4(req, cs)) {
2468                 *cs->statusp = resp->status = NFS4ERR_ROFS;
2469                 kmem_free(nm, len);
2470                 goto out;
2471         }
2472
2473         /* Get "before" change value */
2474         bdva.va_mask = AT_CTIME|AT_SEQ;
2475         error = fop_getattr(dvp, &bdva, 0, cs->cr, NULL);
2476         if (error) {
2477                 *cs->statusp = resp->status = puterrno4(error);
2478                 kmem_free(nm, len);
2479                 goto out;
2480         }
2481
2482         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2483         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2484             MAXPATHLEN  + 1);
2485
2486         if (name == NULL) {
2487                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2488                 kmem_free(nm, len);
2489                 goto out;
2490         }
2491
2492         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2493
2494         error = fop_link(dvp, vp, name, cs->cr, NULL, 0);
2495
2496         if (nm != name)
2497                 kmem_free(name, MAXPATHLEN + 1);
2498         kmem_free(nm, len);
2499
2500         /*
2501          * Get the initial "after" sequence number, if it fails, set to zero
2502          */
2503         idva.va_mask = AT_SEQ;
2504         if (fop_getattr(dvp, &idva, 0, cs->cr, NULL))
2505                 idva.va_seq = 0;
2506
2507         /*
2508          * Force modified data and metadata out to stable storage.
2509          */
2510         (void) fop_fsync(vp, FNODSYNC, cs->cr, NULL);
2511         (void) fop_fsync(dvp, 0, cs->cr, NULL);
2512
2513         if (error) {
2514                 *cs->statusp = resp->status = puterrno4(error);
2515                 goto out;
2516         }
2517
2518         /*
2519          * Get "after" change value, if it fails, simply return the
2520          * before value.
2521          */
2522         adva.va_mask = AT_CTIME|AT_SEQ;
2523         if (fop_getattr(dvp, &adva, 0, cs->cr, NULL)) {
2524                 adva.va_ctime = bdva.va_ctime;
2525                 adva.va_seq = 0;
2526         }
2527
2528         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2529
2530         /*
2531          * The cinfo.atomic = TRUE only if we have
2532          * non-zero va_seq's, and it has incremented by exactly one
2533          * during the fop_link and it didn't change during the fop_fsync.
2534          */
2535         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2536             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2537                 resp->cinfo.atomic = TRUE;
2538         else
2539                 resp->cinfo.atomic = FALSE;
2540
2541         *cs->statusp = resp->status = NFS4_OK;
2542 out:
2543         DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2544             LINK4res *, resp);
2545 }
2546
2547 /*
2548  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2549  */
2550
2551 /* ARGSUSED */
2552 static nfsstat4
2553 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2554 {
2555         int error;
2556         int different_export = 0;
2557         vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2558         struct exportinfo *exi = NULL, *pre_exi = NULL;
2559         nfsstat4 stat;
2560         fid_t fid;
2561         int attrdir, dotdot, walk;
2562         bool_t is_newvp = FALSE;
2563
2564         if (cs->vp->v_flag & V_XATTRDIR) {
2565                 attrdir = 1;
2566                 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2567         } else {
2568                 attrdir = 0;
2569                 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2570         }
2571
2572         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2573
2574         /*
2575          * If dotdotting, then need to check whether it's
2576          * above the root of a filesystem, or above an
2577          * export point.
2578          */
2579         if (dotdot) {
2580
2581                 /*
2582                  * If dotdotting at the root of a filesystem, then
2583                  * need to traverse back to the mounted-on filesystem
2584                  * and do the dotdot lookup there.
2585                  */
2586                 if (cs->vp->v_flag & VROOT) {
2587
2588                         /*
2589                          * If at the system root, then can
2590                          * go up no further.
2591                          */
2592                         if (VN_CMP(cs->vp, rootdir))
2593                                 return (puterrno4(ENOENT));
2594
2595                         /*
2596                          * Traverse back to the mounted-on filesystem
2597                          */
2598                         cs->vp = untraverse(cs->vp);
2599
2600                         /*
2601                          * Set the different_export flag so we remember
2602                          * to pick up a new exportinfo entry for
2603                          * this new filesystem.
2604                          */
2605                         different_export = 1;
2606                 } else {
2607
2608                         /*
2609                          * If dotdotting above an export point then set
2610                          * the different_export to get new export info.
2611                          */
2612                         different_export = nfs_exported(cs->exi, cs->vp);
2613                 }
2614         }
2615
2616         error = fop_lookup(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2617             NULL, NULL, NULL);
2618         if (error)
2619                 return (puterrno4(error));
2620
2621         /*
2622          * If the vnode is in a pseudo filesystem, check whether it is visible.
2623          *
2624          * XXX if the vnode is a symlink and it is not visible in
2625          * a pseudo filesystem, return ENOENT (not following symlink).
2626          * V4 client can not mount such symlink. This is a regression
2627          * from V2/V3.
2628          *
2629          * In the same exported filesystem, if the security flavor used
2630          * is not an explicitly shared flavor, limit the view to the visible
2631          * list entries only. This is not a WRONGSEC case because it's already
2632          * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2633          */
2634         if (!different_export &&
2635             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2636             cs->access & CS_ACCESS_LIMITED)) {
2637                 if (! nfs_visible(cs->exi, vp, &different_export)) {
2638                         VN_RELE(vp);
2639                         return (puterrno4(ENOENT));
2640                 }
2641         }
2642
2643         /*
2644          * If it's a mountpoint, then traverse it.
2645          */
2646         if (vn_ismntpt(vp)) {
2647                 pre_exi = cs->exi;      /* save pre-traversed exportinfo */
2648                 pre_tvp = vp;           /* save pre-traversed vnode     */
2649
2650                 /*
2651                  * hold pre_tvp to counteract rele by traverse.  We will
2652                  * need pre_tvp below if checkexport4 fails
2653                  */
2654                 VN_HOLD(pre_tvp);
2655                 if ((error = traverse(&vp)) != 0) {
2656                         VN_RELE(vp);
2657                         VN_RELE(pre_tvp);
2658                         return (puterrno4(error));
2659                 }
2660                 different_export = 1;
2661         } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2662                 /*
2663                  * The vfsp comparison is to handle the case where
2664                  * a LOFS mount is shared.  lo_lookup traverses mount points,
2665                  * and NFS is unaware of local fs transistions because
2666                  * v_vfsmountedhere isn't set.  For this special LOFS case,
2667                  * the dir and the obj returned by lookup will have different
2668                  * vfs ptrs.
2669                  */
2670                 different_export = 1;
2671         }
2672
2673         if (different_export) {
2674
2675                 bzero(&fid, sizeof (fid));
2676                 fid.fid_len = MAXFIDSZ;
2677                 error = vop_fid_pseudo(vp, &fid);
2678                 if (error) {
2679                         VN_RELE(vp);
2680                         if (pre_tvp)
2681                                 VN_RELE(pre_tvp);
2682                         return (puterrno4(error));
2683                 }
2684
2685                 if (dotdot)
2686                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2687                 else
2688                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2689
2690                 if (exi == NULL) {
2691                         if (pre_tvp) {
2692                                 /*
2693                                  * If this vnode is a mounted-on vnode,
2694                                  * but the mounted-on file system is not
2695                                  * exported, send back the filehandle for
2696                                  * the mounted-on vnode, not the root of
2697                                  * the mounted-on file system.
2698                                  */
2699                                 VN_RELE(vp);
2700                                 vp = pre_tvp;
2701                                 exi = pre_exi;
2702                         } else {
2703                                 VN_RELE(vp);
2704                                 return (puterrno4(EACCES));
2705                         }
2706                 } else if (pre_tvp) {
2707                         /* we're done with pre_tvp now. release extra hold */
2708                         VN_RELE(pre_tvp);
2709                 }
2710
2711                 cs->exi = exi;
2712
2713                 /*
2714                  * Now we do a checkauth4. The reason is that
2715                  * this client/user may not have access to the new
2716                  * exported file system, and if they do,
2717                  * the client/user may be mapped to a different uid.
2718                  *
2719                  * We start with a new cr, because the checkauth4 done
2720                  * in the PUT*FH operation over wrote the cred's uid,
2721                  * gid, etc, and we want the real thing before calling
2722                  * checkauth4()
2723                  */
2724                 crfree(cs->cr);
2725                 cs->cr = crdup(cs->basecr);
2726
2727                 oldvp = cs->vp;
2728                 cs->vp = vp;
2729                 is_newvp = TRUE;
2730
2731                 stat = call_checkauth4(cs, req);
2732                 if (stat != NFS4_OK) {
2733                         VN_RELE(cs->vp);
2734                         cs->vp = oldvp;
2735                         return (stat);
2736                 }
2737         }
2738
2739         error = makefh4(&cs->fh, vp, cs->exi);
2740
2741 err_out:
2742         if (error) {
2743                 if (is_newvp) {
2744                         VN_RELE(cs->vp);
2745                         cs->vp = oldvp;
2746                 } else
2747                         VN_RELE(vp);
2748                 return (puterrno4(error));
2749         }
2750
2751         if (!is_newvp) {
2752                 if (cs->vp)
2753                         VN_RELE(cs->vp);
2754                 cs->vp = vp;
2755         } else if (oldvp)
2756                 VN_RELE(oldvp);
2757
2758         /*
2759          * if did lookup on attrdir and didn't lookup .., set named
2760          * attr fh flag
2761          */
2762         if (attrdir && ! dotdot)
2763                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2764
2765         /* Assume false for now, open proc will set this */
2766         cs->mandlock = FALSE;
2767
2768         return (NFS4_OK);
2769 }
2770
2771 /* ARGSUSED */
2772 static void
2773 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2774     struct compound_state *cs)
2775 {
2776         LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2777         LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2778         char *nm;
2779         uint_t len;
2780         struct sockaddr *ca;
2781         char *name = NULL;
2782         nfsstat4 status;
2783
2784         DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2785             LOOKUP4args *, args);
2786
2787         if (cs->vp == NULL) {
2788                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2789                 goto out;
2790         }
2791
2792         if (cs->vp->v_type == VLNK) {
2793                 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2794                 goto out;
2795         }
2796
2797         if (cs->vp->v_type != VDIR) {
2798                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2799                 goto out;
2800         }
2801
2802         status = utf8_dir_verify(&args->objname);
2803         if (status != NFS4_OK) {
2804                 *cs->statusp = resp->status = status;
2805                 goto out;
2806         }
2807
2808         nm = utf8_to_str(&args->objname, &len, NULL);
2809         if (nm == NULL) {
2810                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2811                 goto out;
2812         }
2813
2814         if (len > MAXNAMELEN) {
2815                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2816                 kmem_free(nm, len);
2817                 goto out;
2818         }
2819
2820         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2821         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2822             MAXPATHLEN  + 1);
2823
2824         if (name == NULL) {
2825                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2826                 kmem_free(nm, len);
2827                 goto out;
2828         }
2829
2830         *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
2831
2832         if (name != nm)
2833                 kmem_free(name, MAXPATHLEN + 1);
2834         kmem_free(nm, len);
2835
2836 out:
2837         DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
2838             LOOKUP4res *, resp);
2839 }
2840
2841 /* ARGSUSED */
2842 static void
2843 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2844     struct compound_state *cs)
2845 {
2846         LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2847
2848         DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
2849
2850         if (cs->vp == NULL) {
2851                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2852                 goto out;
2853         }
2854
2855         if (cs->vp->v_type != VDIR) {
2856                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2857                 goto out;
2858         }
2859
2860         *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
2861
2862         /*
2863          * From NFSV4 Specification, LOOKUPP should not check for
2864          * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
2865          */
2866         if (resp->status == NFS4ERR_WRONGSEC) {
2867                 *cs->statusp = resp->status = NFS4_OK;
2868         }
2869
2870 out:
2871         DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
2872             LOOKUPP4res *, resp);
2873 }
2874
2875
2876 /*ARGSUSED2*/
2877 static void
2878 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2879     struct compound_state *cs)
2880 {
2881         OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
2882         OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
2883         vnode_t         *avp = NULL;
2884         int             lookup_flags = LOOKUP_XATTR, error;
2885         int             exp_ro = 0;
2886
2887         DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
2888             OPENATTR4args *, args);
2889
2890         if (cs->vp == NULL) {
2891                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2892                 goto out;
2893         }
2894
2895         if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
2896             !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
2897                 *cs->statusp = resp->status = puterrno4(ENOTSUP);
2898                 goto out;
2899         }
2900
2901         /*
2902          * If file system supports passing ACE mask to fop_access then
2903          * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
2904          */
2905
2906         if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
2907                 error = fop_access(cs->vp, ACE_READ_NAMED_ATTRS,
2908                     V_ACE_MASK, cs->cr, NULL);
2909         else
2910                 error = ((fop_access(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
2911                     (fop_access(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
2912                     (fop_access(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
2913
2914         if (error) {
2915                 *cs->statusp = resp->status = puterrno4(EACCES);
2916                 goto out;
2917         }
2918
2919         /*
2920          * The CREATE_XATTR_DIR VOP flag cannot be specified if
2921          * the file system is exported read-only -- regardless of
2922          * createdir flag.  Otherwise the attrdir would be created
2923          * (assuming server fs isn't mounted readonly locally).  If
2924          * fop_lookup returns ENOENT in this case, the error will
2925          * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
2926          * because specfs has no fop_lookup op, so the macro would
2927          * return ENOSYS.  EINVAL is returned by all (current)
2928          * Solaris file system implementations when any of their
2929          * restrictions are violated (xattr(dir) can't have xattrdir).
2930          * Returning NOTSUPP is more appropriate in this case
2931          * because the object will never be able to have an attrdir.
2932          */
2933         if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
2934                 lookup_flags |= CREATE_XATTR_DIR;
2935
2936         error = fop_lookup(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
2937             NULL, NULL, NULL);
2938
2939         if (error) {
2940                 if (error == ENOENT && args->createdir && exp_ro)
2941                         *cs->statusp = resp->status = puterrno4(EROFS);
2942                 else if (error == EINVAL || error == ENOSYS)
2943                         *cs->statusp = resp->status = puterrno4(ENOTSUP);
2944                 else
2945                         *cs->statusp = resp->status = puterrno4(error);
2946                 goto out;
2947         }
2948
2949         ASSERT(avp->v_flag & V_XATTRDIR);
2950
2951         error = makefh4(&cs->fh, avp, cs->exi);
2952
2953         if (error) {
2954                 VN_RELE(avp);
2955                 *cs->statusp = resp->status = puterrno4(error);
2956                 goto out;
2957         }
2958
2959         VN_RELE(cs->vp);
2960         cs->vp = avp;
2961
2962         /*
2963          * There is no requirement for an attrdir fh flag
2964          * because the attrdir has a vnode flag to distinguish
2965          * it from regular (non-xattr) directories.  The
2966          * FH4_ATTRDIR flag is set for future sanity checks.
2967          */
2968         set_fh4_flag(&cs->fh, FH4_ATTRDIR);
2969         *cs->statusp = resp->status = NFS4_OK;
2970
2971 out:
2972         DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
2973             OPENATTR4res *, resp);
2974 }
2975
2976 static int
2977 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
2978     caller_context_t *ct)
2979 {
2980         int error;
2981         int i;
2982         clock_t delaytime;
2983
2984         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
2985
2986         /*
2987          * Don't block on mandatory locks. If this routine returns
2988          * EAGAIN, the caller should return NFS4ERR_LOCKED.
2989          */
2990         uio->uio_fmode = FNONBLOCK;
2991
2992         for (i = 0; i < rfs4_maxlock_tries; i++) {
2993
2994
2995                 if (direction == FREAD) {
2996                         (void) fop_rwlock(vp, V_WRITELOCK_FALSE, ct);
2997                         error = fop_read(vp, uio, ioflag, cred, ct);
2998                         fop_rwunlock(vp, V_WRITELOCK_FALSE, ct);
2999                 } else {
3000                         (void) fop_rwlock(vp, V_WRITELOCK_TRUE, ct);
3001                         error = fop_write(vp, uio, ioflag, cred, ct);
3002                         fop_rwunlock(vp, V_WRITELOCK_TRUE, ct);
3003                 }
3004
3005                 if (error != EAGAIN)
3006                         break;
3007
3008                 if (i < rfs4_maxlock_tries - 1) {
3009                         delay(delaytime);
3010                         delaytime *= 2;
3011                 }
3012         }
3013
3014         return (error);
3015 }
3016
3017 /* ARGSUSED */
3018 static void
3019 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3020     struct compound_state *cs)
3021 {
3022         READ4args *args = &argop->nfs_argop4_u.opread;
3023         READ4res *resp = &resop->nfs_resop4_u.opread;
3024         int error;
3025         int verror;
3026         vnode_t *vp;
3027         struct vattr va;
3028         struct iovec iov, *iovp = NULL;
3029         int iovcnt;
3030         struct uio uio;
3031         uoff_t offset;
3032         bool_t *deleg = &cs->deleg;
3033         nfsstat4 stat;
3034         int in_crit = 0;
3035         mblk_t *mp = NULL;
3036         int alloc_err = 0;
3037         int rdma_used = 0;
3038         int loaned_buffers;
3039         caller_context_t ct;
3040         struct uio *uiop;
3041
3042         DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3043             READ4args, args);
3044
3045         vp = cs->vp;
3046         if (vp == NULL) {
3047                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3048                 goto out;
3049         }
3050         if (cs->access == CS_ACCESS_DENIED) {
3051                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3052                 goto out;
3053         }
3054
3055         if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3056             deleg, TRUE, &ct)) != NFS4_OK) {
3057                 *cs->statusp = resp->status = stat;
3058                 goto out;
3059         }
3060
3061         /*
3062          * Enter the critical region before calling fop_rwlock
3063          * to avoid a deadlock with write requests.
3064          */
3065         if (nbl_need_check(vp)) {
3066                 nbl_start_crit(vp, RW_READER);
3067                 in_crit = 1;
3068                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3069                     &ct)) {
3070                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
3071                         goto out;
3072                 }
3073         }
3074
3075         if (args->wlist) {
3076                 if (args->count > clist_len(args->wlist)) {
3077                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3078                         goto out;
3079                 }
3080                 rdma_used = 1;
3081         }
3082
3083         /* use loaned buffers for TCP */
3084         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3085
3086         va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3087         verror = fop_getattr(vp, &va, 0, cs->cr, &ct);
3088
3089         /*
3090          * If we can't get the attributes, then we can't do the
3091          * right access checking.  So, we'll fail the request.
3092          */
3093         if (verror) {
3094                 *cs->statusp = resp->status = puterrno4(verror);
3095                 goto out;
3096         }
3097
3098         if (vp->v_type != VREG) {
3099                 *cs->statusp = resp->status =
3100                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3101                 goto out;
3102         }
3103
3104         if (crgetuid(cs->cr) != va.va_uid &&
3105             (error = fop_access(vp, VREAD, 0, cs->cr, &ct)) &&
3106             (error = fop_access(vp, VEXEC, 0, cs->cr, &ct))) {
3107                 *cs->statusp = resp->status = puterrno4(error);
3108                 goto out;
3109         }
3110
3111         if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3112                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3113                 goto out;
3114         }
3115
3116         offset = args->offset;
3117         if (offset >= va.va_size) {
3118                 *cs->statusp = resp->status = NFS4_OK;
3119                 resp->eof = TRUE;
3120                 resp->data_len = 0;
3121                 resp->data_val = NULL;
3122                 resp->mblk = NULL;
3123                 /* RDMA */
3124                 resp->wlist = args->wlist;
3125                 resp->wlist_len = resp->data_len;
3126                 *cs->statusp = resp->status = NFS4_OK;
3127                 if (resp->wlist)
3128                         clist_zero_len(resp->wlist);
3129                 goto out;
3130         }
3131
3132         if (args->count == 0) {
3133                 *cs->statusp = resp->status = NFS4_OK;
3134                 resp->eof = FALSE;
3135                 resp->data_len = 0;
3136                 resp->data_val = NULL;
3137                 resp->mblk = NULL;
3138                 /* RDMA */
3139                 resp->wlist = args->wlist;
3140                 resp->wlist_len = resp->data_len;
3141                 if (resp->wlist)
3142                         clist_zero_len(resp->wlist);
3143                 goto out;
3144         }
3145
3146         /*
3147          * Do not allocate memory more than maximum allowed
3148          * transfer size
3149          */
3150         if (args->count > rfs4_tsize(req))
3151                 args->count = rfs4_tsize(req);
3152
3153         if (loaned_buffers) {
3154                 uiop = (uio_t *)rfs_setup_xuio(vp);
3155                 ASSERT(uiop != NULL);
3156                 uiop->uio_segflg = UIO_SYSSPACE;
3157                 uiop->uio_loffset = args->offset;
3158                 uiop->uio_resid = args->count;
3159
3160                 /* Jump to do the read if successful */
3161                 if (!fop_reqzcbuf(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3162                         /*
3163                          * Need to hold the vnode until after fop_retzcbuf()
3164                          * is called.
3165                          */
3166                         VN_HOLD(vp);
3167                         goto doio_read;
3168                 }
3169
3170                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3171                     uiop->uio_loffset, int, uiop->uio_resid);
3172
3173                 uiop->uio_extflg = 0;
3174
3175                 /* failure to setup for zero copy */
3176                 rfs_free_xuio((void *)uiop);
3177                 loaned_buffers = 0;
3178         }
3179
3180         /*
3181          * If returning data via RDMA Write, then grab the chunk list. If we
3182          * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3183          */
3184         if (rdma_used) {
3185                 mp = NULL;
3186                 (void) rdma_get_wchunk(req, &iov, args->wlist);
3187                 uio.uio_iov = &iov;
3188                 uio.uio_iovcnt = 1;
3189         } else {
3190                 /*
3191                  * mp will contain the data to be sent out in the read reply.
3192                  * It will be freed after the reply has been sent.
3193                  */
3194                 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3195                 ASSERT(mp != NULL);
3196                 ASSERT(alloc_err == 0);
3197                 uio.uio_iov = iovp;
3198                 uio.uio_iovcnt = iovcnt;
3199         }
3200
3201         uio.uio_segflg = UIO_SYSSPACE;
3202         uio.uio_extflg = UIO_COPY_CACHED;
3203         uio.uio_loffset = args->offset;
3204         uio.uio_resid = args->count;
3205         uiop = &uio;
3206
3207 doio_read:
3208         error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3209
3210         va.va_mask = AT_SIZE;
3211         verror = fop_getattr(vp, &va, 0, cs->cr, &ct);
3212
3213         if (error) {
3214                 if (mp)
3215                         freemsg(mp);
3216                 *cs->statusp = resp->status = puterrno4(error);
3217                 goto out;
3218         }
3219
3220         /* make mblk using zc buffers */
3221         if (loaned_buffers) {
3222                 mp = uio_to_mblk(uiop);
3223                 ASSERT(mp != NULL);
3224         }
3225
3226         *cs->statusp = resp->status = NFS4_OK;
3227
3228         ASSERT(uiop->uio_resid >= 0);
3229         resp->data_len = args->count - uiop->uio_resid;
3230         if (mp) {
3231                 resp->data_val = (char *)mp->b_datap->db_base;
3232                 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3233         } else {
3234                 resp->data_val = (caddr_t)iov.iov_base;
3235         }
3236
3237         resp->mblk = mp;
3238
3239         if (!verror && offset + resp->data_len == va.va_size)
3240                 resp->eof = TRUE;
3241         else
3242                 resp->eof = FALSE;
3243
3244         if (rdma_used) {
3245                 if (!rdma_setup_read_data4(args, resp)) {
3246                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3247                 }
3248         } else {
3249                 resp->wlist = NULL;
3250         }
3251
3252 out:
3253         if (in_crit)
3254                 nbl_end_crit(vp);
3255
3256         if (iovp != NULL)
3257                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3258
3259         DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3260             READ4res *, resp);
3261 }
3262
3263 static void
3264 rfs4_op_read_free(nfs_resop4 *resop)
3265 {
3266         READ4res        *resp = &resop->nfs_resop4_u.opread;
3267
3268         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3269                 freemsg(resp->mblk);
3270                 resp->mblk = NULL;
3271                 resp->data_val = NULL;
3272                 resp->data_len = 0;
3273         }
3274 }
3275
3276 static void
3277 rfs4_op_readdir_free(nfs_resop4 * resop)
3278 {
3279         READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3280
3281         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3282                 freeb(resp->mblk);
3283                 resp->mblk = NULL;
3284                 resp->data_len = 0;
3285         }
3286 }
3287
3288
3289 /* ARGSUSED */
3290 static void
3291 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3292     struct compound_state *cs)
3293 {
3294         PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3295         int             error;
3296         vnode_t         *vp;
3297         struct exportinfo *exi, *sav_exi;
3298         nfs_fh4_fmt_t   *fh_fmtp;
3299
3300         DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3301
3302         if (cs->vp) {
3303                 VN_RELE(cs->vp);
3304                 cs->vp = NULL;
3305         }
3306
3307         if (cs->cr)
3308                 crfree(cs->cr);
3309
3310         cs->cr = crdup(cs->basecr);
3311
3312         vp = exi_public->exi_vp;
3313         if (vp == NULL) {
3314                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3315                 goto out;
3316         }
3317
3318         error = makefh4(&cs->fh, vp, exi_public);
3319         if (error != 0) {
3320                 *cs->statusp = resp->status = puterrno4(error);
3321                 goto out;
3322         }
3323         sav_exi = cs->exi;
3324         if (exi_public == exi_root) {
3325                 /*
3326                  * No filesystem is actually shared public, so we default
3327                  * to exi_root. In this case, we must check whether root
3328                  * is exported.
3329                  */
3330                 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3331
3332                 /*
3333                  * if root filesystem is exported, the exportinfo struct that we
3334                  * should use is what checkexport4 returns, because root_exi is
3335                  * actually a mostly empty struct.
3336                  */
3337                 exi = checkexport4(&fh_fmtp->fh4_fsid,
3338                     (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3339                 cs->exi = ((exi != NULL) ? exi : exi_public);
3340         } else {
3341                 /*
3342                  * it's a properly shared filesystem
3343                  */
3344                 cs->exi = exi_public;
3345         }
3346
3347         VN_HOLD(vp);
3348         cs->vp = vp;
3349
3350         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3351                 VN_RELE(cs->vp);
3352                 cs->vp = NULL;
3353                 cs->exi = sav_exi;
3354                 goto out;
3355         }
3356
3357         *cs->statusp = resp->status = NFS4_OK;
3358 out:
3359         DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3360             PUTPUBFH4res *, resp);
3361 }
3362
3363 /*
3364  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3365  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3366  * or joe have restrictive search permissions, then we shouldn't let
3367  * the client get a file handle. This is easy to enforce. However, we
3368  * don't know what security flavor should be used until we resolve the
3369  * path name. Another complication is uid mapping. If root is
3370  * the user, then it will be mapped to the anonymous user by default,
3371  * but we won't know that till we've resolved the path name. And we won't
3372  * know what the anonymous user is.
3373  * Luckily, SECINFO is specified to take a full filename.
3374  * So what we will have to in rfs4_op_lookup is check that flavor of
3375  * the target object matches that of the request, and if root was the
3376  * caller, check for the root= and anon= options, and if necessary,
3377  * repeat the lookup using the right cred_t. But that's not done yet.
3378  */
3379 /* ARGSUSED */
3380 static void
3381 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3382     struct compound_state *cs)
3383 {
3384         PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3385         PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3386         nfs_fh4_fmt_t *fh_fmtp;
3387
3388         DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3389             PUTFH4args *, args);
3390
3391         if (cs->vp) {
3392                 VN_RELE(cs->vp);
3393                 cs->vp = NULL;
3394         }
3395
3396         if (cs->cr) {
3397                 crfree(cs->cr);
3398                 cs->cr = NULL;
3399         }
3400
3401
3402         if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3403                 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3404                 goto out;
3405         }
3406
3407         fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3408         cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3409             NULL);
3410
3411         if (cs->exi == NULL) {
3412                 *cs->statusp = resp->status = NFS4ERR_STALE;
3413                 goto out;
3414         }
3415
3416         cs->cr = crdup(cs->basecr);
3417
3418         ASSERT(cs->cr != NULL);
3419
3420         if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3421                 *cs->statusp = resp->status;
3422                 goto out;
3423         }
3424
3425         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3426                 VN_RELE(cs->vp);
3427                 cs->vp = NULL;
3428                 goto out;
3429         }
3430
3431         nfs_fh4_copy(&args->object, &cs->fh);
3432         *cs->statusp = resp->status = NFS4_OK;
3433         cs->deleg = FALSE;
3434
3435 out:
3436         DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3437             PUTFH4res *, resp);
3438 }
3439
3440 /* ARGSUSED */
3441 static void
3442 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3443     struct compound_state *cs)
3444 {
3445         PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3446         int error;
3447         fid_t fid;
3448         struct exportinfo *exi, *sav_exi;
3449
3450         DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3451
3452         if (cs->vp) {
3453                 VN_RELE(cs->vp);
3454                 cs->vp = NULL;
3455         }
3456
3457         if (cs->cr)
3458                 crfree(cs->cr);
3459
3460         cs->cr = crdup(cs->basecr);
3461
3462         /*
3463          * Using rootdir, the system root vnode,
3464          * get its fid.
3465          */
3466         bzero(&fid, sizeof (fid));
3467         fid.fid_len = MAXFIDSZ;
3468         error = vop_fid_pseudo(rootdir, &fid);
3469         if (error != 0) {
3470                 *cs->statusp = resp->status = puterrno4(error);
3471                 goto out;
3472         }
3473
3474         /*
3475          * Then use the root fsid & fid it to find out if it's exported
3476          *
3477          * If the server root isn't exported directly, then
3478          * it should at least be a pseudo export based on
3479          * one or more exports further down in the server's
3480          * file tree.
3481          */
3482         exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3483         if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3484                 NFS4_DEBUG(rfs4_debug,
3485                     (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3486                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3487                 goto out;
3488         }
3489
3490         /*
3491          * Now make a filehandle based on the root
3492          * export and root vnode.
3493          */
3494         error = makefh4(&cs->fh, rootdir, exi);
3495         if (error != 0) {
3496                 *cs->statusp = resp->status = puterrno4(error);
3497                 goto out;
3498         }
3499
3500         sav_exi = cs->exi;
3501         cs->exi = exi;
3502
3503         VN_HOLD(rootdir);
3504         cs->vp = rootdir;
3505
3506         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3507                 VN_RELE(rootdir);
3508                 cs->vp = NULL;
3509                 cs->exi = sav_exi;
3510                 goto out;
3511         }
3512
3513         *cs->statusp = resp->status = NFS4_OK;
3514         cs->deleg = FALSE;
3515 out:
3516         DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3517             PUTROOTFH4res *, resp);
3518 }
3519
3520 /*
3521  * set_rdattr_params sets up the variables used to manage what information
3522  * to get for each directory entry.
3523  */
3524 static nfsstat4
3525 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3526     bitmap4 attrs, bool_t *need_to_lookup)
3527 {
3528         uint_t  va_mask;
3529         nfsstat4 status;
3530         bitmap4 objbits;
3531
3532         status = bitmap4_to_attrmask(attrs, sargp);
3533         if (status != NFS4_OK) {
3534                 /*
3535                  * could not even figure attr mask
3536                  */
3537                 return (status);
3538         }
3539         va_mask = sargp->vap->va_mask;
3540
3541         /*
3542          * dirent's d_ino is always correct value for mounted_on_fileid.
3543          * mntdfid_set is set once here, but mounted_on_fileid is
3544          * set in main dirent processing loop for each dirent.
3545          * The mntdfid_set is a simple optimization that lets the
3546          * server attr code avoid work when caller is readdir.
3547          */
3548         sargp->mntdfid_set = TRUE;
3549
3550         /*
3551          * Lookup entry only if client asked for any of the following:
3552          * a) vattr attrs
3553          * b) vfs attrs
3554          * c) attrs w/per-object scope requested (change, filehandle, etc)
3555          *    other than mounted_on_fileid (which we can take from dirent)
3556          */
3557         objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3558
3559         if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3560                 *need_to_lookup = TRUE;
3561         else
3562                 *need_to_lookup = FALSE;
3563
3564         if (sargp->sbp == NULL)
3565                 return (NFS4_OK);
3566
3567         /*
3568          * If filesystem attrs are requested, get them now from the
3569          * directory vp, as most entries will have same filesystem. The only
3570          * exception are mounted over entries but we handle
3571          * those as we go (XXX mounted over detection not yet implemented).
3572          */
3573         sargp->vap->va_mask = 0;        /* to avoid fop_getattr */
3574         status = bitmap4_get_sysattrs(sargp);
3575         sargp->vap->va_mask = va_mask;
3576
3577         if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3578                 /*
3579                  * Failed to get filesystem attributes.
3580                  * Return a rdattr_error for each entry, but don't fail.
3581                  * However, don't get any obj-dependent attrs.
3582                  */
3583                 sargp->rdattr_error = status;   /* for rdattr_error */
3584                 *need_to_lookup = FALSE;
3585                 /*
3586                  * At least get fileid for regular readdir output
3587                  */
3588                 sargp->vap->va_mask &= AT_NODEID;
3589                 status = NFS4_OK;
3590         }
3591
3592         return (status);
3593 }
3594
3595 /*
3596  * readlink: args: CURRENT_FH.
3597  *      res: status. If success - CURRENT_FH unchanged, return linktext.
3598  */
3599
3600 /* ARGSUSED */
3601 static void
3602 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3603     struct compound_state *cs)
3604 {
3605         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3606         int error;
3607         vnode_t *vp;
3608         struct iovec iov;
3609         struct vattr va;
3610         struct uio uio;
3611         char *data;
3612         struct sockaddr *ca;
3613         char *name = NULL;
3614         int is_referral;
3615
3616         DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3617
3618         /* CURRENT_FH: directory */
3619         vp = cs->vp;
3620         if (vp == NULL) {
3621                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3622                 goto out;
3623         }
3624
3625         if (cs->access == CS_ACCESS_DENIED) {
3626                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3627                 goto out;
3628         }
3629
3630         /* Is it a referral? */
3631         if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3632
3633                 is_referral = 1;
3634
3635         } else {
3636
3637                 is_referral = 0;
3638
3639                 if (vp->v_type == VDIR) {
3640                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
3641                         goto out;
3642                 }
3643
3644                 if (vp->v_type != VLNK) {
3645                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3646                         goto out;
3647                 }
3648
3649         }
3650
3651         va.va_mask = AT_MODE;
3652         error = fop_getattr(vp, &va, 0, cs->cr, NULL);
3653         if (error) {
3654                 *cs->statusp = resp->status = puterrno4(error);
3655                 goto out;
3656         }
3657
3658         if (MANDLOCK(vp, va.va_mode)) {
3659                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3660                 goto out;
3661         }
3662
3663         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3664
3665         if (is_referral) {
3666                 char *s;
3667                 size_t strsz;
3668
3669                 /* Get an artificial symlink based on a referral */
3670                 s = build_symlink(vp, cs->cr, &strsz);
3671                 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3672                 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3673                     vnode_t *, vp, char *, s);
3674                 if (s == NULL)
3675                         error = EINVAL;
3676                 else {
3677                         error = 0;
3678                         (void) strlcpy(data, s, MAXPATHLEN + 1);
3679                         kmem_free(s, strsz);
3680                 }
3681
3682         } else {
3683
3684                 iov.iov_base = data;
3685                 iov.iov_len = MAXPATHLEN;
3686                 uio.uio_iov = &iov;
3687                 uio.uio_iovcnt = 1;
3688                 uio.uio_segflg = UIO_SYSSPACE;
3689                 uio.uio_extflg = UIO_COPY_CACHED;
3690                 uio.uio_loffset = 0;
3691                 uio.uio_resid = MAXPATHLEN;
3692
3693                 error = fop_readlink(vp, &uio, cs->cr, NULL);
3694
3695                 if (!error)
3696                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3697         }
3698
3699         if (error) {
3700                 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3701                 *cs->statusp = resp->status = puterrno4(error);
3702                 goto out;
3703         }
3704
3705         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3706         name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3707             MAXPATHLEN  + 1);
3708
3709         if (name == NULL) {
3710                 /*
3711                  * Even though the conversion failed, we return
3712                  * something. We just don't translate it.
3713                  */
3714                 name = data;
3715         }
3716
3717         /*
3718          * treat link name as data
3719          */
3720         (void) str_to_utf8(name, (utf8string *)&resp->link);
3721
3722         if (name != data)
3723                 kmem_free(name, MAXPATHLEN + 1);
3724         kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3725         *cs->statusp = resp->status = NFS4_OK;
3726
3727 out:
3728         DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3729             READLINK4res *, resp);
3730 }
3731
3732 static void
3733 rfs4_op_readlink_free(nfs_resop4 *resop)
3734 {
3735         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3736         utf8string *symlink = (utf8string *)&resp->link;
3737
3738         if (symlink->utf8string_val) {
3739                 UTF8STRING_FREE(*symlink)
3740         }
3741 }
3742
3743 /*
3744  * release_lockowner:
3745  *      Release any state associated with the supplied
3746  *      lockowner. Note if any lo_state is holding locks we will not
3747  *      rele that lo_state and thus the lockowner will not be destroyed.
3748  *      A client using lock after the lock owner stateid has been released
3749  *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3750  *      to reissue the lock with new_lock_owner set to TRUE.
3751  *      args: lock_owner
3752  *      res:  status
3753  */
3754 /* ARGSUSED */
3755 static void
3756 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3757     struct svc_req *req, struct compound_state *cs)
3758 {
3759         RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3760         RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3761         rfs4_lockowner_t *lo;
3762         rfs4_openowner_t *oo;
3763         rfs4_state_t *sp;
3764         rfs4_lo_state_t *lsp;
3765         rfs4_client_t *cp;
3766         bool_t create = FALSE;
3767         locklist_t *llist;
3768         sysid_t sysid;
3769
3770         DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3771             cs, RELEASE_LOCKOWNER4args *, ap);
3772
3773         /* Make sure there is a clientid around for this request */
3774         cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3775
3776         if (cp == NULL) {
3777                 *cs->statusp = resp->status =
3778                     rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3779                 goto out;
3780         }
3781         rfs4_client_rele(cp);
3782
3783         lo = rfs4_findlockowner(&ap->lock_owner, &create);
3784         if (lo == NULL) {
3785                 *cs->statusp = resp->status = NFS4_OK;
3786                 goto out;
3787         }
3788         ASSERT(lo->rl_client != NULL);
3789
3790         /*
3791          * Check for EXPIRED client. If so will reap state with in a lease
3792          * period or on next set_clientid_confirm step
3793          */
3794         if (rfs4_lease_expired(lo->rl_client)) {
3795                 rfs4_lockowner_rele(lo);
3796                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3797                 goto out;
3798         }
3799
3800         /*
3801          * If no sysid has been assigned, then no locks exist; just return.
3802          */
3803         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3804         if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3805                 rfs4_lockowner_rele(lo);
3806                 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3807                 goto out;
3808         }
3809
3810         sysid = lo->rl_client->rc_sysidt;
3811         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3812
3813         /*
3814          * Mark the lockowner invalid.
3815          */
3816         rfs4_dbe_hide(lo->rl_dbe);
3817
3818         /*
3819          * sysid-pid pair should now not be used since the lockowner is
3820          * invalid. If the client were to instantiate the lockowner again
3821          * it would be assigned a new pid. Thus we can get the list of
3822          * current locks.
3823          */
3824
3825         llist = flk_get_active_locks(sysid, lo->rl_pid);
3826         /* If we are still holding locks fail */
3827         if (llist != NULL) {
3828
3829                 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3830
3831                 flk_free_locklist(llist);
3832                 /*
3833                  * We need to unhide the lockowner so the client can
3834                  * try it again. The bad thing here is if the client
3835                  * has a logic error that took it here in the first place
3836                  * they probably have lost accounting of the locks that it
3837                  * is holding. So we may have dangling state until the
3838                  * open owner state is reaped via close. One scenario
3839                  * that could possibly occur is that the client has
3840                  * sent the unlock request(s) in separate threads
3841                  * and has not waited for the replies before sending the
3842                  * RELEASE_LOCKOWNER request. Presumably, it would expect
3843                  * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3844                  * reissuing the request.
3845                  */
3846                 rfs4_dbe_unhide(lo->rl_dbe);
3847                 rfs4_lockowner_rele(lo);
3848                 goto out;
3849         }
3850
3851         /*
3852          * For the corresponding client we need to check each open
3853          * owner for any opens that have lockowner state associated
3854          * with this lockowner.
3855          */
3856
3857         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3858         for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3859             oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3860
3861                 rfs4_dbe_lock(oo->ro_dbe);
3862                 for (sp = list_head(&oo->ro_statelist); sp != NULL;
3863                     sp = list_next(&oo->ro_statelist, sp)) {
3864
3865                         rfs4_dbe_lock(sp->rs_dbe);
3866                         for (lsp = list_head(&sp->rs_lostatelist);
3867                             lsp != NULL;
3868                             lsp = list_next(&sp->rs_lostatelist, lsp)) {
3869                                 if (lsp->rls_locker == lo) {
3870                                         rfs4_dbe_lock(lsp->rls_dbe);
3871                                         rfs4_dbe_invalidate(lsp->rls_dbe);
3872                                         rfs4_dbe_unlock(lsp->rls_dbe);
3873                                 }
3874                         }
3875                         rfs4_dbe_unlock(sp->rs_dbe);
3876                 }
3877                 rfs4_dbe_unlock(oo->ro_dbe);
3878         }
3879         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3880
3881         rfs4_lockowner_rele(lo);
3882
3883         *cs->statusp = resp->status = NFS4_OK;
3884
3885 out:
3886         DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
3887             cs, RELEASE_LOCKOWNER4res *, resp);
3888 }
3889
3890 /*
3891  * short utility function to lookup a file and recall the delegation
3892  */
3893 static rfs4_file_t *
3894 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
3895     int *lkup_error, cred_t *cr)
3896 {
3897         vnode_t *vp;
3898         rfs4_file_t *fp = NULL;
3899         bool_t fcreate = FALSE;
3900         int error;
3901
3902         if (vpp)
3903                 *vpp = NULL;
3904
3905         if ((error = fop_lookup(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
3906             NULL)) == 0) {
3907                 if (vp->v_type == VREG)
3908                         fp = rfs4_findfile(vp, NULL, &fcreate);
3909                 if (vpp)
3910                         *vpp = vp;
3911                 else
3912                         VN_RELE(vp);
3913         }
3914
3915         if (lkup_error)
3916                 *lkup_error = error;
3917
3918         return (fp);
3919 }
3920
3921 /*
3922  * remove: args: CURRENT_FH: directory; name.
3923  *      res: status. If success - CURRENT_FH unchanged, return change_info
3924  *              for directory.
3925  */
3926 /* ARGSUSED */
3927 static void
3928 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3929     struct compound_state *cs)
3930 {
3931         REMOVE4args *args = &argop->nfs_argop4_u.opremove;
3932         REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
3933         int error;
3934         vnode_t *dvp, *vp;
3935         struct vattr bdva, idva, adva;
3936         char *nm;
3937         uint_t len;
3938         rfs4_file_t *fp;
3939         int in_crit = 0;
3940         struct sockaddr *ca;
3941         char *name = NULL;
3942         nfsstat4 status;
3943
3944         DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
3945             REMOVE4args *, args);
3946
3947         /* CURRENT_FH: directory */
3948         dvp = cs->vp;
3949         if (dvp == NULL) {
3950                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3951                 goto out;
3952         }
3953
3954         if (cs->access == CS_ACCESS_DENIED) {
3955                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3956                 goto out;
3957         }
3958
3959         /*
3960          * If there is an unshared filesystem mounted on this vnode,
3961          * Do not allow to remove anything in this directory.
3962          */
3963         if (vn_ismntpt(dvp)) {
3964                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3965                 goto out;
3966         }
3967
3968         if (dvp->v_type != VDIR) {
3969                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3970                 goto out;
3971         }
3972
3973         status = utf8_dir_verify(&args->target);
3974         if (status != NFS4_OK) {
3975                 *cs->statusp = resp->status = status;
3976                 goto out;
3977         }
3978
3979         /*
3980          * Lookup the file so that we can check if it's a directory
3981          */
3982         nm = utf8_to_fn(&args->target, &len, NULL);
3983         if (nm == NULL) {
3984                 *cs->statusp = resp->status = NFS4ERR_INVAL;
3985                 goto out;
3986         }
3987
3988         if (len > MAXNAMELEN) {
3989                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3990                 kmem_free(nm, len);
3991                 goto out;
3992         }
3993
3994         if (rdonly4(req, cs)) {
3995                 *cs->statusp = resp->status = NFS4ERR_ROFS;
3996                 kmem_free(nm, len);
3997                 goto out;
3998         }
3999
4000         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4001         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4002             MAXPATHLEN  + 1);
4003
4004         if (name == NULL) {
4005                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4006                 kmem_free(nm, len);
4007                 goto out;
4008         }
4009
4010         /*
4011          * Lookup the file to determine type and while we are see if
4012          * there is a file struct around and check for delegation.
4013          * We don't need to acquire va_seq before this lookup, if
4014          * it causes an update, cinfo.before will not match, which will
4015          * trigger a cache flush even if atomic is TRUE.
4016          */
4017         if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4018                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4019                     NULL)) {
4020                         VN_RELE(vp);
4021                         rfs4_file_rele(fp);
4022                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4023                         if (nm != name)
4024                                 kmem_free(name, MAXPATHLEN + 1);
4025                         kmem_free(nm, len);
4026                         goto out;
4027                 }
4028         }
4029
4030         /* Didn't find anything to remove */
4031         if (vp == NULL) {
4032                 *cs->statusp = resp->status = error;
4033                 if (nm != name)
4034                         kmem_free(name, MAXPATHLEN + 1);
4035                 kmem_free(nm, len);
4036                 goto out;
4037         }
4038
4039         if (nbl_need_check(vp)) {
4040                 nbl_start_crit(vp, RW_READER);
4041                 in_crit = 1;
4042                 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4043                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4044                         if (nm != name)
4045                                 kmem_free(name, MAXPATHLEN + 1);
4046                         kmem_free(nm, len);
4047                         nbl_end_crit(vp);
4048                         VN_RELE(vp);
4049                         if (fp) {
4050                                 rfs4_clear_dont_grant(fp);
4051                                 rfs4_file_rele(fp);
4052                         }
4053                         goto out;
4054                 }
4055         }
4056
4057         /* Get dir "before" change value */
4058         bdva.va_mask = AT_CTIME|AT_SEQ;
4059         error = fop_getattr(dvp, &bdva, 0, cs->cr, NULL);
4060         if (error) {
4061                 *cs->statusp = resp->status = puterrno4(error);
4062                 if (nm != name)
4063                         kmem_free(name, MAXPATHLEN + 1);
4064                 kmem_free(nm, len);
4065                 if (in_crit)
4066                         nbl_end_crit(vp);
4067                 VN_RELE(vp);
4068                 if (fp) {
4069                         rfs4_clear_dont_grant(fp);
4070                         rfs4_file_rele(fp);
4071                 }
4072                 goto out;
4073         }
4074         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4075
4076         /* Actually do the REMOVE operation */
4077         if (vp->v_type == VDIR) {
4078                 /*
4079                  * Can't remove a directory that has a mounted-on filesystem.
4080                  */
4081                 if (vn_ismntpt(vp)) {
4082                         error = EACCES;
4083                 } else {
4084                         /*
4085                          * System V defines rmdir to return EEXIST,
4086                          * not ENOTEMPTY, if the directory is not
4087                          * empty.  A System V NFS server needs to map
4088                          * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4089                          * transmit over the wire.
4090                          */
4091                         if ((error = fop_rmdir(dvp, name, rootdir, cs->cr,
4092                             NULL, 0)) == EEXIST)
4093                                 error = ENOTEMPTY;
4094                 }
4095         } else {
4096                 if ((error = fop_remove(dvp, name, cs->cr, NULL, 0)) == 0 &&
4097                     fp != NULL) {
4098                         struct vattr va;
4099                         vnode_t *tvp;
4100
4101                         rfs4_dbe_lock(fp->rf_dbe);
4102                         tvp = fp->rf_vp;
4103                         if (tvp)
4104                                 VN_HOLD(tvp);
4105                         rfs4_dbe_unlock(fp->rf_dbe);
4106
4107                         if (tvp) {
4108                                 /*
4109                                  * This is va_seq safe because we are not
4110                                  * manipulating dvp.
4111                                  */
4112                                 va.va_mask = AT_NLINK;
4113                                 if (!fop_getattr(tvp, &va, 0, cs->cr, NULL) &&
4114                                     va.va_nlink == 0) {
4115                                         /* Remove state on file remove */
4116                                         if (in_crit) {
4117                                                 nbl_end_crit(vp);
4118                                                 in_crit = 0;
4119                                         }
4120                                         rfs4_close_all_state(fp);
4121                                 }
4122                                 VN_RELE(tvp);
4123                         }
4124                 }
4125         }
4126
4127         if (in_crit)
4128                 nbl_end_crit(vp);
4129         VN_RELE(vp);
4130
4131         if (fp) {
4132                 rfs4_clear_dont_grant(fp);
4133                 rfs4_file_rele(fp);
4134         }
4135         if (nm != name)
4136                 kmem_free(name, MAXPATHLEN + 1);
4137         kmem_free(nm, len);
4138
4139         if (error) {
4140                 *cs->statusp = resp->status = puterrno4(error);
4141                 goto out;
4142         }
4143
4144         /*
4145          * Get the initial "after" sequence number, if it fails, set to zero
4146          */
4147         idva.va_mask = AT_SEQ;
4148         if (fop_getattr(dvp, &idva, 0, cs->cr, NULL))
4149                 idva.va_seq = 0;
4150
4151         /*
4152          * Force modified data and metadata out to stable storage.
4153          */
4154         (void) fop_fsync(dvp, 0, cs->cr, NULL);
4155
4156         /*
4157          * Get "after" change value, if it fails, simply return the
4158          * before value.
4159          */
4160         adva.va_mask = AT_CTIME|AT_SEQ;
4161         if (fop_getattr(dvp, &adva, 0, cs->cr, NULL)) {
4162                 adva.va_ctime = bdva.va_ctime;
4163                 adva.va_seq = 0;
4164         }
4165
4166         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4167
4168         /*
4169          * The cinfo.atomic = TRUE only if we have
4170          * non-zero va_seq's, and it has incremented by exactly one
4171          * during the fop_remove/RMDIR and it didn't change during
4172          * the fop_fsync.
4173          */
4174         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4175             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4176                 resp->cinfo.atomic = TRUE;
4177         else
4178                 resp->cinfo.atomic = FALSE;
4179
4180         *cs->statusp = resp->status = NFS4_OK;
4181
4182 out:
4183         DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4184             REMOVE4res *, resp);
4185 }
4186
4187 /*
4188  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4189  *              oldname and newname.
4190  *      res: status. If success - CURRENT_FH unchanged, return change_info
4191  *              for both from and target directories.
4192  */
4193 /* ARGSUSED */
4194 static void
4195 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4196     struct compound_state *cs)
4197 {
4198         RENAME4args *args = &argop->nfs_argop4_u.oprename;
4199         RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4200         int error;
4201         vnode_t *odvp;
4202         vnode_t *ndvp;
4203         vnode_t *srcvp, *targvp;
4204         struct vattr obdva, oidva, oadva;
4205         struct vattr nbdva, nidva, nadva;
4206         char *onm, *nnm;
4207         uint_t olen, nlen;
4208         rfs4_file_t *fp, *sfp;
4209         int in_crit_src, in_crit_targ;
4210         int fp_rele_grant_hold, sfp_rele_grant_hold;
4211         struct sockaddr *ca;
4212         char *converted_onm = NULL;
4213         char *converted_nnm = NULL;
4214         nfsstat4 status;
4215
4216         DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4217             RENAME4args *, args);
4218
4219         fp = sfp = NULL;
4220         srcvp = targvp = NULL;
4221         in_crit_src = in_crit_targ = 0;
4222         fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4223
4224         /* CURRENT_FH: target directory */
4225         ndvp = cs->vp;
4226         if (ndvp == NULL) {
4227                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4228                 goto out;
4229         }
4230
4231         /* SAVED_FH: from directory */
4232         odvp = cs->saved_vp;
4233         if (odvp == NULL) {
4234                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4235                 goto out;
4236         }
4237
4238         if (cs->access == CS_ACCESS_DENIED) {
4239                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4240                 goto out;
4241         }
4242
4243         /*
4244          * If there is an unshared filesystem mounted on this vnode,
4245          * do not allow to rename objects in this directory.
4246          */
4247         if (vn_ismntpt(odvp)) {
4248                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4249                 goto out;
4250         }
4251
4252         /*
4253          * If there is an unshared filesystem mounted on this vnode,
4254          * do not allow to rename to this directory.
4255          */
4256         if (vn_ismntpt(ndvp)) {
4257                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4258                 goto out;
4259         }
4260
4261         if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4262                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4263                 goto out;
4264         }
4265
4266         if (cs->saved_exi != cs->exi) {
4267                 *cs->statusp = resp->status = NFS4ERR_XDEV;
4268                 goto out;
4269         }
4270
4271         status = utf8_dir_verify(&args->oldname);
4272         if (status != NFS4_OK) {
4273                 *cs->statusp = resp->status = status;
4274                 goto out;
4275         }
4276
4277         status = utf8_dir_verify(&args->newname);
4278         if (status != NFS4_OK) {
4279                 *cs->statusp = resp->status = status;
4280                 goto out;
4281         }
4282
4283         onm = utf8_to_fn(&args->oldname, &olen, NULL);
4284         if (onm == NULL) {
4285                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4286                 goto out;
4287         }
4288         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4289         nlen = MAXPATHLEN + 1;
4290         converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4291             nlen);
4292
4293         if (converted_onm == NULL) {
4294                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4295                 kmem_free(onm, olen);
4296                 goto out;
4297         }
4298
4299         nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4300         if (nnm == NULL) {
4301                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4302                 if (onm != converted_onm)
4303                         kmem_free(converted_onm, MAXPATHLEN + 1);
4304                 kmem_free(onm, olen);
4305                 goto out;
4306         }
4307         converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4308             MAXPATHLEN  + 1);
4309
4310         if (converted_nnm == NULL) {
4311                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4312                 kmem_free(nnm, nlen);
4313                 nnm = NULL;
4314                 if (onm != converted_onm)
4315                         kmem_free(converted_onm, MAXPATHLEN + 1);
4316                 kmem_free(onm, olen);
4317                 goto out;
4318         }
4319
4320
4321         if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4322                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4323                 kmem_free(onm, olen);
4324                 kmem_free(nnm, nlen);
4325                 goto out;
4326         }
4327
4328
4329         if (rdonly4(req, cs)) {
4330                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4331                 if (onm != converted_onm)
4332                         kmem_free(converted_onm, MAXPATHLEN + 1);
4333                 kmem_free(onm, olen);
4334                 if (nnm != converted_nnm)
4335                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4336                 kmem_free(nnm, nlen);
4337                 goto out;
4338         }
4339
4340         /*
4341          * Is the source a file and have a delegation?
4342          * We don't need to acquire va_seq before these lookups, if
4343          * it causes an update, cinfo.before will not match, which will
4344          * trigger a cache flush even if atomic is TRUE.
4345          */
4346         if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4347             &error, cs->cr)) {
4348                 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4349                     NULL)) {
4350                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4351                         goto err_out;
4352                 }
4353         }
4354
4355         if (srcvp == NULL) {
4356                 *cs->statusp = resp->status = puterrno4(error);
4357                 if (onm != converted_onm)
4358                         kmem_free(converted_onm, MAXPATHLEN + 1);
4359                 kmem_free(onm, olen);
4360                 if (nnm != converted_nnm)
4361                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4362                 kmem_free(nnm, nlen);
4363                 goto out;
4364         }
4365
4366         sfp_rele_grant_hold = 1;
4367
4368         /* Does the destination exist and a file and have a delegation? */
4369         if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4370             NULL, cs->cr)) {
4371                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4372                     NULL)) {
4373                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4374                         goto err_out;
4375                 }
4376         }
4377         fp_rele_grant_hold = 1;
4378
4379
4380         /* Check for NBMAND lock on both source and target */
4381         if (nbl_need_check(srcvp)) {
4382                 nbl_start_crit(srcvp, RW_READER);
4383                 in_crit_src = 1;
4384                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4385                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4386                         goto err_out;
4387                 }
4388         }
4389
4390         if (targvp && nbl_need_check(targvp)) {
4391                 nbl_start_crit(targvp, RW_READER);
4392                 in_crit_targ = 1;
4393                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4394                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4395                         goto err_out;
4396                 }
4397         }
4398
4399         /* Get source "before" change value */
4400         obdva.va_mask = AT_CTIME|AT_SEQ;
4401         error = fop_getattr(odvp, &obdva, 0, cs->cr, NULL);
4402         if (!error) {
4403                 nbdva.va_mask = AT_CTIME|AT_SEQ;
4404                 error = fop_getattr(ndvp, &nbdva, 0, cs->cr, NULL);
4405         }
4406         if (error) {
4407                 *cs->statusp = resp->status = puterrno4(error);
4408                 goto err_out;
4409         }
4410
4411         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4412         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4413
4414         if ((error = fop_rename(odvp, converted_onm, ndvp, converted_nnm,
4415             cs->cr, NULL, 0)) == 0 && fp != NULL) {
4416                 struct vattr va;
4417                 vnode_t *tvp;
4418
4419                 rfs4_dbe_lock(fp->rf_dbe);
4420                 tvp = fp->rf_vp;
4421                 if (tvp)
4422                         VN_HOLD(tvp);
4423                 rfs4_dbe_unlock(fp->rf_dbe);
4424
4425                 if (tvp) {
4426                         va.va_mask = AT_NLINK;
4427                         if (!fop_getattr(tvp, &va, 0, cs->cr, NULL) &&
4428                             va.va_nlink == 0) {
4429                                 /* The file is gone and so should the state */
4430                                 if (in_crit_targ) {
4431                                         nbl_end_crit(targvp);
4432                                         in_crit_targ = 0;
4433                                 }
4434                                 rfs4_close_all_state(fp);
4435                         }
4436                         VN_RELE(tvp);
4437                 }
4438         }
4439         if (error == 0)
4440                 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4441
4442         if (in_crit_src)
4443                 nbl_end_crit(srcvp);
4444         if (srcvp)
4445                 VN_RELE(srcvp);
4446         if (in_crit_targ)
4447                 nbl_end_crit(targvp);
4448         if (targvp)
4449                 VN_RELE(targvp);
4450
4451         if (sfp) {
4452                 rfs4_clear_dont_grant(sfp);
4453                 rfs4_file_rele(sfp);
4454         }
4455         if (fp) {
4456                 rfs4_clear_dont_grant(fp);
4457                 rfs4_file_rele(fp);
4458         }
4459
4460         if (converted_onm != onm)
4461                 kmem_free(converted_onm, MAXPATHLEN + 1);
4462         kmem_free(onm, olen);
4463         if (converted_nnm != nnm)
4464                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4465         kmem_free(nnm, nlen);
4466
4467         /*
4468          * Get the initial "after" sequence number, if it fails, set to zero
4469          */
4470         oidva.va_mask = AT_SEQ;
4471         if (fop_getattr(odvp, &oidva, 0, cs->cr, NULL))
4472                 oidva.va_seq = 0;
4473
4474         nidva.va_mask = AT_SEQ;
4475         if (fop_getattr(ndvp, &nidva, 0, cs->cr, NULL))
4476                 nidva.va_seq = 0;
4477
4478         /*
4479          * Force modified data and metadata out to stable storage.
4480          */
4481         (void) fop_fsync(odvp, 0, cs->cr, NULL);
4482         (void) fop_fsync(ndvp, 0, cs->cr, NULL);
4483
4484         if (error) {
4485                 *cs->statusp = resp->status = puterrno4(error);
4486                 goto out;
4487         }
4488
4489         /*
4490          * Get "after" change values, if it fails, simply return the
4491          * before value.
4492          */
4493         oadva.va_mask = AT_CTIME|AT_SEQ;
4494         if (fop_getattr(odvp, &oadva, 0, cs->cr, NULL)) {
4495                 oadva.va_ctime = obdva.va_ctime;
4496                 oadva.va_seq = 0;
4497         }
4498
4499         nadva.va_mask = AT_CTIME|AT_SEQ;
4500         if (fop_getattr(odvp, &nadva, 0, cs->cr, NULL)) {
4501                 nadva.va_ctime = nbdva.va_ctime;
4502                 nadva.va_seq = 0;
4503         }
4504
4505         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4506         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4507
4508         /*
4509          * The cinfo.atomic = TRUE only if we have
4510          * non-zero va_seq's, and it has incremented by exactly one
4511          * during the fop_rename and it didn't change during the fop_fsync.
4512          */
4513         if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4514             oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4515                 resp->source_cinfo.atomic = TRUE;
4516         else
4517                 resp->source_cinfo.atomic = FALSE;
4518
4519         if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4520             nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4521                 resp->target_cinfo.atomic = TRUE;
4522         else
4523                 resp->target_cinfo.atomic = FALSE;
4524
4525 #ifdef  VOLATILE_FH_TEST
4526         {
4527         extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4528
4529         /*
4530          * Add the renamed file handle to the volatile rename list
4531          */
4532         if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4533                 /* file handles may expire on rename */
4534                 vnode_t *vp;
4535
4536                 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4537                 /*
4538                  * Already know that nnm will be a valid string
4539                  */
4540                 error = fop_lookup(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4541                     NULL, NULL, NULL);
4542                 kmem_free(nnm, nlen);
4543                 if (!error) {
4544                         add_volrnm_fh(cs->exi, vp);
4545                         VN_RELE(vp);
4546                 }
4547         }
4548         }
4549 #endif  /* VOLATILE_FH_TEST */
4550
4551         *cs->statusp = resp->status = NFS4_OK;
4552 out:
4553         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4554             RENAME4res *, resp);
4555         return;
4556
4557 err_out:
4558         if (onm != converted_onm)
4559                 kmem_free(converted_onm, MAXPATHLEN + 1);
4560         if (onm != NULL)
4561                 kmem_free(onm, olen);
4562         if (nnm != converted_nnm)
4563                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4564         if (nnm != NULL)
4565                 kmem_free(nnm, nlen);
4566
4567         if (in_crit_src) nbl_end_crit(srcvp);
4568         if (in_crit_targ) nbl_end_crit(targvp);
4569         if (targvp) VN_RELE(targvp);
4570         if (srcvp) VN_RELE(srcvp);
4571         if (sfp) {
4572                 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4573                 rfs4_file_rele(sfp);
4574         }
4575         if (fp) {
4576                 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4577                 rfs4_file_rele(fp);
4578         }
4579
4580         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4581             RENAME4res *, resp);
4582 }
4583
4584 /* ARGSUSED */
4585 static void
4586 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4587     struct compound_state *cs)
4588 {
4589         RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4590         RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4591         rfs4_client_t *cp;
4592
4593         DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4594             RENEW4args *, args);
4595
4596         if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4597                 *cs->statusp = resp->status =
4598                     rfs4_check_clientid(&args->clientid, 0);
4599                 goto out;
4600         }
4601
4602         if (rfs4_lease_expired(cp)) {
4603                 rfs4_client_rele(cp);
4604                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4605                 goto out;
4606         }
4607
4608         rfs4_update_lease(cp);
4609
4610         mutex_enter(cp->rc_cbinfo.cb_lock);
4611         if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4612                 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4613                 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4614         } else {
4615                 *cs->statusp = resp->status = NFS4_OK;
4616         }
4617         mutex_exit(cp->rc_cbinfo.cb_lock);
4618
4619         rfs4_client_rele(cp);
4620
4621 out:
4622         DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4623             RENEW4res *, resp);
4624 }
4625
4626 /* ARGSUSED */
4627 static void
4628 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4629     struct compound_state *cs)
4630 {
4631         RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4632
4633         DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4634
4635         /* No need to check cs->access - we are not accessing any object */
4636         if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4637                 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4638                 goto out;
4639         }
4640         if (cs->vp != NULL) {
4641                 VN_RELE(cs->vp);
4642         }
4643         cs->vp = cs->saved_vp;
4644         cs->saved_vp = NULL;
4645         cs->exi = cs->saved_exi;
4646         nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4647         *cs->statusp = resp->status = NFS4_OK;
4648         cs->deleg = FALSE;
4649
4650 out:
4651         DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4652             RESTOREFH4res *, resp);
4653 }
4654
4655 /* ARGSUSED */
4656 static void
4657 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4658     struct compound_state *cs)
4659 {
4660         SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4661
4662         DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4663
4664         /* No need to check cs->access - we are not accessing any object */
4665         if (cs->vp == NULL) {
4666                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4667                 goto out;
4668         }
4669         if (cs->saved_vp != NULL) {
4670                 VN_RELE(cs->saved_vp);
4671         }
4672         cs->saved_vp = cs->vp;
4673         VN_HOLD(cs->saved_vp);
4674         cs->saved_exi = cs->exi;
4675         /*
4676          * since SAVEFH is fairly rare, don't alloc space for its fh
4677          * unless necessary.
4678          */
4679         if (cs->saved_fh.nfs_fh4_val == NULL) {
4680                 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4681         }
4682         nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4683         *cs->statusp = resp->status = NFS4_OK;
4684
4685 out:
4686         DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4687             SAVEFH4res *, resp);
4688 }
4689
4690 /*
4691  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4692  * return the bitmap of attrs that were set successfully. It is also
4693  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4694  * always be called only after rfs4_do_set_attrs().
4695  *
4696  * Verify that the attributes are same as the expected ones. sargp->vap
4697  * and sargp->sbp contain the input attributes as translated from fattr4.
4698  *
4699  * This function verifies only the attrs that correspond to a vattr or
4700  * vfsstat struct. That is because of the extra step needed to get the
4701  * corresponding system structs. Other attributes have already been set or
4702  * verified by do_rfs4_set_attrs.
4703  *
4704  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4705  */
4706 static int
4707 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4708     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4709 {
4710         int error, ret_error = 0;
4711         int i, k;
4712         uint_t sva_mask = sargp->vap->va_mask;
4713         uint_t vbit;
4714         union nfs4_attr_u *na;
4715         uint8_t *amap;
4716         bool_t getsb = ntovp->vfsstat;
4717
4718         if (sva_mask != 0) {
4719                 /*
4720                  * Okay to overwrite sargp->vap because we verify based
4721                  * on the incoming values.
4722                  */
4723                 ret_error = fop_getattr(sargp->cs->vp, sargp->vap, 0,
4724                     sargp->cs->cr, NULL);
4725                 if (ret_error) {
4726                         if (resp == NULL)
4727                                 return (ret_error);
4728                         /*
4729                          * Must return bitmap of successful attrs
4730                          */
4731                         sva_mask = 0;   /* to prevent checking vap later */
4732                 } else {
4733                         /*
4734                          * Some file systems clobber va_mask. it is probably
4735                          * wrong of them to do so, nonethless we practice
4736                          * defensive coding.
4737                          * See bug id 4276830.
4738                          */
4739                         sargp->vap->va_mask = sva_mask;
4740                 }
4741         }
4742
4743         if (getsb) {
4744                 /*
4745                  * Now get the superblock and loop on the bitmap, as there is
4746                  * no simple way of translating from superblock to bitmap4.
4747                  */
4748                 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4749                 if (ret_error) {
4750                         if (resp == NULL)
4751                                 goto errout;
4752                         getsb = FALSE;
4753                 }
4754         }
4755
4756         /*
4757          * Now loop and verify each attribute which getattr returned
4758          * whether it's the same as the input.
4759          */
4760         if (resp == NULL && !getsb && (sva_mask == 0))
4761                 goto errout;
4762
4763         na = ntovp->na;
4764         amap = ntovp->amap;
4765         k = 0;
4766         for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4767                 k = *amap;
4768                 ASSERT(nfs4_ntov_map[k].nval == k);
4769                 vbit = nfs4_ntov_map[k].vbit;
4770
4771                 /*
4772                  * If vattr attribute but fop_getattr failed, or it's
4773                  * superblock attribute but VFS_STATVFS failed, skip
4774                  */
4775                 if (vbit) {
4776                         if ((vbit & sva_mask) == 0)
4777                                 continue;
4778                 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4779                         continue;
4780                 }
4781                 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4782                 if (resp != NULL) {
4783                         if (error)
4784                                 ret_error = -1; /* not all match */
4785                         else    /* update response bitmap */
4786                                 *resp |= nfs4_ntov_map[k].fbit;
4787                         continue;
4788                 }
4789                 if (error) {
4790                         ret_error = -1; /* not all match */
4791                         break;
4792                 }
4793         }
4794 errout:
4795         return (ret_error);
4796 }
4797
4798 /*
4799  * Decode the attribute to be set/verified. If the attr requires a sys op
4800  * (fop_getattr, VFS_VFSSTAT), and the request is to verify, then don't
4801  * call the sv_getit function for it, because the sys op hasn't yet been done.
4802  * Return 0 for success, error code if failed.
4803  *
4804  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
4805  */
4806 static int
4807 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
4808     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
4809 {
4810         int error = 0;
4811         bool_t set_later;
4812
4813         sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
4814
4815         if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
4816                 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
4817                 /*
4818                  * don't verify yet if a vattr or sb dependent attr,
4819                  * because we don't have their sys values yet.
4820                  * Will be done later.
4821                  */
4822                 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
4823                         /*
4824                          * ACLs are a special case, since setting the MODE
4825                          * conflicts with setting the ACL.  We delay setting
4826                          * the ACL until all other attributes have been set.
4827                          * The ACL gets set in do_rfs4_op_setattr().
4828                          */
4829                         if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
4830                                 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
4831                                     sargp, nap);
4832                                 if (error) {
4833                                         xdr_free(nfs4_ntov_map[k].xfunc,
4834                                             (caddr_t)nap);
4835                                 }
4836                         }
4837                 }
4838         } else {
4839 #ifdef  DEBUG
4840                 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
4841                     "decoding attribute %d\n", k);
4842 #endif
4843                 error = EINVAL;
4844         }
4845         if (!error && resp_bval && !set_later) {
4846                 *resp_bval |= nfs4_ntov_map[k].fbit;
4847         }
4848
4849         return (error);
4850 }
4851
4852 /*
4853  * Set vattr based on incoming fattr4 attrs - used by setattr.
4854  * Set response mask. Ignore any values that are not writable vattr attrs.
4855  */
4856 static nfsstat4
4857 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
4858     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
4859     nfs4_attr_cmd_t cmd)
4860 {
4861         int error = 0;
4862         int i;
4863         char *attrs = fattrp->attrlist4;
4864         uint32_t attrslen = fattrp->attrlist4_len;
4865         XDR xdr;
4866         nfsstat4 status = NFS4_OK;
4867         vnode_t *vp = cs->vp;
4868         union nfs4_attr_u *na;
4869         uint8_t *amap;
4870
4871         /*
4872          * Make sure that maximum attribute number can be expressed as an
4873          * 8 bit quantity.
4874          */
4875         ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
4876
4877         if (vp == NULL) {
4878                 if (resp)
4879                         *resp = 0;
4880                 return (NFS4ERR_NOFILEHANDLE);
4881         }
4882         if (cs->access == CS_ACCESS_DENIED) {
4883                 if (resp)
4884                         *resp = 0;
4885                 return (NFS4ERR_ACCESS);
4886         }
4887
4888         sargp->op = cmd;
4889         sargp->cs = cs;
4890         sargp->flag = 0;        /* may be set later */
4891         sargp->vap->va_mask = 0;
4892         sargp->rdattr_error = NFS4_OK;
4893         sargp->rdattr_error_req = FALSE;
4894         /* sargp->sbp is set by the caller */
4895
4896         xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
4897
4898         na = ntovp->na;
4899         amap = ntovp->amap;
4900
4901         /*
4902          * The following loop iterates on the nfs4_ntov_map checking
4903          * if the fbit is set in the requested bitmap.
4904          * If set then we process the arguments using the
4905          * rfs4_fattr4 conversion functions to populate the setattr
4906          * vattr and va_mask. Any settable attrs that are not using vattr
4907          * will be set in this loop.
4908          */
4909         for (i = 0; i < nfs4_ntov_map_size; i++) {
4910                 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
4911                         continue;
4912                 }
4913                 /*
4914                  * If setattr, must be a writable attr.
4915                  * If verify/nverify, must be a readable attr.
4916                  */
4917                 if ((error = (*nfs4_ntov_map[i].sv_getit)(
4918                     NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
4919                         /*
4920                          * Client tries to set/verify an
4921                          * unsupported attribute, tries to set
4922                          * a read only attr or verify a write
4923                          * only one - error!
4924                          */
4925                         break;
4926                 }
4927                 /*
4928                  * Decode the attribute to set/verify
4929                  */
4930                 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
4931                     &xdr, resp ? resp : NULL, na);
4932                 if (error)
4933                         break;
4934                 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
4935                 na++;
4936                 (ntovp->attrcnt)++;
4937                 if (nfs4_ntov_map[i].vfsstat)
4938                         ntovp->vfsstat = TRUE;
4939         }
4940
4941         if (error != 0)
4942                 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
4943                     puterrno4(error));
4944         /* xdrmem_destroy(&xdrs); */    /* NO-OP */
4945         return (status);
4946 }
4947
4948 static nfsstat4
4949 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
4950     stateid4 *stateid)
4951 {
4952         int error = 0;
4953         struct nfs4_svgetit_arg sarg;
4954         bool_t trunc;
4955
4956         nfsstat4 status = NFS4_OK;
4957         cred_t *cr = cs->cr;
4958         vnode_t *vp = cs->vp;
4959         struct nfs4_ntov_table ntov;
4960         struct statvfs64 sb;
4961         struct vattr bva;
4962         struct flock64 bf;
4963         int in_crit = 0;
4964         uint_t saved_mask = 0;
4965         caller_context_t ct;
4966
4967         *resp = 0;
4968         sarg.sbp = &sb;
4969         sarg.is_referral = B_FALSE;
4970         nfs4_ntov_table_init(&ntov);
4971         status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
4972             NFS4ATTR_SETIT);
4973         if (status != NFS4_OK) {
4974                 /*
4975                  * failed set attrs
4976                  */
4977                 goto done;
4978         }
4979         if ((sarg.vap->va_mask == 0) &&
4980             (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
4981                 /*
4982                  * no further work to be done
4983                  */
4984                 goto done;
4985         }
4986
4987         /*
4988          * If we got a request to set the ACL and the MODE, only
4989          * allow changing VSUID, VSGID, and VSVTX.  Attempting
4990          * to change any other bits, along with setting an ACL,
4991          * gives NFS4ERR_INVAL.
4992          */
4993         if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
4994             (fattrp->attrmask & FATTR4_MODE_MASK)) {
4995                 vattr_t va;
4996
4997                 va.va_mask = AT_MODE;
4998                 error = fop_getattr(vp, &va, 0, cs->cr, NULL);
4999                 if (error) {
5000                         status = puterrno4(error);
5001                         goto done;
5002                 }
5003                 if ((sarg.vap->va_mode ^ va.va_mode) &
5004                     ~(VSUID | VSGID | VSVTX)) {
5005                         status = NFS4ERR_INVAL;
5006                         goto done;
5007                 }
5008         }
5009
5010         /* Check stateid only if size has been set */
5011         if (sarg.vap->va_mask & AT_SIZE) {
5012                 trunc = (sarg.vap->va_size == 0);
5013                 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5014                     trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5015                 if (status != NFS4_OK)
5016                         goto done;
5017         } else {
5018                 ct.cc_sysid = 0;
5019                 ct.cc_pid = 0;
5020                 ct.cc_caller_id = nfs4_srv_caller_id;
5021                 ct.cc_flags = CC_DONTBLOCK;
5022         }
5023
5024         /* XXX start of possible race with delegations */
5025
5026         /*
5027          * We need to specially handle size changes because it is
5028          * possible for the client to create a file with read-only
5029          * modes, but with the file opened for writing. If the client
5030          * then tries to set the file size, e.g. ftruncate(3C),
5031          * fcntl(F_FREESP), the normal access checking done in
5032          * fop_setattr would prevent the client from doing it even though
5033          * it should be allowed to do so.  To get around this, we do the
5034          * access checking for ourselves and use fop_space which doesn't
5035          * do the access checking.
5036          * Also the client should not be allowed to change the file
5037          * size if there is a conflicting non-blocking mandatory lock in
5038          * the region of the change.
5039          */
5040         if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5041                 uoff_t offset;
5042                 ssize_t length;
5043
5044                 /*
5045                  * ufs_setattr clears AT_SIZE from vap->va_mask, but
5046                  * before returning, sarg.vap->va_mask is used to
5047                  * generate the setattr reply bitmap.  We also clear
5048                  * AT_SIZE below before calling fop_space.  For both
5049                  * of these cases, the va_mask needs to be saved here
5050                  * and restored after calling fop_setattr.
5051                  */
5052                 saved_mask = sarg.vap->va_mask;
5053
5054                 /*
5055                  * Check any possible conflict due to NBMAND locks.
5056                  * Get into critical region before fop_getattr, so the
5057                  * size attribute is valid when checking conflicts.
5058                  */
5059                 if (nbl_need_check(vp)) {
5060                         nbl_start_crit(vp, RW_READER);
5061                         in_crit = 1;
5062                 }
5063
5064                 bva.va_mask = AT_UID|AT_SIZE;
5065                 if (error = fop_getattr(vp, &bva, 0, cr, &ct)) {
5066                         status = puterrno4(error);
5067                         goto done;
5068                 }
5069
5070                 if (in_crit) {
5071                         if (sarg.vap->va_size < bva.va_size) {
5072                                 offset = sarg.vap->va_size;
5073                                 length = bva.va_size - sarg.vap->va_size;
5074                         } else {
5075                                 offset = bva.va_size;
5076                                 length = sarg.vap->va_size - bva.va_size;
5077                         }
5078                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5079                             &ct)) {
5080                                 status = NFS4ERR_LOCKED;
5081                                 goto done;
5082                         }
5083                 }
5084
5085                 if (crgetuid(cr) == bva.va_uid) {
5086                         sarg.vap->va_mask &= ~AT_SIZE;
5087                         bf.l_type = F_WRLCK;
5088                         bf.l_whence = 0;
5089                         bf.l_start = (off64_t)sarg.vap->va_size;
5090                         bf.l_len = 0;
5091                         bf.l_sysid = 0;
5092                         bf.l_pid = 0;
5093                         error = fop_space(vp, F_FREESP, &bf, FWRITE,
5094                             (offset_t)sarg.vap->va_size, cr, &ct);
5095                 }
5096         }
5097
5098         if (!error && sarg.vap->va_mask != 0)
5099                 error = fop_setattr(vp, sarg.vap, sarg.flag, cr, &ct);
5100
5101         /* restore va_mask -- ufs_setattr clears AT_SIZE */
5102         if (saved_mask & AT_SIZE)
5103                 sarg.vap->va_mask |= AT_SIZE;
5104
5105         /*
5106          * If an ACL was being set, it has been delayed until now,
5107          * in order to set the mode (via the fop_setattr() above) first.
5108          */
5109         if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5110                 int i;
5111
5112                 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5113                         if (ntov.amap[i] == FATTR4_ACL)
5114                                 break;
5115                 if (i < NFS4_MAXNUM_ATTRS) {
5116                         error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5117                             NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5118                         if (error == 0) {
5119                                 *resp |= FATTR4_ACL_MASK;
5120                         } else if (error == ENOTSUP) {
5121                                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5122                                 status = NFS4ERR_ATTRNOTSUPP;
5123                                 goto done;
5124                         }
5125                 } else {
5126                         NFS4_DEBUG(rfs4_debug,
5127                             (CE_NOTE, "do_rfs4_op_setattr: "
5128                             "unable to find ACL in fattr4"));
5129                         error = EINVAL;
5130                 }
5131         }
5132
5133         if (error) {
5134                 /* check if a monitor detected a delegation conflict */
5135                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5136                         status = NFS4ERR_DELAY;
5137                 else
5138                         status = puterrno4(error);
5139
5140                 /*
5141                  * Set the response bitmap when setattr failed.
5142                  * If fop_setattr partially succeeded, test by doing a
5143                  * fop_getattr on the object and comparing the data
5144                  * to the setattr arguments.
5145                  */
5146                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5147         } else {
5148                 /*
5149                  * Force modified metadata out to stable storage.
5150                  */
5151                 (void) fop_fsync(vp, FNODSYNC, cr, &ct);
5152                 /*
5153                  * Set response bitmap
5154                  */
5155                 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5156         }
5157
5158 /* Return early and already have a NFSv4 error */
5159 done:
5160         /*
5161          * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5162          * conversion sets both readable and writeable NFS4 attrs
5163          * for AT_MTIME and AT_ATIME.  The line below masks out
5164          * unrequested attrs from the setattr result bitmap.  This
5165          * is placed after the done: label to catch the ATTRNOTSUP
5166          * case.
5167          */
5168         *resp &= fattrp->attrmask;
5169
5170         if (in_crit)
5171                 nbl_end_crit(vp);
5172
5173         nfs4_ntov_table_free(&ntov, &sarg);
5174
5175         return (status);
5176 }
5177
5178 /* ARGSUSED */
5179 static void
5180 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5181     struct compound_state *cs)
5182 {
5183         SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5184         SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5185
5186         DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5187             SETATTR4args *, args);
5188
5189         if (cs->vp == NULL) {
5190                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5191                 goto out;
5192         }
5193
5194         /*
5195          * If there is an unshared filesystem mounted on this vnode,
5196          * do not allow to setattr on this vnode.
5197          */
5198         if (vn_ismntpt(cs->vp)) {
5199                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5200                 goto out;
5201         }
5202
5203         resp->attrsset = 0;
5204
5205         if (rdonly4(req, cs)) {
5206                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5207                 goto out;
5208         }
5209
5210         *cs->statusp = resp->status =
5211             do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5212             &args->stateid);
5213
5214 out:
5215         DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5216             SETATTR4res *, resp);
5217 }
5218
5219 /* ARGSUSED */
5220 static void
5221 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5222     struct compound_state *cs)
5223 {
5224         /*
5225          * verify and nverify are exactly the same, except that nverify
5226          * succeeds when some argument changed, and verify succeeds when
5227          * when none changed.
5228          */
5229
5230         VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5231         VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5232
5233         int error;
5234         struct nfs4_svgetit_arg sarg;
5235         struct statvfs64 sb;
5236         struct nfs4_ntov_table ntov;
5237
5238         DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5239             VERIFY4args *, args);
5240
5241         if (cs->vp == NULL) {
5242                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5243                 goto out;
5244         }
5245
5246         sarg.sbp = &sb;
5247         sarg.is_referral = B_FALSE;
5248         nfs4_ntov_table_init(&ntov);
5249         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5250             &sarg, &ntov, NFS4ATTR_VERIT);
5251         if (resp->status != NFS4_OK) {
5252                 /*
5253                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5254                  * so could return -1 for "no match".
5255                  */
5256                 if (resp->status == -1)
5257                         resp->status = NFS4ERR_NOT_SAME;
5258                 goto done;
5259         }
5260         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5261         switch (error) {
5262         case 0:
5263                 resp->status = NFS4_OK;
5264                 break;
5265         case -1:
5266                 resp->status = NFS4ERR_NOT_SAME;
5267                 break;
5268         default:
5269                 resp->status = puterrno4(error);
5270                 break;
5271         }
5272 done:
5273         *cs->statusp = resp->status;
5274         nfs4_ntov_table_free(&ntov, &sarg);
5275 out:
5276         DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5277             VERIFY4res *, resp);
5278 }
5279
5280 /* ARGSUSED */
5281 static void
5282 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5283     struct compound_state *cs)
5284 {
5285         /*
5286          * verify and nverify are exactly the same, except that nverify
5287          * succeeds when some argument changed, and verify succeeds when
5288          * when none changed.
5289          */
5290
5291         NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5292         NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5293
5294         int error;
5295         struct nfs4_svgetit_arg sarg;
5296         struct statvfs64 sb;
5297         struct nfs4_ntov_table ntov;
5298
5299         DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5300             NVERIFY4args *, args);
5301
5302         if (cs->vp == NULL) {
5303                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5304                 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5305                     NVERIFY4res *, resp);
5306                 return;
5307         }
5308         sarg.sbp = &sb;
5309         sarg.is_referral = B_FALSE;
5310         nfs4_ntov_table_init(&ntov);
5311         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5312             &sarg, &ntov, NFS4ATTR_VERIT);
5313         if (resp->status != NFS4_OK) {
5314                 /*
5315                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5316                  * so could return -1 for "no match".
5317                  */
5318                 if (resp->status == -1)
5319                         resp->status = NFS4_OK;
5320                 goto done;
5321         }
5322         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5323         switch (error) {
5324         case 0:
5325                 resp->status = NFS4ERR_SAME;
5326                 break;
5327         case -1:
5328                 resp->status = NFS4_OK;
5329                 break;
5330         default:
5331                 resp->status = puterrno4(error);
5332                 break;
5333         }
5334 done:
5335         *cs->statusp = resp->status;
5336         nfs4_ntov_table_free(&ntov, &sarg);
5337
5338         DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5339             NVERIFY4res *, resp);
5340 }
5341
5342 /*
5343  * XXX - This should live in an NFS header file.
5344  */
5345 #define MAX_IOVECS      12
5346
5347 /* ARGSUSED */
5348 static void
5349 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5350     struct compound_state *cs)
5351 {
5352         WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5353         WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5354         int error;
5355         vnode_t *vp;
5356         struct vattr bva;
5357         uoff_t rlimit;
5358         struct uio uio;
5359         struct iovec iov[MAX_IOVECS];
5360         struct iovec *iovp;
5361         int iovcnt;
5362         int ioflag;
5363         cred_t *savecred, *cr;
5364         bool_t *deleg = &cs->deleg;
5365         nfsstat4 stat;
5366         int in_crit = 0;
5367         caller_context_t ct;
5368
5369         DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5370             WRITE4args *, args);
5371
5372         vp = cs->vp;
5373         if (vp == NULL) {
5374                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5375                 goto out;
5376         }
5377         if (cs->access == CS_ACCESS_DENIED) {
5378                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5379                 goto out;
5380         }
5381
5382         cr = cs->cr;
5383
5384         if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5385             deleg, TRUE, &ct)) != NFS4_OK) {
5386                 *cs->statusp = resp->status = stat;
5387                 goto out;
5388         }
5389
5390         /*
5391          * We have to enter the critical region before calling fop_rwlock
5392          * to avoid a deadlock with ufs.
5393          */
5394         if (nbl_need_check(vp)) {
5395                 nbl_start_crit(vp, RW_READER);
5396                 in_crit = 1;
5397                 if (nbl_conflict(vp, NBL_WRITE,
5398                     args->offset, args->data_len, 0, &ct)) {
5399                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
5400                         goto out;
5401                 }
5402         }
5403
5404         bva.va_mask = AT_MODE | AT_UID;
5405         error = fop_getattr(vp, &bva, 0, cr, &ct);
5406
5407         /*
5408          * If we can't get the attributes, then we can't do the
5409          * right access checking.  So, we'll fail the request.
5410          */
5411         if (error) {
5412                 *cs->statusp = resp->status = puterrno4(error);
5413                 goto out;
5414         }
5415
5416         if (rdonly4(req, cs)) {
5417                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5418                 goto out;
5419         }
5420
5421         if (vp->v_type != VREG) {
5422                 *cs->statusp = resp->status =
5423                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5424                 goto out;
5425         }
5426
5427         if (crgetuid(cr) != bva.va_uid &&
5428             (error = fop_access(vp, VWRITE, 0, cr, &ct))) {
5429                 *cs->statusp = resp->status = puterrno4(error);
5430                 goto out;
5431         }
5432
5433         if (MANDLOCK(vp, bva.va_mode)) {
5434                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5435                 goto out;
5436         }
5437
5438         if (args->data_len == 0) {
5439                 *cs->statusp = resp->status = NFS4_OK;
5440                 resp->count = 0;
5441                 resp->committed = args->stable;
5442                 resp->writeverf = Write4verf;
5443                 goto out;
5444         }
5445
5446         if (args->mblk != NULL) {
5447                 mblk_t *m;
5448                 uint_t bytes, round_len;
5449
5450                 iovcnt = 0;
5451                 bytes = 0;
5452                 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5453                 for (m = args->mblk;
5454                     m != NULL && bytes < round_len;
5455                     m = m->b_cont) {
5456                         iovcnt++;
5457                         bytes += MBLKL(m);
5458                 }
5459 #ifdef DEBUG
5460                 /* should have ended on an mblk boundary */
5461                 if (bytes != round_len) {
5462                         printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5463                             bytes, round_len, args->data_len);
5464                         printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5465                             (void *)args->mblk, (void *)m);
5466                         ASSERT(bytes == round_len);
5467                 }
5468 #endif
5469                 if (iovcnt <= MAX_IOVECS) {
5470                         iovp = iov;
5471                 } else {
5472                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5473                 }
5474                 mblk_to_iov(args->mblk, iovcnt, iovp);
5475         } else if (args->rlist != NULL) {
5476                 iovcnt = 1;
5477                 iovp = iov;
5478                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5479                 iovp->iov_len = args->data_len;
5480         } else {
5481                 iovcnt = 1;
5482                 iovp = iov;
5483                 iovp->iov_base = args->data_val;
5484                 iovp->iov_len = args->data_len;
5485         }
5486
5487         uio.uio_iov = iovp;
5488         uio.uio_iovcnt = iovcnt;
5489
5490         uio.uio_segflg = UIO_SYSSPACE;
5491         uio.uio_extflg = UIO_COPY_DEFAULT;
5492         uio.uio_loffset = args->offset;
5493         uio.uio_resid = args->data_len;
5494         uio.uio_llimit = curproc->p_fsz_ctl;
5495         rlimit = uio.uio_llimit - args->offset;
5496         if (rlimit < (uoff_t)uio.uio_resid)
5497                 uio.uio_resid = (int)rlimit;
5498
5499         if (args->stable == UNSTABLE4)
5500                 ioflag = 0;
5501         else if (args->stable == FILE_SYNC4)
5502                 ioflag = FSYNC;
5503         else if (args->stable == DATA_SYNC4)
5504                 ioflag = FDSYNC;
5505         else {
5506                 if (iovp != iov)
5507                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
5508                 *cs->statusp = resp->status = NFS4ERR_INVAL;
5509                 goto out;
5510         }
5511
5512         /*
5513          * We're changing creds because VM may fault and we need
5514          * the cred of the current thread to be used if quota
5515          * checking is enabled.
5516          */
5517         savecred = curthread->t_cred;
5518         curthread->t_cred = cr;
5519         error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5520         curthread->t_cred = savecred;
5521
5522         if (iovp != iov)
5523                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5524
5525         if (error) {
5526                 *cs->statusp = resp->status = puterrno4(error);
5527                 goto out;
5528         }
5529
5530         *cs->statusp = resp->status = NFS4_OK;
5531         resp->count = args->data_len - uio.uio_resid;
5532
5533         if (ioflag == 0)
5534                 resp->committed = UNSTABLE4;
5535         else
5536                 resp->committed = FILE_SYNC4;
5537
5538         resp->writeverf = Write4verf;
5539
5540 out:
5541         if (in_crit)
5542                 nbl_end_crit(vp);
5543
5544         DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5545             WRITE4res *, resp);
5546 }
5547
5548
5549 /* XXX put in a header file */
5550 extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5551
5552 void
5553 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5554     struct svc_req *req, cred_t *cr, int *rv)
5555 {
5556         uint_t i;
5557         struct compound_state cs;
5558
5559         if (rv != NULL)
5560                 *rv = 0;
5561         rfs4_init_compound_state(&cs);
5562         /*
5563          * Form a reply tag by copying over the reqeuest tag.
5564          */
5565         resp->tag.utf8string_val =
5566             kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5567         resp->tag.utf8string_len = args->tag.utf8string_len;
5568         bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5569             resp->tag.utf8string_len);
5570
5571         cs.statusp = &resp->status;
5572         cs.req = req;
5573         resp->array = NULL;
5574         resp->array_len = 0;
5575
5576         /*
5577          * XXX for now, minorversion should be zero
5578          */
5579         if (args->minorversion != NFS4_MINORVERSION) {
5580                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5581                     &cs, COMPOUND4args *, args);
5582                 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5583                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5584                     &cs, COMPOUND4res *, resp);
5585                 return;
5586         }
5587
5588         if (args->array_len == 0) {
5589                 resp->status = NFS4_OK;
5590                 return;
5591         }
5592
5593         ASSERT(exi == NULL);
5594         ASSERT(cr == NULL);
5595
5596         cr = crget();
5597         ASSERT(cr != NULL);
5598
5599         if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5600                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5601                     &cs, COMPOUND4args *, args);
5602                 crfree(cr);
5603                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5604                     &cs, COMPOUND4res *, resp);
5605                 svcerr_badcred(req->rq_xprt);
5606                 if (rv != NULL)
5607                         *rv = 1;
5608                 return;
5609         }
5610         resp->array_len = args->array_len;
5611         resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5612             KM_SLEEP);
5613
5614         cs.basecr = cr;
5615
5616         DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5617             COMPOUND4args *, args);
5618
5619         /*
5620          * For now, NFS4 compound processing must be protected by
5621          * exported_lock because it can access more than one exportinfo
5622          * per compound and share/unshare can now change multiple
5623          * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5624          * per proc (excluding public exinfo), and exi_count design
5625          * is sufficient to protect concurrent execution of NFS2/3
5626          * ops along with unexport.  This lock will be removed as
5627          * part of the NFSv4 phase 2 namespace redesign work.
5628          */
5629         rw_enter(&exported_lock, RW_READER);
5630
5631         /*
5632          * If this is the first compound we've seen, we need to start all
5633          * new instances' grace periods.
5634          */
5635         if (rfs4_seen_first_compound == 0) {
5636                 rfs4_grace_start_new();
5637                 /*
5638                  * This must be set after rfs4_grace_start_new(), otherwise
5639                  * another thread could proceed past here before the former
5640                  * is finished.
5641                  */
5642                 rfs4_seen_first_compound = 1;
5643         }
5644
5645         for (i = 0; i < args->array_len && cs.cont; i++) {
5646                 nfs_argop4 *argop;
5647                 nfs_resop4 *resop;
5648                 uint_t op;
5649
5650                 argop = &args->array[i];
5651                 resop = &resp->array[i];
5652                 resop->resop = argop->argop;
5653                 op = (uint_t)resop->resop;
5654
5655                 if (op < rfsv4disp_cnt) {
5656                         /*
5657                          * Count the individual ops here; NULL and COMPOUND
5658                          * are counted in common_dispatch()
5659                          */
5660                         rfsproccnt_v4_ptr[op].value.ui64++;
5661
5662                         NFS4_DEBUG(rfs4_debug > 1,
5663                             (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5664                         (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5665                         NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5666                             rfs4_op_string[op], *cs.statusp));
5667                         if (*cs.statusp != NFS4_OK)
5668                                 cs.cont = FALSE;
5669                 } else {
5670                         /*
5671                          * This is effectively dead code since XDR code
5672                          * will have already returned BADXDR if op doesn't
5673                          * decode to legal value.  This only done for a
5674                          * day when XDR code doesn't verify v4 opcodes.
5675                          */
5676                         op = OP_ILLEGAL;
5677                         rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5678
5679                         rfs4_op_illegal(argop, resop, req, &cs);
5680                         cs.cont = FALSE;
5681                 }
5682
5683                 /*
5684                  * If not at last op, and if we are to stop, then
5685                  * compact the results array.
5686                  */
5687                 if ((i + 1) < args->array_len && !cs.cont) {
5688                         nfs_resop4 *new_res = kmem_alloc(
5689                             (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5690                         bcopy(resp->array,
5691                             new_res, (i+1) * sizeof (nfs_resop4));
5692                         kmem_free(resp->array,
5693                             args->array_len * sizeof (nfs_resop4));
5694
5695                         resp->array_len =  i + 1;
5696                         resp->array = new_res;
5697                 }
5698         }
5699
5700         rw_exit(&exported_lock);
5701
5702         DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5703             COMPOUND4res *, resp);
5704
5705         if (cs.vp)
5706                 VN_RELE(cs.vp);
5707         if (cs.saved_vp)
5708                 VN_RELE(cs.saved_vp);
5709         if (cs.saved_fh.nfs_fh4_val)
5710                 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5711
5712         if (cs.basecr)
5713                 crfree(cs.basecr);
5714         if (cs.cr)
5715                 crfree(cs.cr);
5716 }
5717
5718 /*
5719  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5720  * XXX zero out the tag and array values. Need to investigate why the
5721  * XXX calls occur, but at least prevent the panic for now.
5722  */
5723 void
5724 rfs4_compound_free(COMPOUND4res *resp)
5725 {
5726         uint_t i;
5727
5728         if (resp->tag.utf8string_val) {
5729                 UTF8STRING_FREE(resp->tag)
5730         }
5731
5732         for (i = 0; i < resp->array_len; i++) {
5733                 nfs_resop4 *resop;
5734                 uint_t op;
5735
5736                 resop = &resp->array[i];
5737                 op = (uint_t)resop->resop;
5738                 if (op < rfsv4disp_cnt) {
5739                         (*rfsv4disptab[op].dis_resfree)(resop);
5740                 }
5741         }
5742         if (resp->array != NULL) {
5743                 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5744         }
5745 }
5746
5747 /*
5748  * Process the value of the compound request rpc flags, as a bit-AND
5749  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5750  */
5751 void
5752 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5753 {
5754         int i;
5755         int flag = RPC_ALL;
5756
5757         for (i = 0; flag && i < args->array_len; i++) {
5758                 uint_t op;
5759
5760                 op = (uint_t)args->array[i].argop;
5761
5762                 if (op < rfsv4disp_cnt)
5763                         flag &= rfsv4disptab[op].dis_flags;
5764                 else
5765                         flag = 0;
5766         }
5767         *flagp = flag;
5768 }
5769
5770 nfsstat4
5771 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
5772 {
5773         nfsstat4 e;
5774
5775         rfs4_dbe_lock(cp->rc_dbe);
5776
5777         if (cp->rc_sysidt != LM_NOSYSID) {
5778                 *sp = cp->rc_sysidt;
5779                 e = NFS4_OK;
5780
5781         } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
5782                 *sp = cp->rc_sysidt;
5783                 e = NFS4_OK;
5784
5785                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
5786                     "rfs4_client_sysid: allocated 0x%x\n", *sp));
5787         } else
5788                 e = NFS4ERR_DELAY;
5789
5790         rfs4_dbe_unlock(cp->rc_dbe);
5791         return (e);
5792 }
5793
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug helper: log one record-lock request through cmn_err(CE_NOTE).
 *
 * str       - caller-supplied prefix for the message
 * operation - fcntl-style lock command (F_GETLK, F_SETLK or
 *             F_SETLK_NBMAND; anything else is shown as F_UNKNOWN)
 * flk       - lock description; l_whence is asserted to be 0, and an
 *             l_len of 0 is printed as ~0LL
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
        char *op, *type;

        switch (operation) {
        case F_GETLK:
                op = "F_GETLK";
                break;
        case F_SETLK:
                op = "F_SETLK";
                break;
        case F_SETLK_NBMAND:
                op = "F_SETLK_NBMAND";
                break;
        default:
                op = "F_UNKNOWN";
                break;
        }

        switch (flk->l_type) {
        case F_UNLCK:
                type = "F_UNLCK";
                break;
        case F_RDLCK:
                type = "F_RDLCK";
                break;
        case F_WRLCK:
                type = "F_WRLCK";
                break;
        default:
                type = "F_UNKNOWN";
                break;
        }

        ASSERT(flk->l_whence == 0);
        cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
            str, op, type, (longlong_t)flk->l_start,
            flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/*
 * Wrapped in do/while (0) so the expansion is a single statement: a bare
 * "if (d) ..." would capture an "else" that follows the macro call.
 */
#define LOCK_PRINT(d, s, t, f) \
        do { \
                if (d) \
                        lock_print(s, t, f); \
        } while (0)
#else
#define LOCK_PRINT(d, s, t, f)
#endif
5830
5831 /*ARGSUSED*/
5832 static bool_t
5833 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
5834 {
5835         return (TRUE);
5836 }
5837
5838 /*
5839  * Look up the pathname using the vp in cs as the directory vnode.
5840  * cs->vp will be the vnode for the file on success
5841  */
5842
5843 static nfsstat4
5844 rfs4_lookup(component4 *component, struct svc_req *req,
5845     struct compound_state *cs)
5846 {
5847         char *nm;
5848         uint32_t len;
5849         nfsstat4 status;
5850         struct sockaddr *ca;
5851         char *name;
5852
5853         if (cs->vp == NULL) {
5854                 return (NFS4ERR_NOFILEHANDLE);
5855         }
5856         if (cs->vp->v_type != VDIR) {
5857                 return (NFS4ERR_NOTDIR);
5858         }
5859
5860         status = utf8_dir_verify(component);
5861         if (status != NFS4_OK)
5862                 return (status);
5863
5864         nm = utf8_to_fn(component, &len, NULL);
5865         if (nm == NULL) {
5866                 return (NFS4ERR_INVAL);
5867         }
5868
5869         if (len > MAXNAMELEN) {
5870                 kmem_free(nm, len);
5871                 return (NFS4ERR_NAMETOOLONG);
5872         }
5873
5874         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
5875         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
5876             MAXPATHLEN + 1);
5877
5878         if (name == NULL) {
5879                 kmem_free(nm, len);
5880                 return (NFS4ERR_INVAL);
5881         }
5882
5883         status = do_rfs4_op_lookup(name, req, cs);
5884
5885         if (name != nm)
5886                 kmem_free(name, MAXPATHLEN + 1);
5887
5888         kmem_free(nm, len);
5889
5890         return (status);
5891 }
5892
5893 static nfsstat4
5894 rfs4_lookupfile(component4 *component, struct svc_req *req,
5895     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
5896 {
5897         nfsstat4 status;
5898         vnode_t *dvp = cs->vp;
5899         vattr_t bva, ava, fva;
5900         int error;
5901
5902         /* Get "before" change value */
5903         bva.va_mask = AT_CTIME|AT_SEQ;
5904         error = fop_getattr(dvp, &bva, 0, cs->cr, NULL);
5905         if (error)
5906                 return (puterrno4(error));
5907
5908         /* rfs4_lookup may VN_RELE directory */
5909         VN_HOLD(dvp);
5910
5911         status = rfs4_lookup(component, req, cs);
5912         if (status != NFS4_OK) {
5913                 VN_RELE(dvp);
5914                 return (status);
5915         }
5916
5917         /*
5918          * Get "after" change value, if it fails, simply return the
5919          * before value.
5920          */
5921         ava.va_mask = AT_CTIME|AT_SEQ;
5922         if (fop_getattr(dvp, &ava, 0, cs->cr, NULL)) {
5923                 ava.va_ctime = bva.va_ctime;
5924                 ava.va_seq = 0;
5925         }
5926         VN_RELE(dvp);
5927
5928         /*
5929          * Validate the file is a file
5930          */
5931         fva.va_mask = AT_TYPE|AT_MODE;
5932         error = fop_getattr(cs->vp, &fva, 0, cs->cr, NULL);
5933         if (error)
5934                 return (puterrno4(error));
5935
5936         if (fva.va_type != VREG) {
5937                 if (fva.va_type == VDIR)
5938                         return (NFS4ERR_ISDIR);
5939                 if (fva.va_type == VLNK)
5940                         return (NFS4ERR_SYMLINK);
5941                 return (NFS4ERR_INVAL);
5942         }
5943
5944         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
5945         NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
5946
5947         /*
5948          * It is undefined if fop_lookup will change va_seq, so
5949          * cinfo.atomic = TRUE only if we have
5950          * non-zero va_seq's, and they have not changed.
5951          */
5952         if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
5953                 cinfo->atomic = TRUE;
5954         else
5955                 cinfo->atomic = FALSE;
5956
5957         /* Check for mandatory locking */
5958         cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
5959         return (check_open_access(access, cs, req));
5960 }
5961
5962 static nfsstat4
5963 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
5964     cred_t *cr, vnode_t **vpp, bool_t *created)
5965 {
5966         int error;
5967         nfsstat4 status = NFS4_OK;
5968         vattr_t va;
5969
5970 tryagain:
5971
5972         /*
5973          * The file open mode used is VWRITE.  If the client needs
5974          * some other semantic, then it should do the access checking
5975          * itself.  It would have been nice to have the file open mode
5976          * passed as part of the arguments.
5977          */
5978
5979         *created = TRUE;
5980         error = fop_create(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
5981
5982         if (error) {
5983                 *created = FALSE;
5984
5985                 /*
5986                  * If we got something other than file already exists
5987                  * then just return this error.  Otherwise, we got
5988                  * EEXIST.  If we were doing a GUARDED create, then
5989                  * just return this error.  Otherwise, we need to
5990                  * make sure that this wasn't a duplicate of an
5991                  * exclusive create request.
5992                  *
5993                  * The assumption is made that a non-exclusive create
5994                  * request will never return EEXIST.
5995                  */
5996
5997                 if (error != EEXIST || mode == GUARDED4) {
5998                         status = puterrno4(error);
5999                         return (status);
6000                 }
6001                 error = fop_lookup(dvp, nm, vpp, NULL, 0, NULL, cr,
6002                     NULL, NULL, NULL);
6003
6004                 if (error) {
6005                         /*
6006                          * We couldn't find the file that we thought that
6007                          * we just created.  So, we'll just try creating
6008                          * it again.
6009                          */
6010                         if (error == ENOENT)
6011                                 goto tryagain;
6012
6013                         status = puterrno4(error);
6014                         return (status);
6015                 }
6016
6017                 if (mode == UNCHECKED4) {
6018                         /* existing object must be regular file */
6019                         if ((*vpp)->v_type != VREG) {
6020                                 if ((*vpp)->v_type == VDIR)
6021                                         status = NFS4ERR_ISDIR;
6022                                 else if ((*vpp)->v_type == VLNK)
6023                                         status = NFS4ERR_SYMLINK;
6024                                 else
6025                                         status = NFS4ERR_INVAL;
6026                                 VN_RELE(*vpp);
6027                                 return (status);
6028                         }
6029
6030                         return (NFS4_OK);
6031                 }
6032
6033                 /* Check for duplicate request */
6034                 va.va_mask = AT_MTIME;
6035                 error = fop_getattr(*vpp, &va, 0, cr, NULL);
6036                 if (!error) {
6037                         /* We found the file */
6038                         const timestruc_t *mtime = &vap->va_mtime;
6039
6040                         if (va.va_mtime.tv_sec != mtime->tv_sec ||
6041                             va.va_mtime.tv_nsec != mtime->tv_nsec) {
6042                                 /* but its not our creation */
6043                                 VN_RELE(*vpp);
6044                                 return (NFS4ERR_EXIST);
6045                         }
6046                         *created = TRUE; /* retrans of create == created */
6047                         return (NFS4_OK);
6048                 }
6049                 VN_RELE(*vpp);
6050                 return (NFS4ERR_EXIST);
6051         }
6052
6053         return (NFS4_OK);
6054 }
6055
6056 static nfsstat4
6057 check_open_access(uint32_t access, struct compound_state *cs,
6058     struct svc_req *req)
6059 {
6060         int error;
6061         vnode_t *vp;
6062         bool_t readonly;
6063         cred_t *cr = cs->cr;
6064
6065         /* For now we don't allow mandatory locking as per V2/V3 */
6066         if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6067                 return (NFS4ERR_ACCESS);
6068         }
6069
6070         vp = cs->vp;
6071         ASSERT(cr != NULL && vp->v_type == VREG);
6072
6073         /*
6074          * If the file system is exported read only and we are trying
6075          * to open for write, then return NFS4ERR_ROFS
6076          */
6077
6078         readonly = rdonly4(req, cs);
6079
6080         if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6081                 return (NFS4ERR_ROFS);
6082
6083         if (access & OPEN4_SHARE_ACCESS_READ) {
6084                 if ((fop_access(vp, VREAD, 0, cr, NULL) != 0) &&
6085                     (fop_access(vp, VEXEC, 0, cr, NULL) != 0)) {
6086                         return (NFS4ERR_ACCESS);
6087                 }
6088         }
6089
6090         if (access & OPEN4_SHARE_ACCESS_WRITE) {
6091                 error = fop_access(vp, VWRITE, 0, cr, NULL);
6092                 if (error)
6093                         return (NFS4ERR_ACCESS);
6094         }
6095
6096         return (NFS4_OK);
6097 }
6098
6099 static nfsstat4
6100 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6101     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6102 {
6103         struct nfs4_svgetit_arg sarg;
6104         struct nfs4_ntov_table ntov;
6105
6106         bool_t ntov_table_init = FALSE;
6107         struct statvfs64 sb;
6108         nfsstat4 status;
6109         vnode_t *vp;
6110         vattr_t bva, ava, iva, cva, *vap;
6111         vnode_t *dvp;
6112         timespec32_t *mtime;
6113         char *nm = NULL;
6114         uint_t buflen;
6115         bool_t created;
6116         bool_t setsize = FALSE;
6117         len_t reqsize;
6118         int error;
6119         bool_t trunc;
6120         caller_context_t ct;
6121         component4 *component;
6122         struct sockaddr *ca;
6123         char *name = NULL;
6124
6125         sarg.sbp = &sb;
6126         sarg.is_referral = B_FALSE;
6127
6128         dvp = cs->vp;
6129
6130         /* Check if the file system is read only */
6131         if (rdonly4(req, cs))
6132                 return (NFS4ERR_ROFS);
6133
6134         /*
6135          * Get the last component of path name in nm. cs will reference
6136          * the including directory on success.
6137          */
6138         component = &args->open_claim4_u.file;
6139         status = utf8_dir_verify(component);
6140         if (status != NFS4_OK)
6141                 return (status);
6142
6143         nm = utf8_to_fn(component, &buflen, NULL);
6144
6145         if (nm == NULL)
6146                 return (NFS4ERR_RESOURCE);
6147
6148         if (buflen > MAXNAMELEN) {
6149                 kmem_free(nm, buflen);
6150                 return (NFS4ERR_NAMETOOLONG);
6151         }
6152
6153         bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6154         error = fop_getattr(dvp, &bva, 0, cs->cr, NULL);
6155         if (error) {
6156                 kmem_free(nm, buflen);
6157                 return (puterrno4(error));
6158         }
6159
6160         if (bva.va_type != VDIR) {
6161                 kmem_free(nm, buflen);
6162                 return (NFS4ERR_NOTDIR);
6163         }
6164
6165         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6166
6167         switch (args->mode) {
6168         case GUARDED4:
6169                 /*FALLTHROUGH*/
6170         case UNCHECKED4:
6171                 nfs4_ntov_table_init(&ntov);
6172                 ntov_table_init = TRUE;
6173
6174                 *attrset = 0;
6175                 status = do_rfs4_set_attrs(attrset,
6176                     &args->createhow4_u.createattrs,
6177                     cs, &sarg, &ntov, NFS4ATTR_SETIT);
6178
6179                 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6180                     sarg.vap->va_type != VREG) {
6181                         if (sarg.vap->va_type == VDIR)
6182                                 status = NFS4ERR_ISDIR;
6183                         else if (sarg.vap->va_type == VLNK)
6184                                 status = NFS4ERR_SYMLINK;
6185                         else
6186                                 status = NFS4ERR_INVAL;
6187                 }
6188
6189                 if (status != NFS4_OK) {
6190                         kmem_free(nm, buflen);
6191                         nfs4_ntov_table_free(&ntov, &sarg);
6192                         *attrset = 0;
6193                         return (status);
6194                 }
6195
6196                 vap = sarg.vap;
6197                 vap->va_type = VREG;
6198                 vap->va_mask |= AT_TYPE;
6199
6200                 if ((vap->va_mask & AT_MODE) == 0) {
6201                         vap->va_mask |= AT_MODE;
6202                         vap->va_mode = (mode_t)0600;
6203                 }
6204
6205                 if (vap->va_mask & AT_SIZE) {
6206
6207                         /* Disallow create with a non-zero size */
6208
6209                         if ((reqsize = sarg.vap->va_size) != 0) {
6210                                 kmem_free(nm, buflen);
6211                                 nfs4_ntov_table_free(&ntov, &sarg);
6212                                 *attrset = 0;
6213                                 return (NFS4ERR_INVAL);
6214                         }
6215                         setsize = TRUE;
6216                 }
6217                 break;
6218
6219         case EXCLUSIVE4:
6220                 /* prohibit EXCL create of named attributes */
6221                 if (dvp->v_flag & V_XATTRDIR) {
6222                         kmem_free(nm, buflen);
6223                         *attrset = 0;
6224                         return (NFS4ERR_INVAL);
6225                 }
6226
6227                 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6228                 cva.va_type = VREG;
6229                 /*
6230                  * Ensure no time overflows. Assumes underlying
6231                  * filesystem supports at least 32 bits.
6232                  * Truncate nsec to usec resolution to allow valid
6233                  * compares even if the underlying filesystem truncates.
6234                  */
6235                 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6236                 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6237                 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6238                 cva.va_mode = (mode_t)0;
6239                 vap = &cva;
6240
6241                 /*
6242                  * For EXCL create, attrset is set to the server attr
6243                  * used to cache the client's verifier.
6244                  */
6245                 *attrset = FATTR4_TIME_MODIFY_MASK;
6246                 break;
6247         }
6248
6249         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6250         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6251             MAXPATHLEN  + 1);
6252
6253         if (name == NULL) {
6254                 kmem_free(nm, buflen);
6255                 return (NFS4ERR_SERVERFAULT);
6256         }
6257
6258         status = create_vnode(dvp, name, vap, args->mode,
6259             cs->cr, &vp, &created);
6260         if (nm != name)
6261                 kmem_free(name, MAXPATHLEN + 1);
6262         kmem_free(nm, buflen);
6263
6264         if (status != NFS4_OK) {
6265                 if (ntov_table_init)
6266                         nfs4_ntov_table_free(&ntov, &sarg);
6267                 *attrset = 0;
6268                 return (status);
6269         }
6270
6271         trunc = (setsize && !created);
6272
6273         if (args->mode != EXCLUSIVE4) {
6274                 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6275
6276                 /*
6277                  * True verification that object was created with correct
6278                  * attrs is impossible.  The attrs could have been changed
6279                  * immediately after object creation.  If attributes did
6280                  * not verify, the only recourse for the server is to
6281                  * destroy the object.  Maybe if some attrs (like gid)
6282                  * are set incorrectly, the object should be destroyed;
6283                  * however, seems bad as a default policy.  Do we really
6284                  * want to destroy an object over one of the times not
6285                  * verifying correctly?  For these reasons, the server
6286                  * currently sets bits in attrset for createattrs
6287                  * that were set; however, no verification is done.
6288                  *
6289                  * vmask_to_nmask accounts for vattr bits set on create
6290                  *      [do_rfs4_set_attrs() only sets resp bits for
6291                  *       non-vattr/vfs bits.]
6292                  * Mask off any bits we set by default so as not to return
6293                  * more attrset bits than were requested in createattrs
6294                  */
6295                 if (created) {
6296                         nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6297                         *attrset &= createmask;
6298                 } else {
6299                         /*
6300                          * We did not create the vnode (we tried but it
6301                          * already existed).  In this case, the only createattr
6302                          * that the spec allows the server to set is size,
6303                          * and even then, it can only be set if it is 0.
6304                          */
6305                         *attrset = 0;
6306                         if (trunc)
6307                                 *attrset = FATTR4_SIZE_MASK;
6308                 }
6309         }
6310         if (ntov_table_init)
6311                 nfs4_ntov_table_free(&ntov, &sarg);
6312
6313         /*
6314          * Get the initial "after" sequence number, if it fails,
6315          * set to zero, time to before.
6316          */
6317         iva.va_mask = AT_CTIME|AT_SEQ;
6318         if (fop_getattr(dvp, &iva, 0, cs->cr, NULL)) {
6319                 iva.va_seq = 0;
6320                 iva.va_ctime = bva.va_ctime;
6321         }
6322
6323         /*
6324          * create_vnode attempts to create the file exclusive,
6325          * if it already exists the fop_create will fail and
6326          * may not increase va_seq. It is atomic if
6327          * we haven't changed the directory, but if it has changed
6328          * we don't know what changed it.
6329          */
6330         if (!created) {
6331                 if (bva.va_seq && iva.va_seq &&
6332                     bva.va_seq == iva.va_seq)
6333                         cinfo->atomic = TRUE;
6334                 else
6335                         cinfo->atomic = FALSE;
6336                 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6337         } else {
6338                 /*
6339                  * The entry was created, we need to sync the
6340                  * directory metadata.
6341                  */
6342                 (void) fop_fsync(dvp, 0, cs->cr, NULL);
6343
6344                 /*
6345                  * Get "after" change value, if it fails, simply return the
6346                  * before value.
6347                  */
6348                 ava.va_mask = AT_CTIME|AT_SEQ;
6349                 if (fop_getattr(dvp, &ava, 0, cs->cr, NULL)) {
6350                         ava.va_ctime = bva.va_ctime;
6351                         ava.va_seq = 0;
6352                 }
6353
6354                 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6355
6356                 /*
6357                  * The cinfo->atomic = TRUE only if we have
6358                  * non-zero va_seq's, and it has incremented by exactly one
6359                  * during the create_vnode and it didn't
6360                  * change during the fop_fsync.
6361                  */
6362                 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6363                     iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6364                         cinfo->atomic = TRUE;
6365                 else
6366                         cinfo->atomic = FALSE;
6367         }
6368
6369         /* Check for mandatory locking and that the size gets set. */
6370         cva.va_mask = AT_MODE;
6371         if (setsize)
6372                 cva.va_mask |= AT_SIZE;
6373
6374         /* Assume the worst */
6375         cs->mandlock = TRUE;
6376
6377         if (fop_getattr(vp, &cva, 0, cs->cr, NULL) == 0) {
6378                 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6379
6380                 /*
6381                  * Truncate the file if necessary; this would be
6382                  * the case for create over an existing file.
6383                  */
6384
6385                 if (trunc) {
6386                         int in_crit = 0;
6387                         rfs4_file_t *fp;
6388                         bool_t create = FALSE;
6389
6390                         /*
6391                          * We are writing over an existing file.
6392                          * Check to see if we need to recall a delegation.
6393                          */
6394                         rfs4_hold_deleg_policy();
6395                         if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6396                                 if (rfs4_check_delegated_byfp(FWRITE, fp,
6397                                     (reqsize == 0), FALSE, FALSE, &clientid)) {
6398                                         rfs4_file_rele(fp);
6399                                         rfs4_rele_deleg_policy();
6400                                         VN_RELE(vp);
6401                                         *attrset = 0;
6402                                         return (NFS4ERR_DELAY);
6403                                 }
6404                                 rfs4_file_rele(fp);
6405                         }
6406                         rfs4_rele_deleg_policy();
6407
6408                         if (nbl_need_check(vp)) {
6409                                 in_crit = 1;
6410
6411                                 ASSERT(reqsize == 0);
6412
6413                                 nbl_start_crit(vp, RW_READER);
6414                                 if (nbl_conflict(vp, NBL_WRITE, 0,
6415                                     cva.va_size, 0, NULL)) {
6416                                         in_crit = 0;
6417                                         nbl_end_crit(vp);
6418                                         VN_RELE(vp);
6419                                         *attrset = 0;
6420                                         return (NFS4ERR_ACCESS);
6421                                 }
6422                         }
6423                         ct.cc_sysid = 0;
6424                         ct.cc_pid = 0;
6425                         ct.cc_caller_id = nfs4_srv_caller_id;
6426                         ct.cc_flags = CC_DONTBLOCK;
6427
6428                         cva.va_mask = AT_SIZE;
6429                         cva.va_size = reqsize;
6430                         (void) fop_setattr(vp, &cva, 0, cs->cr, &ct);
6431                         if (in_crit)
6432                                 nbl_end_crit(vp);
6433                 }
6434         }
6435
6436         error = makefh4(&cs->fh, vp, cs->exi);
6437
6438         /*
6439          * Force modified data and metadata out to stable storage.
6440          */
6441         (void) fop_fsync(vp, FNODSYNC, cs->cr, NULL);
6442
6443         if (error) {
6444                 VN_RELE(vp);
6445                 *attrset = 0;
6446                 return (puterrno4(error));
6447         }
6448
6449         /* if parent dir is attrdir, set namedattr fh flag */
6450         if (dvp->v_flag & V_XATTRDIR)
6451                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6452
6453         if (cs->vp)
6454                 VN_RELE(cs->vp);
6455
6456         cs->vp = vp;
6457
6458         /*
6459          * if we did not create the file, we will need to check
6460          * the access bits on the file
6461          */
6462
6463         if (!created) {
6464                 if (setsize)
6465                         args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6466                 status = check_open_access(args->share_access, cs, req);
6467                 if (status != NFS4_OK)
6468                         *attrset = 0;
6469         }
6470         return (status);
6471 }
6472
6473 /*ARGSUSED*/
6474 static void
6475 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6476     rfs4_openowner_t *oo, delegreq_t deleg,
6477     uint32_t access, uint32_t deny,
6478     OPEN4res *resp, int deleg_cur)
6479 {
6480         /* XXX Currently not using req  */
6481         rfs4_state_t *sp;
6482         rfs4_file_t *fp;
6483         bool_t screate = TRUE;
6484         bool_t fcreate = TRUE;
6485         uint32_t open_a, share_a;
6486         uint32_t open_d, share_d;
6487         rfs4_deleg_state_t *dsp;
6488         sysid_t sysid;
6489         nfsstat4 status;
6490         caller_context_t ct;
6491         int fflags = 0;
6492         int recall = 0;
6493         int err;
6494         int first_open;
6495
6496         /* get the file struct and hold a lock on it during initial open */
6497         fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6498         if (fp == NULL) {
6499                 resp->status = NFS4ERR_RESOURCE;
6500                 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6501                 return;
6502         }
6503
6504         sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6505         if (sp == NULL) {
6506                 resp->status = NFS4ERR_RESOURCE;
6507                 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6508                 /* No need to keep any reference */
6509                 rw_exit(&fp->rf_file_rwlock);
6510                 rfs4_file_rele(fp);
6511                 return;
6512         }
6513
6514         /* try to get the sysid before continuing */
6515         if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6516                 resp->status = status;
6517                 rfs4_file_rele(fp);
6518                 /* Not a fully formed open; "close" it */
6519                 if (screate == TRUE)
6520                         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6521                 rfs4_state_rele(sp);
6522                 return;
6523         }
6524
6525         /* Calculate the fflags for this OPEN. */
6526         if (access & OPEN4_SHARE_ACCESS_READ)
6527                 fflags |= FREAD;
6528         if (access & OPEN4_SHARE_ACCESS_WRITE)
6529                 fflags |= FWRITE;
6530
6531         rfs4_dbe_lock(sp->rs_dbe);
6532
6533         /*
6534          * Calculate the new deny and access mode that this open is adding to
6535          * the file for this open owner;
6536          */
6537         open_d = (deny & ~sp->rs_open_deny);
6538         open_a = (access & ~sp->rs_open_access);
6539
6540         /*
6541          * Calculate the new share access and share deny modes that this open
6542          * is adding to the file for this open owner;
6543          */
6544         share_a = (access & ~sp->rs_share_access);
6545         share_d = (deny & ~sp->rs_share_deny);
6546
6547         first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6548
6549         /*
6550          * Check to see the client has already sent an open for this
6551          * open owner on this file with the same share/deny modes.
6552          * If so, we don't need to check for a conflict and we don't
6553          * need to add another shrlock.  If not, then we need to
6554          * check for conflicts in deny and access before checking for
6555          * conflicts in delegation.  We don't want to recall a
6556          * delegation based on an open that will eventually fail based
6557          * on shares modes.
6558          */
6559
6560         if (share_a || share_d) {
6561                 if ((err = rfs4_share(sp, access, deny)) != 0) {
6562                         rfs4_dbe_unlock(sp->rs_dbe);
6563                         resp->status = err;
6564
6565                         rfs4_file_rele(fp);
6566                         /* Not a fully formed open; "close" it */
6567                         if (screate == TRUE)
6568                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6569                         rfs4_state_rele(sp);
6570                         return;
6571                 }
6572         }
6573
6574         rfs4_dbe_lock(fp->rf_dbe);
6575
6576         /*
6577          * Check to see if this file is delegated and if so, if a
6578          * recall needs to be done.
6579          */
6580         if (rfs4_check_recall(sp, access)) {
6581                 rfs4_dbe_unlock(fp->rf_dbe);
6582                 rfs4_dbe_unlock(sp->rs_dbe);
6583                 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6584                 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6585                 rfs4_dbe_lock(sp->rs_dbe);
6586
6587                 /* if state closed while lock was dropped */
6588                 if (sp->rs_closed) {
6589                         if (share_a || share_d)
6590                                 (void) rfs4_unshare(sp);
6591                         rfs4_dbe_unlock(sp->rs_dbe);
6592                         rfs4_file_rele(fp);
6593                         /* Not a fully formed open; "close" it */
6594                         if (screate == TRUE)
6595                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6596                         rfs4_state_rele(sp);
6597                         resp->status = NFS4ERR_OLD_STATEID;
6598                         return;
6599                 }
6600
6601                 rfs4_dbe_lock(fp->rf_dbe);
6602                 /* Let's see if the delegation was returned */
6603                 if (rfs4_check_recall(sp, access)) {
6604                         rfs4_dbe_unlock(fp->rf_dbe);
6605                         if (share_a || share_d)
6606                                 (void) rfs4_unshare(sp);
6607                         rfs4_dbe_unlock(sp->rs_dbe);
6608                         rfs4_file_rele(fp);
6609                         rfs4_update_lease(sp->rs_owner->ro_client);
6610
6611                         /* Not a fully formed open; "close" it */
6612                         if (screate == TRUE)
6613                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6614                         rfs4_state_rele(sp);
6615                         resp->status = NFS4ERR_DELAY;
6616                         return;
6617                 }
6618         }
6619         /*
6620          * the share check passed and any delegation conflict has been
6621          * taken care of, now call vop_open.
6622          * if this is the first open then call vop_open with fflags.
6623          * if not, call vn_open_upgrade with just the upgrade flags.
6624          *
6625          * if the file has been opened already, it will have the current
6626          * access mode in the state struct.  if it has no share access, then
6627          * this is a new open.
6628          *
         * However, if this is open with CLAIM_DELEGATE_CUR, then don't
6630          * call fop_open(), just do the open upgrade.
6631          */
6632         if (first_open && !deleg_cur) {
6633                 ct.cc_sysid = sysid;
6634                 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6635                 ct.cc_caller_id = nfs4_srv_caller_id;
6636                 ct.cc_flags = CC_DONTBLOCK;
6637                 err = fop_open(&cs->vp, fflags, cs->cr, &ct);
6638                 if (err) {
6639                         rfs4_dbe_unlock(fp->rf_dbe);
6640                         if (share_a || share_d)
6641                                 (void) rfs4_unshare(sp);
6642                         rfs4_dbe_unlock(sp->rs_dbe);
6643                         rfs4_file_rele(fp);
6644
6645                         /* Not a fully formed open; "close" it */
6646                         if (screate == TRUE)
6647                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6648                         rfs4_state_rele(sp);
6649                         /* check if a monitor detected a delegation conflict */
6650                         if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6651                                 resp->status = NFS4ERR_DELAY;
6652                         else
6653                                 resp->status = NFS4ERR_SERVERFAULT;
6654                         return;
6655                 }
6656         } else { /* open upgrade */
6657                 /*
6658                  * calculate the fflags for the new mode that is being added
6659                  * by this upgrade.
6660                  */
6661                 fflags = 0;
6662                 if (open_a & OPEN4_SHARE_ACCESS_READ)
6663                         fflags |= FREAD;
6664                 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6665                         fflags |= FWRITE;
6666                 vn_open_upgrade(cs->vp, fflags);
6667         }
6668         sp->rs_open_access |= access;
6669         sp->rs_open_deny |= deny;
6670
6671         if (open_d & OPEN4_SHARE_DENY_READ)
6672                 fp->rf_deny_read++;
6673         if (open_d & OPEN4_SHARE_DENY_WRITE)
6674                 fp->rf_deny_write++;
6675         fp->rf_share_deny |= deny;
6676
6677         if (open_a & OPEN4_SHARE_ACCESS_READ)
6678                 fp->rf_access_read++;
6679         if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6680                 fp->rf_access_write++;
6681         fp->rf_share_access |= access;
6682
6683         /*
6684          * Check for delegation here. if the deleg argument is not
6685          * DELEG_ANY, then this is a reclaim from a client and
6686          * we must honor the delegation requested. If necessary we can
6687          * set the recall flag.
6688          */
6689
6690         dsp = rfs4_grant_delegation(deleg, sp, &recall);
6691
6692         cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6693
6694         next_stateid(&sp->rs_stateid);
6695
6696         resp->stateid = sp->rs_stateid.stateid;
6697
6698         rfs4_dbe_unlock(fp->rf_dbe);
6699         rfs4_dbe_unlock(sp->rs_dbe);
6700
6701         if (dsp) {
6702                 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6703                 rfs4_deleg_state_rele(dsp);
6704         }
6705
6706         rfs4_file_rele(fp);
6707         rfs4_state_rele(sp);
6708
6709         resp->status = NFS4_OK;
6710 }
6711
6712 /*ARGSUSED*/
6713 static void
6714 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6715     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6716 {
6717         change_info4 *cinfo = &resp->cinfo;
6718         bitmap4 *attrset = &resp->attrset;
6719
6720         if (args->opentype == OPEN4_NOCREATE)
6721                 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6722                     req, cs, args->share_access, cinfo);
6723         else {
6724                 /* inhibit delegation grants during exclusive create */
6725
6726                 if (args->mode == EXCLUSIVE4)
6727                         rfs4_disable_delegation();
6728
6729                 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6730                     oo->ro_client->rc_clientid);
6731         }
6732
6733         if (resp->status == NFS4_OK) {
6734
6735                 /* cs->vp cs->fh now reference the desired file */
6736
6737                 rfs4_do_open(cs, req, oo,
6738                     oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6739                     args->share_access, args->share_deny, resp, 0);
6740
6741                 /*
6742                  * If rfs4_createfile set attrset, we must
6743                  * clear this attrset before the response is copied.
6744                  */
6745                 if (resp->status != NFS4_OK && resp->attrset) {
6746                         resp->attrset = 0;
6747                 }
6748         }
6749         else
6750                 *cs->statusp = resp->status;
6751
6752         if (args->mode == EXCLUSIVE4)
6753                 rfs4_enable_delegation();
6754 }
6755
6756 /*ARGSUSED*/
6757 static void
6758 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
6759     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6760 {
6761         change_info4 *cinfo = &resp->cinfo;
6762         vattr_t va;
6763         vtype_t v_type = cs->vp->v_type;
6764         int error = 0;
6765
6766         /* Verify that we have a regular file */
6767         if (v_type != VREG) {
6768                 if (v_type == VDIR)
6769                         resp->status = NFS4ERR_ISDIR;
6770                 else if (v_type == VLNK)
6771                         resp->status = NFS4ERR_SYMLINK;
6772                 else
6773                         resp->status = NFS4ERR_INVAL;
6774                 return;
6775         }
6776
6777         va.va_mask = AT_MODE|AT_UID;
6778         error = fop_getattr(cs->vp, &va, 0, cs->cr, NULL);
6779         if (error) {
6780                 resp->status = puterrno4(error);
6781                 return;
6782         }
6783
6784         cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
6785
6786         /*
6787          * Check if we have access to the file, Note the the file
6788          * could have originally been open UNCHECKED or GUARDED
6789          * with mode bits that will now fail, but there is nothing
6790          * we can really do about that except in the case that the
6791          * owner of the file is the one requesting the open.
6792          */
6793         if (crgetuid(cs->cr) != va.va_uid) {
6794                 resp->status = check_open_access(args->share_access, cs, req);
6795                 if (resp->status != NFS4_OK) {
6796                         return;
6797                 }
6798         }
6799
6800         /*
6801          * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
6802          */
6803         cinfo->before = 0;
6804         cinfo->after = 0;
6805         cinfo->atomic = FALSE;
6806
6807         rfs4_do_open(cs, req, oo,
6808             NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
6809             args->share_access, args->share_deny, resp, 0);
6810 }
6811
6812 static void
6813 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
6814     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6815 {
6816         int error;
6817         nfsstat4 status;
6818         stateid4 stateid =
6819             args->open_claim4_u.delegate_cur_info.delegate_stateid;
6820         rfs4_deleg_state_t *dsp;
6821
6822         /*
6823          * Find the state info from the stateid and confirm that the
6824          * file is delegated.  If the state openowner is the same as
6825          * the supplied openowner we're done. If not, get the file
6826          * info from the found state info. Use that file info to
6827          * create the state for this lock owner. Note solaris doen't
6828          * really need the pathname to find the file. We may want to
6829          * lookup the pathname and make sure that the vp exist and
6830          * matches the vp in the file structure. However it is
6831          * possible that the pathname nolonger exists (local process
6832          * unlinks the file), so this may not be that useful.
6833          */
6834
6835         status = rfs4_get_deleg_state(&stateid, &dsp);
6836         if (status != NFS4_OK) {
6837                 resp->status = status;
6838                 return;
6839         }
6840
6841         ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
6842
6843         /*
6844          * New lock owner, create state. Since this was probably called
6845          * in response to a CB_RECALL we set deleg to DELEG_NONE
6846          */
6847
6848         ASSERT(cs->vp != NULL);
6849         VN_RELE(cs->vp);
6850         VN_HOLD(dsp->rds_finfo->rf_vp);
6851         cs->vp = dsp->rds_finfo->rf_vp;
6852
6853         if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
6854                 rfs4_deleg_state_rele(dsp);
6855                 *cs->statusp = resp->status = puterrno4(error);
6856                 return;
6857         }
6858
6859         /* Mark progress for delegation returns */
6860         dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
6861         rfs4_deleg_state_rele(dsp);
6862         rfs4_do_open(cs, req, oo, DELEG_NONE,
6863             args->share_access, args->share_deny, resp, 1);
6864 }
6865
6866 /*ARGSUSED*/
6867 static void
6868 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
6869     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6870 {
6871         /*
6872          * Lookup the pathname, it must already exist since this file
6873          * was delegated.
6874          *
6875          * Find the file and state info for this vp and open owner pair.
6876          *      check that they are in fact delegated.
6877          *      check that the state access and deny modes are the same.
6878          *
6879          * Return the delgation possibly seting the recall flag.
6880          */
6881         rfs4_file_t *fp;
6882         rfs4_state_t *sp;
6883         bool_t create = FALSE;
6884         bool_t dcreate = FALSE;
6885         rfs4_deleg_state_t *dsp;
6886         nfsace4 *ace;
6887
6888         /* Note we ignore oflags */
6889         resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
6890             req, cs, args->share_access, &resp->cinfo);
6891
6892         if (resp->status != NFS4_OK) {
6893                 return;
6894         }
6895
6896         /* get the file struct and hold a lock on it during initial open */
6897         fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
6898         if (fp == NULL) {
6899                 resp->status = NFS4ERR_RESOURCE;
6900                 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
6901                 return;
6902         }
6903
6904         sp = rfs4_findstate_by_owner_file(oo, fp, &create);
6905         if (sp == NULL) {
6906                 resp->status = NFS4ERR_SERVERFAULT;
6907                 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
6908                 rw_exit(&fp->rf_file_rwlock);
6909                 rfs4_file_rele(fp);
6910                 return;
6911         }
6912
6913         rfs4_dbe_lock(sp->rs_dbe);
6914         rfs4_dbe_lock(fp->rf_dbe);
6915         if (args->share_access != sp->rs_share_access ||
6916             args->share_deny != sp->rs_share_deny ||
6917             sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
6918                 NFS4_DEBUG(rfs4_debug,
6919                     (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
6920                 rfs4_dbe_unlock(fp->rf_dbe);
6921                 rfs4_dbe_unlock(sp->rs_dbe);
6922                 rfs4_file_rele(fp);
6923                 rfs4_state_rele(sp);
6924                 resp->status = NFS4ERR_SERVERFAULT;
6925                 return;
6926         }
6927         rfs4_dbe_unlock(fp->rf_dbe);
6928         rfs4_dbe_unlock(sp->rs_dbe);
6929
6930         dsp = rfs4_finddeleg(sp, &dcreate);
6931         if (dsp == NULL) {
6932                 rfs4_state_rele(sp);
6933                 rfs4_file_rele(fp);
6934                 resp->status = NFS4ERR_SERVERFAULT;
6935                 return;
6936         }
6937
6938         next_stateid(&sp->rs_stateid);
6939
6940         resp->stateid = sp->rs_stateid.stateid;
6941
6942         resp->delegation.delegation_type = dsp->rds_dtype;
6943
6944         if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
6945                 open_read_delegation4 *rv =
6946                     &resp->delegation.open_delegation4_u.read;
6947
6948                 rv->stateid = dsp->rds_delegid.stateid;
6949                 rv->recall = FALSE; /* no policy in place to set to TRUE */
6950                 ace = &rv->permissions;
6951         } else {
6952                 open_write_delegation4 *rv =
6953                     &resp->delegation.open_delegation4_u.write;
6954
6955                 rv->stateid = dsp->rds_delegid.stateid;
6956                 rv->recall = FALSE;  /* no policy in place to set to TRUE */
6957                 ace = &rv->permissions;
6958                 rv->space_limit.limitby = NFS_LIMIT_SIZE;
6959                 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
6960         }
6961
6962         /* XXX For now */
6963         ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
6964         ace->flag = 0;
6965         ace->access_mask = 0;
6966         ace->who.utf8string_len = 0;
6967         ace->who.utf8string_val = 0;
6968
6969         rfs4_deleg_state_rele(dsp);
6970         rfs4_state_rele(sp);
6971         rfs4_file_rele(fp);
6972 }
6973
/*
 * Result of a sequence-id check (see rfs4_check_seqid()).
 */
typedef enum {
        NFS4_CHKSEQ_OKAY = 0,   /* request carries the next expected seqid */
        NFS4_CHKSEQ_REPLAY,     /* (1) same seqid and same op as last reply */
        NFS4_CHKSEQ_BAD         /* (2) anything else */
} rfs4_chkseq_t;
6979
6980 /*
6981  * Generic function for sequence number checks.
6982  */
6983 static rfs4_chkseq_t
6984 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
6985     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
6986 {
6987         /* Same sequence ids and matching operations? */
6988         if (seqid == rqst_seq && resop->resop == lastop->resop) {
6989                 if (copyres == TRUE) {
6990                         rfs4_free_reply(resop);
6991                         rfs4_copy_reply(resop, lastop);
6992                 }
6993                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6994                     "Replayed SEQID %d\n", seqid));
6995                 return (NFS4_CHKSEQ_REPLAY);
6996         }
6997
6998         /* If the incoming sequence is not the next expected then it is bad */
6999         if (rqst_seq != seqid + 1) {
7000                 if (rqst_seq == seqid) {
7001                         NFS4_DEBUG(rfs4_debug,
7002                             (CE_NOTE, "BAD SEQID: Replayed sequence id "
7003                             "but last op was %d current op is %d\n",
7004                             lastop->resop, resop->resop));
7005                         return (NFS4_CHKSEQ_BAD);
7006                 }
7007                 NFS4_DEBUG(rfs4_debug,
7008                     (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7009                     rqst_seq, seqid));
7010                 return (NFS4_CHKSEQ_BAD);
7011         }
7012
7013         /* Everything okay -- next expected */
7014         return (NFS4_CHKSEQ_OKAY);
7015 }
7016
7017
7018 static rfs4_chkseq_t
7019 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7020 {
7021         rfs4_chkseq_t rc;
7022
7023         rfs4_dbe_lock(op->ro_dbe);
7024         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7025             TRUE);
7026         rfs4_dbe_unlock(op->ro_dbe);
7027
7028         if (rc == NFS4_CHKSEQ_OKAY)
7029                 rfs4_update_lease(op->ro_client);
7030
7031         return (rc);
7032 }
7033
7034 static rfs4_chkseq_t
7035 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7036 {
7037         rfs4_chkseq_t rc;
7038
7039         rfs4_dbe_lock(op->ro_dbe);
7040         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7041             olo_seqid, resop, FALSE);
7042         rfs4_dbe_unlock(op->ro_dbe);
7043
7044         return (rc);
7045 }
7046
7047 static rfs4_chkseq_t
7048 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7049 {
7050         rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7051
7052         rfs4_dbe_lock(lsp->rls_dbe);
7053         if (!lsp->rls_skip_seqid_check)
7054                 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7055                     resop, TRUE);
7056         rfs4_dbe_unlock(lsp->rls_dbe);
7057
7058         return (rc);
7059 }
7060
7061 static void
7062 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7063     struct svc_req *req, struct compound_state *cs)
7064 {
7065         OPEN4args *args = &argop->nfs_argop4_u.opopen;
7066         OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7067         open_owner4 *owner = &args->owner;
7068         open_claim_type4 claim = args->claim;
7069         rfs4_client_t *cp;
7070         rfs4_openowner_t *oo;
7071         bool_t create;
7072         bool_t replay = FALSE;
7073         int can_reclaim;
7074
7075         DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7076             OPEN4args *, args);
7077
7078         if (cs->vp == NULL) {
7079                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7080                 goto end;
7081         }
7082
7083         /*
7084          * Need to check clientid and lease expiration first based on
7085          * error ordering and incrementing sequence id.
7086          */
7087         cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7088         if (cp == NULL) {
7089                 *cs->statusp = resp->status =
7090                     rfs4_check_clientid(&owner->clientid, 0);
7091                 goto end;
7092         }
7093
7094         if (rfs4_lease_expired(cp)) {
7095                 rfs4_client_close(cp);
7096                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7097                 goto end;
7098         }
7099         can_reclaim = cp->rc_can_reclaim;
7100
7101         /*
7102          * Find the open_owner for use from this point forward.  Take
7103          * care in updating the sequence id based on the type of error
7104          * being returned.
7105          */
7106 retry:
7107         create = TRUE;
7108         oo = rfs4_findopenowner(owner, &create, args->seqid);
7109         if (oo == NULL) {
7110                 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7111                 rfs4_client_rele(cp);
7112                 goto end;
7113         }
7114
7115         /* Hold off access to the sequence space while the open is done */
7116         rfs4_sw_enter(&oo->ro_sw);
7117
7118         /*
7119          * If the open_owner existed before at the server, then check
7120          * the sequence id.
7121          */
7122         if (!create && !oo->ro_postpone_confirm) {
7123                 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7124                 case NFS4_CHKSEQ_BAD:
7125                         if ((args->seqid > oo->ro_open_seqid) &&
7126                             oo->ro_need_confirm) {
7127                                 rfs4_free_opens(oo, TRUE, FALSE);
7128                                 rfs4_sw_exit(&oo->ro_sw);
7129                                 rfs4_openowner_rele(oo);
7130                                 goto retry;
7131                         }
7132                         resp->status = NFS4ERR_BAD_SEQID;
7133                         goto out;
7134                 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7135                         replay = TRUE;
7136                         goto out;
7137                 default:
7138                         break;
7139                 }
7140
7141                 /*
7142                  * Sequence was ok and open owner exists
7143                  * check to see if we have yet to see an
7144                  * open_confirm.
7145                  */
7146                 if (oo->ro_need_confirm) {
7147                         rfs4_free_opens(oo, TRUE, FALSE);
7148                         rfs4_sw_exit(&oo->ro_sw);
7149                         rfs4_openowner_rele(oo);
7150                         goto retry;
7151                 }
7152         }
7153         /* Grace only applies to regular-type OPENs */
7154         if (rfs4_clnt_in_grace(cp) &&
7155             (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7156                 *cs->statusp = resp->status = NFS4ERR_GRACE;
7157                 goto out;
7158         }
7159
7160         /*
7161          * If previous state at the server existed then can_reclaim
7162          * will be set. If not reply NFS4ERR_NO_GRACE to the
7163          * client.
7164          */
7165         if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7166                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7167                 goto out;
7168         }
7169
7170
7171         /*
7172          * Reject the open if the client has missed the grace period
7173          */
7174         if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7175                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7176                 goto out;
7177         }
7178
7179         /* Couple of up-front bookkeeping items */
7180         if (oo->ro_need_confirm) {
7181                 /*
7182                  * If this is a reclaim OPEN then we should not ask
7183                  * for a confirmation of the open_owner per the
7184                  * protocol specification.
7185                  */
7186                 if (claim == CLAIM_PREVIOUS)
7187                         oo->ro_need_confirm = FALSE;
7188                 else
7189                         resp->rflags |= OPEN4_RESULT_CONFIRM;
7190         }
7191         resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7192
7193         /*
7194          * If there is an unshared filesystem mounted on this vnode,
7195          * do not allow to open/create in this directory.
7196          */
7197         if (vn_ismntpt(cs->vp)) {
7198                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7199                 goto out;
7200         }
7201
7202         /*
7203          * access must READ, WRITE, or BOTH.  No access is invalid.
7204          * deny can be READ, WRITE, BOTH, or NONE.
7205          * bits not defined for access/deny are invalid.
7206          */
7207         if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7208             (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7209             (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7210                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7211                 goto out;
7212         }
7213
7214
7215         /*
7216          * make sure attrset is zero before response is built.
7217          */
7218         resp->attrset = 0;
7219
7220         switch (claim) {
7221         case CLAIM_NULL:
7222                 rfs4_do_opennull(cs, req, args, oo, resp);
7223                 break;
7224         case CLAIM_PREVIOUS:
7225                 rfs4_do_openprev(cs, req, args, oo, resp);
7226                 break;
7227         case CLAIM_DELEGATE_CUR:
7228                 rfs4_do_opendelcur(cs, req, args, oo, resp);
7229                 break;
7230         case CLAIM_DELEGATE_PREV:
7231                 rfs4_do_opendelprev(cs, req, args, oo, resp);
7232                 break;
7233         default:
7234                 resp->status = NFS4ERR_INVAL;
7235                 break;
7236         }
7237
7238 out:
7239         rfs4_client_rele(cp);
7240
7241         /* Catch sequence id handling here to make it a little easier */
7242         switch (resp->status) {
7243         case NFS4ERR_BADXDR:
7244         case NFS4ERR_BAD_SEQID:
7245         case NFS4ERR_BAD_STATEID:
7246         case NFS4ERR_NOFILEHANDLE:
7247         case NFS4ERR_RESOURCE:
7248         case NFS4ERR_STALE_CLIENTID:
7249         case NFS4ERR_STALE_STATEID:
7250                 /*
7251                  * The protocol states that if any of these errors are
7252                  * being returned, the sequence id should not be
7253                  * incremented.  Any other return requires an
7254                  * increment.
7255                  */
7256                 break;
7257         default:
7258                 /* Always update the lease in this case */
7259                 rfs4_update_lease(oo->ro_client);
7260
7261                 /* Regular response - copy the result */
7262                 if (!replay)
7263                         rfs4_update_open_resp(oo, resop, &cs->fh);
7264
7265                 /*
7266                  * REPLAY case: Only if the previous response was OK
7267                  * do we copy the filehandle.  If not OK, no
7268                  * filehandle to copy.
7269                  */
7270                 if (replay == TRUE &&
7271                     resp->status == NFS4_OK &&
7272                     oo->ro_reply_fh.nfs_fh4_val) {
7273                         /*
7274                          * If this is a replay, we must restore the
7275                          * current filehandle/vp to that of what was
7276                          * returned originally.  Try our best to do
7277                          * it.
7278                          */
7279                         nfs_fh4_fmt_t *fh_fmtp =
7280                             (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7281
7282                         cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7283                             (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7284
7285                         if (cs->exi == NULL) {
7286                                 resp->status = NFS4ERR_STALE;
7287                                 goto finish;
7288                         }
7289
7290                         VN_RELE(cs->vp);
7291
7292                         cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7293                             &resp->status);
7294
7295                         if (cs->vp == NULL)
7296                                 goto finish;
7297
7298                         nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7299                 }
7300
7301                 /*
7302                  * If this was a replay, no need to update the
7303                  * sequence id. If the open_owner was not created on
7304                  * this pass, then update.  The first use of an
7305                  * open_owner will not bump the sequence id.
7306                  */
7307                 if (replay == FALSE && !create)
7308                         rfs4_update_open_sequence(oo);
7309                 /*
7310                  * If the client is receiving an error and the
7311                  * open_owner needs to be confirmed, there is no way
7312                  * to notify the client of this fact ignoring the fact
7313                  * that the server has no method of returning a
7314                  * stateid to confirm.  Therefore, the server needs to
7315                  * mark this open_owner in a way as to avoid the
7316                  * sequence id checking the next time the client uses
7317                  * this open_owner.
7318                  */
7319                 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7320                         oo->ro_postpone_confirm = TRUE;
7321                 /*
7322                  * If OK response then clear the postpone flag and
7323                  * reset the sequence id to keep in sync with the
7324                  * client.
7325                  */
7326                 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7327                         oo->ro_postpone_confirm = FALSE;
7328                         oo->ro_open_seqid = args->seqid;
7329                 }
7330                 break;
7331         }
7332
7333 finish:
7334         *cs->statusp = resp->status;
7335
7336         rfs4_sw_exit(&oo->ro_sw);
7337         rfs4_openowner_rele(oo);
7338
7339 end:
7340         DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7341             OPEN4res *, resp);
7342 }
7343
7344 /*ARGSUSED*/
7345 void
7346 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7347     struct svc_req *req, struct compound_state *cs)
7348 {
7349         OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7350         OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7351         rfs4_state_t *sp;
7352         nfsstat4 status;
7353
7354         DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7355             OPEN_CONFIRM4args *, args);
7356
7357         if (cs->vp == NULL) {
7358                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7359                 goto out;
7360         }
7361
7362         if (cs->vp->v_type != VREG) {
7363                 *cs->statusp = resp->status =
7364                     cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7365                 return;
7366         }
7367
7368         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7369         if (status != NFS4_OK) {
7370                 *cs->statusp = resp->status = status;
7371                 goto out;
7372         }
7373
7374         /* Ensure specified filehandle matches */
7375         if (cs->vp != sp->rs_finfo->rf_vp) {
7376                 rfs4_state_rele(sp);
7377                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7378                 goto out;
7379         }
7380
7381         /* hold off other access to open_owner while we tinker */
7382         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7383
7384         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7385         case NFS4_CHECK_STATEID_OKAY:
7386                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7387                     resop) != 0) {
7388                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7389                         break;
7390                 }
7391                 /*
7392                  * If it is the appropriate stateid and determined to
7393                  * be "OKAY" then this means that the stateid does not
7394                  * need to be confirmed and the client is in error for
7395                  * sending an OPEN_CONFIRM.
7396                  */
7397                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7398                 break;
7399         case NFS4_CHECK_STATEID_OLD:
7400                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7401                 break;
7402         case NFS4_CHECK_STATEID_BAD:
7403                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7404                 break;
7405         case NFS4_CHECK_STATEID_EXPIRED:
7406                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7407                 break;
7408         case NFS4_CHECK_STATEID_CLOSED:
7409                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7410                 break;
7411         case NFS4_CHECK_STATEID_REPLAY:
7412                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7413                     resop)) {
7414                 case NFS4_CHKSEQ_OKAY:
7415                         /*
7416                          * This is replayed stateid; if seqid matches
7417                          * next expected, then client is using wrong seqid.
7418                          */
7419                         /* fall through */
7420                 case NFS4_CHKSEQ_BAD:
7421                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7422                         break;
7423                 case NFS4_CHKSEQ_REPLAY:
7424                         /*
7425                          * Note this case is the duplicate case so
7426                          * resp->status is already set.
7427                          */
7428                         *cs->statusp = resp->status;
7429                         rfs4_update_lease(sp->rs_owner->ro_client);
7430                         break;
7431                 }
7432                 break;
7433         case NFS4_CHECK_STATEID_UNCONFIRMED:
7434                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7435                     resop) != NFS4_CHKSEQ_OKAY) {
7436                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7437                         break;
7438                 }
7439                 *cs->statusp = resp->status = NFS4_OK;
7440
7441                 next_stateid(&sp->rs_stateid);
7442                 resp->open_stateid = sp->rs_stateid.stateid;
7443                 sp->rs_owner->ro_need_confirm = FALSE;
7444                 rfs4_update_lease(sp->rs_owner->ro_client);
7445                 rfs4_update_open_sequence(sp->rs_owner);
7446                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7447                 break;
7448         default:
7449                 ASSERT(FALSE);
7450                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7451                 break;
7452         }
7453         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7454         rfs4_state_rele(sp);
7455
7456 out:
7457         DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7458             OPEN_CONFIRM4res *, resp);
7459 }
7460
7461 /*ARGSUSED*/
7462 void
7463 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7464     struct svc_req *req, struct compound_state *cs)
7465 {
7466         OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7467         OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7468         uint32_t access = args->share_access;
7469         uint32_t deny = args->share_deny;
7470         nfsstat4 status;
7471         rfs4_state_t *sp;
7472         rfs4_file_t *fp;
7473         int fflags = 0;
7474
7475         DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7476             OPEN_DOWNGRADE4args *, args);
7477
7478         if (cs->vp == NULL) {
7479                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7480                 goto out;
7481         }
7482
7483         if (cs->vp->v_type != VREG) {
7484                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7485                 return;
7486         }
7487
7488         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7489         if (status != NFS4_OK) {
7490                 *cs->statusp = resp->status = status;
7491                 goto out;
7492         }
7493
7494         /* Ensure specified filehandle matches */
7495         if (cs->vp != sp->rs_finfo->rf_vp) {
7496                 rfs4_state_rele(sp);
7497                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7498                 goto out;
7499         }
7500
7501         /* hold off other access to open_owner while we tinker */
7502         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7503
7504         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7505         case NFS4_CHECK_STATEID_OKAY:
7506                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7507                     resop) != NFS4_CHKSEQ_OKAY) {
7508                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7509                         goto end;
7510                 }
7511                 break;
7512         case NFS4_CHECK_STATEID_OLD:
7513                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7514                 goto end;
7515         case NFS4_CHECK_STATEID_BAD:
7516                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7517                 goto end;
7518         case NFS4_CHECK_STATEID_EXPIRED:
7519                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7520                 goto end;
7521         case NFS4_CHECK_STATEID_CLOSED:
7522                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7523                 goto end;
7524         case NFS4_CHECK_STATEID_UNCONFIRMED:
7525                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7526                 goto end;
7527         case NFS4_CHECK_STATEID_REPLAY:
7528                 /* Check the sequence id for the open owner */
7529                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7530                     resop)) {
7531                 case NFS4_CHKSEQ_OKAY:
7532                         /*
7533                          * This is replayed stateid; if seqid matches
7534                          * next expected, then client is using wrong seqid.
7535                          */
7536                         /* fall through */
7537                 case NFS4_CHKSEQ_BAD:
7538                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7539                         goto end;
7540                 case NFS4_CHKSEQ_REPLAY:
7541                         /*
7542                          * Note this case is the duplicate case so
7543                          * resp->status is already set.
7544                          */
7545                         *cs->statusp = resp->status;
7546                         rfs4_update_lease(sp->rs_owner->ro_client);
7547                         goto end;
7548                 }
7549                 break;
7550         default:
7551                 ASSERT(FALSE);
7552                 break;
7553         }
7554
7555         rfs4_dbe_lock(sp->rs_dbe);
7556         /*
7557          * Check that the new access modes and deny modes are valid.
7558          * Check that no invalid bits are set.
7559          */
7560         if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7561             (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7562                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7563                 rfs4_update_open_sequence(sp->rs_owner);
7564                 rfs4_dbe_unlock(sp->rs_dbe);
7565                 goto end;
7566         }
7567
7568         /*
7569          * The new modes must be a subset of the current modes and
7570          * the access must specify at least one mode. To test that
7571          * the new mode is a subset of the current modes we bitwise
7572          * AND them together and check that the result equals the new
7573          * mode. For example:
7574          * New mode, access == R and current mode, sp->rs_open_access  == RW
7575          * access & sp->rs_open_access == R == access, so the new access mode
7576          * is valid. Consider access == RW, sp->rs_open_access = R
7577          * access & sp->rs_open_access == R != access, so the new access mode
7578          * is invalid.
7579          */
7580         if ((access & sp->rs_open_access) != access ||
7581             (deny & sp->rs_open_deny) != deny ||
7582             (access &
7583             (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7584                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7585                 rfs4_update_open_sequence(sp->rs_owner);
7586                 rfs4_dbe_unlock(sp->rs_dbe);
7587                 goto end;
7588         }
7589
7590         /*
7591          * Release any share locks associated with this stateID.
7592          * Strictly speaking, this violates the spec because the
7593          * spec effectively requires that open downgrade be atomic.
7594          * At present, fs_shrlock does not have this capability.
7595          */
7596         (void) rfs4_unshare(sp);
7597
7598         status = rfs4_share(sp, access, deny);
7599         if (status != NFS4_OK) {
7600                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7601                 rfs4_update_open_sequence(sp->rs_owner);
7602                 rfs4_dbe_unlock(sp->rs_dbe);
7603                 goto end;
7604         }
7605
7606         fp = sp->rs_finfo;
7607         rfs4_dbe_lock(fp->rf_dbe);
7608
7609         /*
7610          * If the current mode has deny read and the new mode
7611          * does not, decrement the number of deny read mode bits
7612          * and if it goes to zero turn off the deny read bit
7613          * on the file.
7614          */
7615         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7616             (deny & OPEN4_SHARE_DENY_READ) == 0) {
7617                 fp->rf_deny_read--;
7618                 if (fp->rf_deny_read == 0)
7619                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7620         }
7621
7622         /*
7623          * If the current mode has deny write and the new mode
7624          * does not, decrement the number of deny write mode bits
7625          * and if it goes to zero turn off the deny write bit
7626          * on the file.
7627          */
7628         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7629             (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7630                 fp->rf_deny_write--;
7631                 if (fp->rf_deny_write == 0)
7632                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7633         }
7634
7635         /*
7636          * If the current mode has access read and the new mode
7637          * does not, decrement the number of access read mode bits
7638          * and if it goes to zero turn off the access read bit
7639          * on the file.  set fflags to FREAD for the call to
7640          * vn_open_downgrade().
7641          */
7642         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7643             (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7644                 fp->rf_access_read--;
7645                 if (fp->rf_access_read == 0)
7646                         fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7647                 fflags |= FREAD;
7648         }
7649
7650         /*
7651          * If the current mode has access write and the new mode
7652          * does not, decrement the number of access write mode bits
7653          * and if it goes to zero turn off the access write bit
7654          * on the file.  set fflags to FWRITE for the call to
7655          * vn_open_downgrade().
7656          */
7657         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7658             (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7659                 fp->rf_access_write--;
7660                 if (fp->rf_access_write == 0)
7661                         fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7662                 fflags |= FWRITE;
7663         }
7664
7665         /* Check that the file is still accessible */
7666         ASSERT(fp->rf_share_access);
7667
7668         rfs4_dbe_unlock(fp->rf_dbe);
7669
7670         /* now set the new open access and deny modes */
7671         sp->rs_open_access = access;
7672         sp->rs_open_deny = deny;
7673
7674         /*
7675          * we successfully downgraded the share lock, now we need to downgrade
7676          * the open. it is possible that the downgrade was only for a deny
7677          * mode and we have nothing else to do.
7678          */
7679         if ((fflags & (FREAD|FWRITE)) != 0)
7680                 vn_open_downgrade(cs->vp, fflags);
7681
7682         /* Update the stateid */
7683         next_stateid(&sp->rs_stateid);
7684         resp->open_stateid = sp->rs_stateid.stateid;
7685
7686         rfs4_dbe_unlock(sp->rs_dbe);
7687
7688         *cs->statusp = resp->status = NFS4_OK;
7689         /* Update the lease */
7690         rfs4_update_lease(sp->rs_owner->ro_client);
7691         /* And the sequence */
7692         rfs4_update_open_sequence(sp->rs_owner);
7693         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7694
7695 end:
7696         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7697         rfs4_state_rele(sp);
7698 out:
7699         DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7700             OPEN_DOWNGRADE4res *, resp);
7701 }
7702
/*
 * Search the first n bytes at s1 for the bytes of the NUL-terminated
 * string s2 (terminator excluded).  Returns a pointer to the first match,
 * or NULL when s2 does not occur within those n bytes.  An empty s2
 * matches at s1 itself.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
        const size_t patlen = strlen(s2);
        char *cur = (char *)s1;
        size_t remaining;

        /* stop once fewer than patlen bytes are left to compare */
        for (remaining = n; remaining >= patlen; remaining--, cur++) {
                if (bcmp(cur, s2, patlen) == 0)
                        return (cur);
        }

        return (NULL);
}
7718
7719 /*
7720  * The logic behind this function is detailed in the NFSv4 RFC in the
7721  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7722  * that section for explicit guidance to server behavior for
7723  * SETCLIENTID.
7724  */
7725 void
7726 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7727     struct svc_req *req, struct compound_state *cs)
7728 {
7729         SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7730         SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7731         rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7732         rfs4_clntip_t *ci;
7733         bool_t create;
7734         char *addr, *netid;
7735         int len;
7736
7737         DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7738             SETCLIENTID4args *, args);
7739 retry:
7740         newcp = cp_confirmed = cp_unconfirmed = NULL;
7741
7742         /*
7743          * Save the caller's IP address
7744          */
7745         args->client.cl_addr =
7746             (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
7747
7748         /*
7749          * Record if it is a Solaris client that cannot handle referrals.
7750          */
7751         if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
7752             !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
7753                 /* Add a "yes, it's downrev" record */
7754                 create = TRUE;
7755                 ci = rfs4_find_clntip(args->client.cl_addr, &create);
7756                 ASSERT(ci != NULL);
7757                 rfs4_dbe_rele(ci->ri_dbe);
7758         } else {
7759                 /* Remove any previous record */
7760                 rfs4_invalidate_clntip(args->client.cl_addr);
7761         }
7762
7763         /*
7764          * In search of an EXISTING client matching the incoming
7765          * request to establish a new client identifier at the server
7766          */
7767         create = TRUE;
7768         cp = rfs4_findclient(&args->client, &create, NULL);
7769
7770         /* Should never happen */
7771         ASSERT(cp != NULL);
7772
7773         if (cp == NULL) {
7774                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7775                 goto out;
7776         }
7777
7778         /*
7779          * Easiest case. Client identifier is newly created and is
7780          * unconfirmed.  Also note that for this case, no other
7781          * entries exist for the client identifier.  Nothing else to
7782          * check.  Just setup the response and respond.
7783          */
7784         if (create) {
7785                 *cs->statusp = res->status = NFS4_OK;
7786                 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
7787                 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7788                     cp->rc_confirm_verf;
7789                 /* Setup callback information; CB_NULL confirmation later */
7790                 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
7791
7792                 rfs4_client_rele(cp);
7793                 goto out;
7794         }
7795
7796         /*
7797          * An existing, confirmed client may exist but it may not have
7798          * been active for at least one lease period.  If so, then
7799          * "close" the client and create a new client identifier
7800          */
7801         if (rfs4_lease_expired(cp)) {
7802                 rfs4_client_close(cp);
7803                 goto retry;
7804         }
7805
7806         if (cp->rc_need_confirm == TRUE)
7807                 cp_unconfirmed = cp;
7808         else
7809                 cp_confirmed = cp;
7810
7811         cp = NULL;
7812
7813         /*
7814          * We have a confirmed client, now check for an
7815          * unconfimred entry
7816          */
7817         if (cp_confirmed) {
7818                 /* If creds don't match then client identifier is inuse */
7819                 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
7820                         rfs4_cbinfo_t *cbp;
7821                         /*
7822                          * Some one else has established this client
7823                          * id. Try and say * who they are. We will use
7824                          * the call back address supplied by * the
7825                          * first client.
7826                          */
7827                         *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
7828
7829                         addr = netid = NULL;
7830
7831                         cbp = &cp_confirmed->rc_cbinfo;
7832                         if (cbp->cb_callback.cb_location.r_addr &&
7833                             cbp->cb_callback.cb_location.r_netid) {
7834                                 cb_client4 *cbcp = &cbp->cb_callback;
7835
7836                                 len = strlen(cbcp->cb_location.r_addr)+1;
7837                                 addr = kmem_alloc(len, KM_SLEEP);
7838                                 bcopy(cbcp->cb_location.r_addr, addr, len);
7839                                 len = strlen(cbcp->cb_location.r_netid)+1;
7840                                 netid = kmem_alloc(len, KM_SLEEP);
7841                                 bcopy(cbcp->cb_location.r_netid, netid, len);
7842                         }
7843
7844                         res->SETCLIENTID4res_u.client_using.r_addr = addr;
7845                         res->SETCLIENTID4res_u.client_using.r_netid = netid;
7846
7847                         rfs4_client_rele(cp_confirmed);
7848                 }
7849
7850                 /*
7851                  * Confirmed, creds match, and verifier matches; must
7852                  * be an update of the callback info
7853                  */
7854                 if (cp_confirmed->rc_nfs_client.verifier ==
7855                     args->client.verifier) {
7856                         /* Setup callback information */
7857                         rfs4_client_setcb(cp_confirmed, &args->callback,
7858                             args->callback_ident);
7859
7860                         /* everything okay -- move ahead */
7861                         *cs->statusp = res->status = NFS4_OK;
7862                         res->SETCLIENTID4res_u.resok4.clientid =
7863                             cp_confirmed->rc_clientid;
7864
7865                         /* update the confirm_verifier and return it */
7866                         rfs4_client_scv_next(cp_confirmed);
7867                         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7868                             cp_confirmed->rc_confirm_verf;
7869
7870                         rfs4_client_rele(cp_confirmed);
7871                         goto out;
7872                 }
7873
7874                 /*
7875                  * Creds match but the verifier doesn't.  Must search
7876                  * for an unconfirmed client that would be replaced by
7877                  * this request.
7878                  */
7879                 create = FALSE;
7880                 cp_unconfirmed = rfs4_findclient(&args->client, &create,
7881                     cp_confirmed);
7882         }
7883
7884         /*
7885          * At this point, we have taken care of the brand new client
7886          * struct, INUSE case, update of an existing, and confirmed
7887          * client struct.
7888          */
7889
7890         /*
7891          * check to see if things have changed while we originally
7892          * picked up the client struct.  If they have, then return and
7893          * retry the processing of this SETCLIENTID request.
7894          */
7895         if (cp_unconfirmed) {
7896                 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
7897                 if (!cp_unconfirmed->rc_need_confirm) {
7898                         rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
7899                         rfs4_client_rele(cp_unconfirmed);
7900                         if (cp_confirmed)
7901                                 rfs4_client_rele(cp_confirmed);
7902                         goto retry;
7903                 }
7904                 /* do away with the old unconfirmed one */
7905                 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
7906                 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
7907                 rfs4_client_rele(cp_unconfirmed);
7908                 cp_unconfirmed = NULL;
7909         }
7910
7911         /*
7912          * This search will temporarily hide the confirmed client
7913          * struct while a new client struct is created as the
7914          * unconfirmed one.
7915          */
7916         create = TRUE;
7917         newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
7918
7919         ASSERT(newcp != NULL);
7920
7921         if (newcp == NULL) {
7922                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7923                 rfs4_client_rele(cp_confirmed);
7924                 goto out;
7925         }
7926
7927         /*
7928          * If one was not created, then a similar request must be in
7929          * process so release and start over with this one
7930          */
7931         if (create != TRUE) {
7932                 rfs4_client_rele(newcp);
7933                 if (cp_confirmed)
7934                         rfs4_client_rele(cp_confirmed);
7935                 goto retry;
7936         }
7937
7938         *cs->statusp = res->status = NFS4_OK;
7939         res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
7940         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7941             newcp->rc_confirm_verf;
7942         /* Setup callback information; CB_NULL confirmation later */
7943         rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
7944
7945         newcp->rc_cp_confirmed = cp_confirmed;
7946
7947         rfs4_client_rele(newcp);
7948
7949 out:
7950         DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
7951             SETCLIENTID4res *, res);
7952 }
7953
7954 /*ARGSUSED*/
7955 void
7956 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7957     struct svc_req *req, struct compound_state *cs)
7958 {
7959         SETCLIENTID_CONFIRM4args *args =
7960             &argop->nfs_argop4_u.opsetclientid_confirm;
7961         SETCLIENTID_CONFIRM4res *res =
7962             &resop->nfs_resop4_u.opsetclientid_confirm;
7963         rfs4_client_t *cp, *cptoclose = NULL;
7964
7965         DTRACE_NFSV4_2(op__setclientid__confirm__start,
7966             struct compound_state *, cs,
7967             SETCLIENTID_CONFIRM4args *, args);
7968
7969         *cs->statusp = res->status = NFS4_OK;
7970
7971         cp = rfs4_findclient_by_id(args->clientid, TRUE);
7972
7973         if (cp == NULL) {
7974                 *cs->statusp = res->status =
7975                     rfs4_check_clientid(&args->clientid, 1);
7976                 goto out;
7977         }
7978
7979         if (!creds_ok(cp, req, cs)) {
7980                 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
7981                 rfs4_client_rele(cp);
7982                 goto out;
7983         }
7984
7985         /* If the verifier doesn't match, the record doesn't match */
7986         if (cp->rc_confirm_verf != args->setclientid_confirm) {
7987                 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
7988                 rfs4_client_rele(cp);
7989                 goto out;
7990         }
7991
7992         rfs4_dbe_lock(cp->rc_dbe);
7993         cp->rc_need_confirm = FALSE;
7994         if (cp->rc_cp_confirmed) {
7995                 cptoclose = cp->rc_cp_confirmed;
7996                 cptoclose->rc_ss_remove = 1;
7997                 cp->rc_cp_confirmed = NULL;
7998         }
7999
8000         /*
8001          * Update the client's associated server instance, if it's changed
8002          * since the client was created.
8003          */
8004         if (rfs4_servinst(cp) != rfs4_cur_servinst)
8005                 rfs4_servinst_assign(cp, rfs4_cur_servinst);
8006
8007         /*
8008          * Record clientid in stable storage.
8009          * Must be done after server instance has been assigned.
8010          */
8011         rfs4_ss_clid(cp);
8012
8013         rfs4_dbe_unlock(cp->rc_dbe);
8014
8015         if (cptoclose)
8016                 /* don't need to rele, client_close does it */
8017                 rfs4_client_close(cptoclose);
8018
8019         /* If needed, initiate CB_NULL call for callback path */
8020         rfs4_deleg_cb_check(cp);
8021         rfs4_update_lease(cp);
8022
8023         /*
8024          * Check to see if client can perform reclaims
8025          */
8026         rfs4_ss_chkclid(cp);
8027
8028         rfs4_client_rele(cp);
8029
8030 out:
8031         DTRACE_NFSV4_2(op__setclientid__confirm__done,
8032             struct compound_state *, cs,
8033             SETCLIENTID_CONFIRM4 *, res);
8034 }
8035
8036
8037 /*ARGSUSED*/
8038 void
8039 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8040     struct svc_req *req, struct compound_state *cs)
8041 {
8042         CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8043         CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8044         rfs4_state_t *sp;
8045         nfsstat4 status;
8046
8047         DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8048             CLOSE4args *, args);
8049
8050         if (cs->vp == NULL) {
8051                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8052                 goto out;
8053         }
8054
8055         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8056         if (status != NFS4_OK) {
8057                 *cs->statusp = resp->status = status;
8058                 goto out;
8059         }
8060
8061         /* Ensure specified filehandle matches */
8062         if (cs->vp != sp->rs_finfo->rf_vp) {
8063                 rfs4_state_rele(sp);
8064                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8065                 goto out;
8066         }
8067
8068         /* hold off other access to open_owner while we tinker */
8069         rfs4_sw_enter(&sp->rs_owner->ro_sw);
8070
8071         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8072         case NFS4_CHECK_STATEID_OKAY:
8073                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8074                     resop) != NFS4_CHKSEQ_OKAY) {
8075                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8076                         goto end;
8077                 }
8078                 break;
8079         case NFS4_CHECK_STATEID_OLD:
8080                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8081                 goto end;
8082         case NFS4_CHECK_STATEID_BAD:
8083                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8084                 goto end;
8085         case NFS4_CHECK_STATEID_EXPIRED:
8086                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8087                 goto end;
8088         case NFS4_CHECK_STATEID_CLOSED:
8089                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8090                 goto end;
8091         case NFS4_CHECK_STATEID_UNCONFIRMED:
8092                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8093                 goto end;
8094         case NFS4_CHECK_STATEID_REPLAY:
8095                 /* Check the sequence id for the open owner */
8096                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8097                     resop)) {
8098                 case NFS4_CHKSEQ_OKAY:
8099                         /*
8100                          * This is replayed stateid; if seqid matches
8101                          * next expected, then client is using wrong seqid.
8102                          */
8103                         /* FALL THROUGH */
8104                 case NFS4_CHKSEQ_BAD:
8105                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8106                         goto end;
8107                 case NFS4_CHKSEQ_REPLAY:
8108                         /*
8109                          * Note this case is the duplicate case so
8110                          * resp->status is already set.
8111                          */
8112                         *cs->statusp = resp->status;
8113                         rfs4_update_lease(sp->rs_owner->ro_client);
8114                         goto end;
8115                 }
8116                 break;
8117         default:
8118                 ASSERT(FALSE);
8119                 break;
8120         }
8121
8122         rfs4_dbe_lock(sp->rs_dbe);
8123
8124         /* Update the stateid. */
8125         next_stateid(&sp->rs_stateid);
8126         resp->open_stateid = sp->rs_stateid.stateid;
8127
8128         rfs4_dbe_unlock(sp->rs_dbe);
8129
8130         rfs4_update_lease(sp->rs_owner->ro_client);
8131         rfs4_update_open_sequence(sp->rs_owner);
8132         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8133
8134         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8135
8136         *cs->statusp = resp->status = status;
8137
8138 end:
8139         rfs4_sw_exit(&sp->rs_owner->ro_sw);
8140         rfs4_state_rele(sp);
8141 out:
8142         DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8143             CLOSE4res *, resp);
8144 }
8145
8146 /*
8147  * Manage the counts on the file struct and close all file locks
8148  */
8149 /*ARGSUSED*/
8150 void
8151 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8152     bool_t close_of_client)
8153 {
8154         rfs4_file_t *fp = sp->rs_finfo;
8155         rfs4_lo_state_t *lsp;
8156         int fflags = 0;
8157
8158         /*
8159          * If this call is part of the larger closing down of client
8160          * state then it is just easier to release all locks
8161          * associated with this client instead of going through each
8162          * individual file and cleaning locks there.
8163          */
8164         if (close_of_client) {
8165                 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8166                     !list_is_empty(&sp->rs_lostatelist) &&
8167                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8168                         /* Is the PxFS kernel module loaded? */
8169                         if (lm_remove_file_locks != NULL) {
8170                                 int new_sysid;
8171
8172                                 /* Encode the cluster nodeid in new sysid */
8173                                 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8174                                 lm_set_nlmid_flk(&new_sysid);
8175
8176                                 /*
8177                                  * This PxFS routine removes file locks for a
8178                                  * client over all nodes of a cluster.
8179                                  */
8180                                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8181                                     "lm_remove_file_locks(sysid=0x%x)\n",
8182                                     new_sysid));
8183                                 (*lm_remove_file_locks)(new_sysid);
8184                         } else {
8185                                 struct flock64 flk;
8186
8187                                 /* Release all locks for this client */
8188                                 flk.l_type = F_UNLKSYS;
8189                                 flk.l_whence = 0;
8190                                 flk.l_start = 0;
8191                                 flk.l_len = 0;
8192                                 flk.l_sysid =
8193                                     sp->rs_owner->ro_client->rc_sysidt;
8194                                 flk.l_pid = 0;
8195                                 (void) fop_frlock(sp->rs_finfo->rf_vp, F_SETLK,
8196                                     &flk, F_REMOTELOCK | FREAD | FWRITE,
8197                                     0, NULL, CRED(), NULL);
8198                         }
8199
8200                         sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8201                 }
8202         }
8203
8204         /*
8205          * Release all locks on this file by this lock owner or at
8206          * least mark the locks as having been released
8207          */
8208         for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8209             lsp = list_next(&sp->rs_lostatelist, lsp)) {
8210                 lsp->rls_locks_cleaned = TRUE;
8211
8212                 /* Was this already taken care of above? */
8213                 if (!close_of_client &&
8214                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8215                         (void) cleanlocks(sp->rs_finfo->rf_vp,
8216                             lsp->rls_locker->rl_pid,
8217                             lsp->rls_locker->rl_client->rc_sysidt);
8218         }
8219
8220         /*
8221          * Release any shrlocks associated with this open state ID.
8222          * This must be done before the rfs4_state gets marked closed.
8223          */
8224         if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8225                 (void) rfs4_unshare(sp);
8226
8227         if (sp->rs_open_access) {
8228                 rfs4_dbe_lock(fp->rf_dbe);
8229
8230                 /*
8231                  * Decrement the count for each access and deny bit that this
8232                  * state has contributed to the file.
8233                  * If the file counts go to zero
8234                  * clear the appropriate bit in the appropriate mask.
8235                  */
8236                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8237                         fp->rf_access_read--;
8238                         fflags |= FREAD;
8239                         if (fp->rf_access_read == 0)
8240                                 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8241                 }
8242                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8243                         fp->rf_access_write--;
8244                         fflags |= FWRITE;
8245                         if (fp->rf_access_write == 0)
8246                                 fp->rf_share_access &=
8247                                     ~OPEN4_SHARE_ACCESS_WRITE;
8248                 }
8249                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8250                         fp->rf_deny_read--;
8251                         if (fp->rf_deny_read == 0)
8252                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8253                 }
8254                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8255                         fp->rf_deny_write--;
8256                         if (fp->rf_deny_write == 0)
8257                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8258                 }
8259
8260                 (void) fop_close(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8261
8262                 rfs4_dbe_unlock(fp->rf_dbe);
8263
8264                 sp->rs_open_access = 0;
8265                 sp->rs_open_deny = 0;
8266         }
8267 }
8268
8269 /*
8270  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8271  */
8272 static nfsstat4
8273 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8274 {
8275         rfs4_lockowner_t *lo;
8276         rfs4_client_t *cp;
8277         uint32_t len;
8278
8279         lo = rfs4_findlockowner_by_pid(flk->l_pid);
8280         if (lo != NULL) {
8281                 cp = lo->rl_client;
8282                 if (rfs4_lease_expired(cp)) {
8283                         rfs4_lockowner_rele(lo);
8284                         rfs4_dbe_hold(cp->rc_dbe);
8285                         rfs4_client_close(cp);
8286                         return (NFS4ERR_EXPIRED);
8287                 }
8288                 dp->owner.clientid = lo->rl_owner.clientid;
8289                 len = lo->rl_owner.owner_len;
8290                 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8291                 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8292                 dp->owner.owner_len = len;
8293                 rfs4_lockowner_rele(lo);
8294                 goto finish;
8295         }
8296
8297         /*
8298          * Its not a NFS4 lock. We take advantage that the upper 32 bits
8299          * of the client id contain the boot time for a NFS4 lock. So we
8300          * fabricate and identity by setting clientid to the sysid, and
8301          * the lock owner to the pid.
8302          */
8303         dp->owner.clientid = flk->l_sysid;
8304         len = sizeof (pid_t);
8305         dp->owner.owner_len = len;
8306         dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8307         bcopy(&flk->l_pid, dp->owner.owner_val, len);
8308 finish:
8309         dp->offset = flk->l_start;
8310         dp->length = flk->l_len;
8311
8312         if (flk->l_type == F_RDLCK)
8313                 dp->locktype = READ_LT;
8314         else if (flk->l_type == F_WRLCK)
8315                 dp->locktype = WRITE_LT;
8316         else
8317                 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8318
8319         return (NFS4_OK);
8320 }
8321
8322 /*
8323  * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8324  * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8325  * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8326  * for that (obviously); they are sending the LOCK requests with some delays
8327  * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8328  * locking and delay implementation at the client side.
8329  *
8330  * To make the life of the clients easier, the NFSv4.0 server tries to do some
8331  * fast retries on its own (the for loop below) in a hope the lock will be
8332  * available soon.  And if not, the client won't need to resend the LOCK
8333  * requests so fast to check the lock availability.  This basically saves some
8334  * network traffic and tries to make sure the client gets the lock ASAP.
8335  */
8336 static int
8337 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8338 {
8339         int error;
8340         struct flock64 flk;
8341         int i;
8342         clock_t delaytime;
8343         int cmd;
8344         int spin_cnt = 0;
8345
8346         cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8347 retry:
8348         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8349
8350         for (i = 0; i < rfs4_maxlock_tries; i++) {
8351                 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8352                 error = fop_frlock(vp, cmd,
8353                     flock, flag, 0, NULL, cred, NULL);
8354
8355                 if (error != EAGAIN && error != EACCES)
8356                         break;
8357
8358                 if (i < rfs4_maxlock_tries - 1) {
8359                         delay(delaytime);
8360                         delaytime *= 2;
8361                 }
8362         }
8363
8364         if (error == EAGAIN || error == EACCES) {
8365                 /* Get the owner of the lock */
8366                 flk = *flock;
8367                 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8368                 if (fop_frlock(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8369                     NULL) == 0) {
8370                         /*
8371                          * There's a race inherent in the current fop_frlock
8372                          * design where:
8373                          * a: "other guy" takes a lock that conflicts with a
8374                          * lock we want
8375                          * b: we attempt to take our lock (non-blocking) and
8376                          * the attempt fails.
8377                          * c: "other guy" releases the conflicting lock
8378                          * d: we ask what lock conflicts with the lock we want,
8379                          * getting F_UNLCK (no lock blocks us)
8380                          *
8381                          * If we retry the non-blocking lock attempt in this
8382                          * case (restart at step 'b') there's some possibility
8383                          * that many such attempts might fail.  However a test
8384                          * designed to actually provoke this race shows that
8385                          * the vast majority of cases require no retry, and
8386                          * only a few took as many as three retries.  Here's
8387                          * the test outcome:
8388                          *
8389                          *         number of retries    how many times we needed
8390                          *                              that many retries
8391                          *         0                    79461
8392                          *         1                      862
8393                          *         2                       49
8394                          *         3                        5
8395                          *
8396                          * Given those empirical results, we arbitrarily limit
8397                          * the retry count to ten.
8398                          *
8399                          * If we actually make to ten retries and give up,
8400                          * nothing catastrophic happens, but we're unable to
8401                          * return the information about the conflicting lock to
8402                          * the NFS client.  That's an acceptable trade off vs.
8403                          * letting this retry loop run forever.
8404                          */
8405                         if (flk.l_type == F_UNLCK) {
8406                                 if (spin_cnt++ < 10) {
8407                                         /* No longer locked, retry */
8408                                         goto retry;
8409                                 }
8410                         } else {
8411                                 *flock = flk;
8412                                 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8413                                     F_GETLK, &flk);
8414                         }
8415                 }
8416         }
8417
8418         return (error);
8419 }
8420
8421 /*ARGSUSED*/
8422 static nfsstat4
8423 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8424     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8425 {
8426         nfsstat4 status;
8427         rfs4_lockowner_t *lo = lsp->rls_locker;
8428         rfs4_state_t *sp = lsp->rls_state;
8429         struct flock64 flock;
8430         int16_t ltype;
8431         int flag;
8432         int error;
8433         sysid_t sysid;
8434         LOCK4res *lres;
8435         vnode_t *vp;
8436
8437         if (rfs4_lease_expired(lo->rl_client)) {
8438                 return (NFS4ERR_EXPIRED);
8439         }
8440
8441         if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8442                 return (status);
8443
8444         /* Check for zero length. To lock to end of file use all ones for V4 */
8445         if (length == 0)
8446                 return (NFS4ERR_INVAL);
8447         else if (length == (length4)(~0))
8448                 length = 0;             /* Posix to end of file  */
8449
8450 retry:
8451         rfs4_dbe_lock(sp->rs_dbe);
8452         if (sp->rs_closed == TRUE) {
8453                 rfs4_dbe_unlock(sp->rs_dbe);
8454                 return (NFS4ERR_OLD_STATEID);
8455         }
8456
8457         if (resop->resop != OP_LOCKU) {
8458                 switch (locktype) {
8459                 case READ_LT:
8460                 case READW_LT:
8461                         if ((sp->rs_share_access
8462                             & OPEN4_SHARE_ACCESS_READ) == 0) {
8463                                 rfs4_dbe_unlock(sp->rs_dbe);
8464
8465                                 return (NFS4ERR_OPENMODE);
8466                         }
8467                         ltype = F_RDLCK;
8468                         break;
8469                 case WRITE_LT:
8470                 case WRITEW_LT:
8471                         if ((sp->rs_share_access
8472                             & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8473                                 rfs4_dbe_unlock(sp->rs_dbe);
8474
8475                                 return (NFS4ERR_OPENMODE);
8476                         }
8477                         ltype = F_WRLCK;
8478                         break;
8479                 }
8480         } else
8481                 ltype = F_UNLCK;
8482
8483         flock.l_type = ltype;
8484         flock.l_whence = 0;             /* SEEK_SET */
8485         flock.l_start = offset;
8486         flock.l_len = length;
8487         flock.l_sysid = sysid;
8488         flock.l_pid = lsp->rls_locker->rl_pid;
8489
8490         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8491         if (flock.l_len < 0 || flock.l_start < 0) {
8492                 rfs4_dbe_unlock(sp->rs_dbe);
8493                 return (NFS4ERR_INVAL);
8494         }
8495
8496         /*
8497          * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8498          * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8499          */
8500         flag = (int)sp->rs_share_access | F_REMOTELOCK;
8501
8502         vp = sp->rs_finfo->rf_vp;
8503         VN_HOLD(vp);
8504
8505         /*
8506          * We need to unlock sp before we call the underlying filesystem to
8507          * acquire the file lock.
8508          */
8509         rfs4_dbe_unlock(sp->rs_dbe);
8510
8511         error = setlock(vp, &flock, flag, cred);
8512
8513         /*
8514          * Make sure the file is still open.  In a case the file was closed in
8515          * the meantime, clean the lock we acquired using the setlock() call
8516          * above, and return the appropriate error.
8517          */
8518         rfs4_dbe_lock(sp->rs_dbe);
8519         if (sp->rs_closed == TRUE) {
8520                 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8521                 rfs4_dbe_unlock(sp->rs_dbe);
8522
8523                 VN_RELE(vp);
8524
8525                 return (NFS4ERR_OLD_STATEID);
8526         }
8527         rfs4_dbe_unlock(sp->rs_dbe);
8528
8529         VN_RELE(vp);
8530
8531         if (error == 0) {
8532                 rfs4_dbe_lock(lsp->rls_dbe);
8533                 next_stateid(&lsp->rls_lockid);
8534                 rfs4_dbe_unlock(lsp->rls_dbe);
8535         }
8536
8537         /*
8538          * N.B. We map error values to nfsv4 errors. This is differrent
8539          * than puterrno4 routine.
8540          */
8541         switch (error) {
8542         case 0:
8543                 status = NFS4_OK;
8544                 break;
8545         case EAGAIN:
8546         case EACCES:            /* Old value */
8547                 /* Can only get here if op is OP_LOCK */
8548                 ASSERT(resop->resop == OP_LOCK);
8549                 lres = &resop->nfs_resop4_u.oplock;
8550                 status = NFS4ERR_DENIED;
8551                 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8552                     == NFS4ERR_EXPIRED)
8553                         goto retry;
8554                 break;
8555         case ENOLCK:
8556                 status = NFS4ERR_DELAY;
8557                 break;
8558         case EOVERFLOW:
8559                 status = NFS4ERR_INVAL;
8560                 break;
8561         case EINVAL:
8562                 status = NFS4ERR_NOTSUPP;
8563                 break;
8564         default:
8565                 status = NFS4ERR_SERVERFAULT;
8566                 break;
8567         }
8568
8569         return (status);
8570 }
8571
8572 /*ARGSUSED*/
8573 void
8574 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8575     struct svc_req *req, struct compound_state *cs)
8576 {
8577         LOCK4args *args = &argop->nfs_argop4_u.oplock;
8578         LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8579         nfsstat4 status;
8580         stateid4 *stateid;
8581         rfs4_lockowner_t *lo;
8582         rfs4_client_t *cp;
8583         rfs4_state_t *sp = NULL;
8584         rfs4_lo_state_t *lsp = NULL;
8585         bool_t ls_sw_held = FALSE;
8586         bool_t create = TRUE;
8587         bool_t lcreate = TRUE;
8588         bool_t dup_lock = FALSE;
8589         int rc;
8590
8591         DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8592             LOCK4args *, args);
8593
8594         if (cs->vp == NULL) {
8595                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8596                 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8597                     cs, LOCK4res *, resp);
8598                 return;
8599         }
8600
8601         if (args->locker.new_lock_owner) {
8602                 /* Create a new lockowner for this instance */
8603                 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8604
8605                 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8606
8607                 stateid = &olo->open_stateid;
8608                 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8609                 if (status != NFS4_OK) {
8610                         NFS4_DEBUG(rfs4_debug,
8611                             (CE_NOTE, "Get state failed in lock %d", status));
8612                         *cs->statusp = resp->status = status;
8613                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8614                             cs, LOCK4res *, resp);
8615                         return;
8616                 }
8617
8618                 /* Ensure specified filehandle matches */
8619                 if (cs->vp != sp->rs_finfo->rf_vp) {
8620                         rfs4_state_rele(sp);
8621                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8622                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8623                             cs, LOCK4res *, resp);
8624                         return;
8625                 }
8626
8627                 /* hold off other access to open_owner while we tinker */
8628                 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8629
8630                 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8631                 case NFS4_CHECK_STATEID_OLD:
8632                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8633                         goto end;
8634                 case NFS4_CHECK_STATEID_BAD:
8635                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8636                         goto end;
8637                 case NFS4_CHECK_STATEID_EXPIRED:
8638                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8639                         goto end;
8640                 case NFS4_CHECK_STATEID_UNCONFIRMED:
8641                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8642                         goto end;
8643                 case NFS4_CHECK_STATEID_CLOSED:
8644                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8645                         goto end;
8646                 case NFS4_CHECK_STATEID_OKAY:
8647                 case NFS4_CHECK_STATEID_REPLAY:
8648                         switch (rfs4_check_olo_seqid(olo->open_seqid,
8649                             sp->rs_owner, resop)) {
8650                         case NFS4_CHKSEQ_OKAY:
8651                                 if (rc == NFS4_CHECK_STATEID_OKAY)
8652                                         break;
8653                                 /*
8654                                  * This is replayed stateid; if seqid
8655                                  * matches next expected, then client
8656                                  * is using wrong seqid.
8657                                  */
8658                                 /* FALLTHROUGH */
8659                         case NFS4_CHKSEQ_BAD:
8660                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8661                                 goto end;
8662                         case NFS4_CHKSEQ_REPLAY:
8663                                 /* This is a duplicate LOCK request */
8664                                 dup_lock = TRUE;
8665
8666                                 /*
8667                                  * For a duplicate we do not want to
8668                                  * create a new lockowner as it should
8669                                  * already exist.
8670                                  * Turn off the lockowner create flag.
8671                                  */
8672                                 lcreate = FALSE;
8673                         }
8674                         break;
8675                 }
8676
8677                 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8678                 if (lo == NULL) {
8679                         NFS4_DEBUG(rfs4_debug,
8680                             (CE_NOTE, "rfs4_op_lock: no lock owner"));
8681                         *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8682                         goto end;
8683                 }
8684
8685                 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8686                 if (lsp == NULL) {
8687                         rfs4_update_lease(sp->rs_owner->ro_client);
8688                         /*
8689                          * Only update the open_seqid if this is not
8690                          * a duplicate request
8691                          */
8692                         if (dup_lock == FALSE) {
8693                                 rfs4_update_open_sequence(sp->rs_owner);
8694                         }
8695
8696                         NFS4_DEBUG(rfs4_debug,
8697                             (CE_NOTE, "rfs4_op_lock: no state"));
8698                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8699                         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8700                         rfs4_lockowner_rele(lo);
8701                         goto end;
8702                 }
8703
8704                 /*
8705                  * This is the new_lock_owner branch and the client is
8706                  * supposed to be associating a new lock_owner with
8707                  * the open file at this point.  If we find that a
8708                  * lock_owner/state association already exists and a
8709                  * successful LOCK request was returned to the client,
8710                  * an error is returned to the client since this is
8711                  * not appropriate.  The client should be using the
8712                  * existing lock_owner branch.
8713                  */
8714                 if (dup_lock == FALSE && create == FALSE) {
8715                         if (lsp->rls_lock_completed == TRUE) {
8716                                 *cs->statusp =
8717                                     resp->status = NFS4ERR_BAD_SEQID;
8718                                 rfs4_lockowner_rele(lo);
8719                                 goto end;
8720                         }
8721                 }
8722
8723                 rfs4_update_lease(sp->rs_owner->ro_client);
8724
8725                 /*
8726                  * Only update the open_seqid if this is not
8727                  * a duplicate request
8728                  */
8729                 if (dup_lock == FALSE) {
8730                         rfs4_update_open_sequence(sp->rs_owner);
8731                 }
8732
8733                 /*
8734                  * If this is a duplicate lock request, just copy the
8735                  * previously saved reply and return.
8736                  */
8737                 if (dup_lock == TRUE) {
8738                         /* verify that lock_seqid's match */
8739                         if (lsp->rls_seqid != olo->lock_seqid) {
8740                                 NFS4_DEBUG(rfs4_debug,
8741                                     (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8742                                     "lsp->seqid=%d old->seqid=%d",
8743                                     lsp->rls_seqid, olo->lock_seqid));
8744                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8745                         } else {
8746                                 rfs4_copy_reply(resop, &lsp->rls_reply);
8747                                 /*
8748                                  * Make sure to copy the just
8749                                  * retrieved reply status into the
8750                                  * overall compound status
8751                                  */
8752                                 *cs->statusp = resp->status;
8753                         }
8754                         rfs4_lockowner_rele(lo);
8755                         goto end;
8756                 }
8757
8758                 rfs4_dbe_lock(lsp->rls_dbe);
8759
8760                 /* Make sure to update the lock sequence id */
8761                 lsp->rls_seqid = olo->lock_seqid;
8762
8763                 NFS4_DEBUG(rfs4_debug,
8764                     (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
8765
8766                 /*
8767                  * This is used to signify the newly created lockowner
8768                  * stateid and its sequence number.  The checks for
8769                  * sequence number and increment don't occur on the
8770                  * very first lock request for a lockowner.
8771                  */
8772                 lsp->rls_skip_seqid_check = TRUE;
8773
8774                 /* hold off other access to lsp while we tinker */
8775                 rfs4_sw_enter(&lsp->rls_sw);
8776                 ls_sw_held = TRUE;
8777
8778                 rfs4_dbe_unlock(lsp->rls_dbe);
8779
8780                 rfs4_lockowner_rele(lo);
8781         } else {
8782                 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
8783                 /* get lsp and hold the lock on the underlying file struct */
8784                 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
8785                     != NFS4_OK) {
8786                         *cs->statusp = resp->status = status;
8787                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8788                             cs, LOCK4res *, resp);
8789                         return;
8790                 }
8791                 create = FALSE; /* We didn't create lsp */
8792
8793                 /* Ensure specified filehandle matches */
8794                 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
8795                         rfs4_lo_state_rele(lsp, TRUE);
8796                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8797                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8798                             cs, LOCK4res *, resp);
8799                         return;
8800                 }
8801
8802                 /* hold off other access to lsp while we tinker */
8803                 rfs4_sw_enter(&lsp->rls_sw);
8804                 ls_sw_held = TRUE;
8805
8806                 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
8807                 /*
8808                  * The stateid looks like it was okay (expected to be
8809                  * the next one)
8810                  */
8811                 case NFS4_CHECK_STATEID_OKAY:
8812                         /*
8813                          * The sequence id is now checked.  Determine
8814                          * if this is a replay or if it is in the
8815                          * expected (next) sequence.  In the case of a
8816                          * replay, there are two replay conditions
8817                          * that may occur.  The first is the normal
8818                          * condition where a LOCK is done with a
8819                          * NFS4_OK response and the stateid is
8820                          * updated.  That case is handled below when
8821                          * the stateid is identified as a REPLAY.  The
8822                          * second is the case where an error is
8823                          * returned, like NFS4ERR_DENIED, and the
8824                          * sequence number is updated but the stateid
8825                          * is not updated.  This second case is dealt
8826                          * with here.  So it may seem odd that the
8827                          * stateid is okay but the sequence id is a
8828                          * replay but it is okay.
8829                          */
8830                         switch (rfs4_check_lock_seqid(
8831                             args->locker.locker4_u.lock_owner.lock_seqid,
8832                             lsp, resop)) {
8833                         case NFS4_CHKSEQ_REPLAY:
8834                                 if (resp->status != NFS4_OK) {
8835                                         /*
8836                                          * Here is our replay and need
8837                                          * to verify that the last
8838                                          * response was an error.
8839                                          */
8840                                         *cs->statusp = resp->status;
8841                                         goto end;
8842                                 }
8843                                 /*
8844                                  * This is done since the sequence id
8845                                  * looked like a replay but it didn't
8846                                  * pass our check so a BAD_SEQID is
8847                                  * returned as a result.
8848                                  */
8849                                 /*FALLTHROUGH*/
8850                         case NFS4_CHKSEQ_BAD:
8851                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8852                                 goto end;
8853                         case NFS4_CHKSEQ_OKAY:
8854                                 /* Everything looks okay move ahead */
8855                                 break;
8856                         }
8857                         break;
8858                 case NFS4_CHECK_STATEID_OLD:
8859                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8860                         goto end;
8861                 case NFS4_CHECK_STATEID_BAD:
8862                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8863                         goto end;
8864                 case NFS4_CHECK_STATEID_EXPIRED:
8865                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8866                         goto end;
8867                 case NFS4_CHECK_STATEID_CLOSED:
8868                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8869                         goto end;
8870                 case NFS4_CHECK_STATEID_REPLAY:
8871                         switch (rfs4_check_lock_seqid(
8872                             args->locker.locker4_u.lock_owner.lock_seqid,
8873                             lsp, resop)) {
8874                         case NFS4_CHKSEQ_OKAY:
8875                                 /*
8876                                  * This is a replayed stateid; if
8877                                  * seqid matches the next expected,
8878                                  * then client is using wrong seqid.
8879                                  */
8880                         case NFS4_CHKSEQ_BAD:
8881                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8882                                 goto end;
8883                         case NFS4_CHKSEQ_REPLAY:
8884                                 rfs4_update_lease(lsp->rls_locker->rl_client);
8885                                 *cs->statusp = status = resp->status;
8886                                 goto end;
8887                         }
8888                         break;
8889                 default:
8890                         ASSERT(FALSE);
8891                         break;
8892                 }
8893
8894                 rfs4_update_lock_sequence(lsp);
8895                 rfs4_update_lease(lsp->rls_locker->rl_client);
8896         }
8897
8898         /*
8899          * NFS4 only allows locking on regular files, so
8900          * verify type of object.
8901          */
8902         if (cs->vp->v_type != VREG) {
8903                 if (cs->vp->v_type == VDIR)
8904                         status = NFS4ERR_ISDIR;
8905                 else
8906                         status = NFS4ERR_INVAL;
8907                 goto out;
8908         }
8909
8910         cp = lsp->rls_state->rs_owner->ro_client;
8911
8912         if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
8913                 status = NFS4ERR_GRACE;
8914                 goto out;
8915         }
8916
8917         if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
8918                 status = NFS4ERR_NO_GRACE;
8919                 goto out;
8920         }
8921
8922         if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
8923                 status = NFS4ERR_NO_GRACE;
8924                 goto out;
8925         }
8926
8927         if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
8928                 cs->deleg = TRUE;
8929
8930         status = rfs4_do_lock(lsp, args->locktype,
8931             args->offset, args->length, cs->cr, resop);
8932
8933 out:
8934         lsp->rls_skip_seqid_check = FALSE;
8935
8936         *cs->statusp = resp->status = status;
8937
8938         if (status == NFS4_OK) {
8939                 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
8940                 lsp->rls_lock_completed = TRUE;
8941         }
8942         /*
8943          * Only update the "OPEN" response here if this was a new
8944          * lock_owner
8945          */
8946         if (sp)
8947                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8948
8949         rfs4_update_lock_resp(lsp, resop);
8950
8951 end:
8952         if (lsp) {
8953                 if (ls_sw_held)
8954                         rfs4_sw_exit(&lsp->rls_sw);
8955                 /*
8956                  * If an sp obtained, then the lsp does not represent
8957                  * a lock on the file struct.
8958                  */
8959                 if (sp != NULL)
8960                         rfs4_lo_state_rele(lsp, FALSE);
8961                 else
8962                         rfs4_lo_state_rele(lsp, TRUE);
8963         }
8964         if (sp) {
8965                 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8966                 rfs4_state_rele(sp);
8967         }
8968
8969         DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
8970             LOCK4res *, resp);
8971 }
8972
8973 /* free function for LOCK/LOCKT */
8974 static void
8975 lock_denied_free(nfs_resop4 *resop)
8976 {
8977         LOCK4denied *dp = NULL;
8978
8979         switch (resop->resop) {
8980         case OP_LOCK:
8981                 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
8982                         dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
8983                 break;
8984         case OP_LOCKT:
8985                 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
8986                         dp = &resop->nfs_resop4_u.oplockt.denied;
8987                 break;
8988         default:
8989                 break;
8990         }
8991
8992         if (dp)
8993                 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
8994 }
8995
8996 /*ARGSUSED*/
8997 void
8998 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
8999     struct svc_req *req, struct compound_state *cs)
9000 {
9001         LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9002         LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9003         nfsstat4 status;
9004         stateid4 *stateid = &args->lock_stateid;
9005         rfs4_lo_state_t *lsp;
9006
9007         DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9008             LOCKU4args *, args);
9009
9010         if (cs->vp == NULL) {
9011                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9012                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9013                     LOCKU4res *, resp);
9014                 return;
9015         }
9016
9017         if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9018                 *cs->statusp = resp->status = status;
9019                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9020                     LOCKU4res *, resp);
9021                 return;
9022         }
9023
9024         /* Ensure specified filehandle matches */
9025         if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9026                 rfs4_lo_state_rele(lsp, TRUE);
9027                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9028                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9029                     LOCKU4res *, resp);
9030                 return;
9031         }
9032
9033         /* hold off other access to lsp while we tinker */
9034         rfs4_sw_enter(&lsp->rls_sw);
9035
9036         switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9037         case NFS4_CHECK_STATEID_OKAY:
9038                 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9039                     != NFS4_CHKSEQ_OKAY) {
9040                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9041                         goto end;
9042                 }
9043                 break;
9044         case NFS4_CHECK_STATEID_OLD:
9045                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9046                 goto end;
9047         case NFS4_CHECK_STATEID_BAD:
9048                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9049                 goto end;
9050         case NFS4_CHECK_STATEID_EXPIRED:
9051                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9052                 goto end;
9053         case NFS4_CHECK_STATEID_CLOSED:
9054                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9055                 goto end;
9056         case NFS4_CHECK_STATEID_REPLAY:
9057                 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9058                 case NFS4_CHKSEQ_OKAY:
9059                                 /*
9060                                  * This is a replayed stateid; if
9061                                  * seqid matches the next expected,
9062                                  * then client is using wrong seqid.
9063                                  */
9064                 case NFS4_CHKSEQ_BAD:
9065                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9066                         goto end;
9067                 case NFS4_CHKSEQ_REPLAY:
9068                         rfs4_update_lease(lsp->rls_locker->rl_client);
9069                         *cs->statusp = status = resp->status;
9070                         goto end;
9071                 }
9072                 break;
9073         default:
9074                 ASSERT(FALSE);
9075                 break;
9076         }
9077
9078         rfs4_update_lock_sequence(lsp);
9079         rfs4_update_lease(lsp->rls_locker->rl_client);
9080
9081         /*
9082          * NFS4 only allows locking on regular files, so
9083          * verify type of object.
9084          */
9085         if (cs->vp->v_type != VREG) {
9086                 if (cs->vp->v_type == VDIR)
9087                         status = NFS4ERR_ISDIR;
9088                 else
9089                         status = NFS4ERR_INVAL;
9090                 goto out;
9091         }
9092
9093         if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9094                 status = NFS4ERR_GRACE;
9095                 goto out;
9096         }
9097
9098         status = rfs4_do_lock(lsp, args->locktype,
9099             args->offset, args->length, cs->cr, resop);
9100
9101 out:
9102         *cs->statusp = resp->status = status;
9103
9104         if (status == NFS4_OK)
9105                 resp->lock_stateid = lsp->rls_lockid.stateid;
9106
9107         rfs4_update_lock_resp(lsp, resop);
9108
9109 end:
9110         rfs4_sw_exit(&lsp->rls_sw);
9111         rfs4_lo_state_rele(lsp, TRUE);
9112
9113         DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9114             LOCKU4res *, resp);
9115 }
9116
9117 /*
9118  * LOCKT is a best effort routine, the client can not be guaranteed that
9119  * the status return is still in effect by the time the reply is received.
9120  * They are numerous race conditions in this routine, but we are not required
9121  * and can not be accurate.
9122  */
9123 /*ARGSUSED*/
9124 void
9125 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9126     struct svc_req *req, struct compound_state *cs)
9127 {
9128         LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9129         LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9130         rfs4_lockowner_t *lo;
9131         rfs4_client_t *cp;
9132         bool_t create = FALSE;
9133         struct flock64 flk;
9134         int error;
9135         int flag = FREAD | FWRITE;
9136         int ltype;
9137         length4 posix_length;
9138         sysid_t sysid;
9139         pid_t pid;
9140
9141         DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9142             LOCKT4args *, args);
9143
9144         if (cs->vp == NULL) {
9145                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9146                 goto out;
9147         }
9148
9149         /*
9150          * NFS4 only allows locking on regular files, so
9151          * verify type of object.
9152          */
9153         if (cs->vp->v_type != VREG) {
9154                 if (cs->vp->v_type == VDIR)
9155                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
9156                 else
9157                         *cs->statusp = resp->status =  NFS4ERR_INVAL;
9158                 goto out;
9159         }
9160
9161         /*
9162          * Check out the clientid to ensure the server knows about it
9163          * so that we correctly inform the client of a server reboot.
9164          */
9165         if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9166             == NULL) {
9167                 *cs->statusp = resp->status =
9168                     rfs4_check_clientid(&args->owner.clientid, 0);
9169                 goto out;
9170         }
9171         if (rfs4_lease_expired(cp)) {
9172                 rfs4_client_close(cp);
9173                 /*
9174                  * Protocol doesn't allow returning NFS4ERR_STALE as
9175                  * other operations do on this check so STALE_CLIENTID
9176                  * is returned instead
9177                  */
9178                 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9179                 goto out;
9180         }
9181
9182         if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9183                 *cs->statusp = resp->status = NFS4ERR_GRACE;
9184                 rfs4_client_rele(cp);
9185                 goto out;
9186         }
9187         rfs4_client_rele(cp);
9188
9189         resp->status = NFS4_OK;
9190
9191         switch (args->locktype) {
9192         case READ_LT:
9193         case READW_LT:
9194                 ltype = F_RDLCK;
9195                 break;
9196         case WRITE_LT:
9197         case WRITEW_LT:
9198                 ltype = F_WRLCK;
9199                 break;
9200         }
9201
9202         posix_length = args->length;
9203         /* Check for zero length. To lock to end of file use all ones for V4 */
9204         if (posix_length == 0) {
9205                 *cs->statusp = resp->status = NFS4ERR_INVAL;
9206                 goto out;
9207         } else if (posix_length == (length4)(~0)) {
9208                 posix_length = 0;       /* Posix to end of file  */
9209         }
9210
9211         /* Find or create a lockowner */
9212         lo = rfs4_findlockowner(&args->owner, &create);
9213
9214         if (lo) {
9215                 pid = lo->rl_pid;
9216                 if ((resp->status =
9217                     rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9218                         goto err;
9219         } else {
9220                 pid = 0;
9221                 sysid = lockt_sysid;
9222         }
9223 retry:
9224         flk.l_type = ltype;
9225         flk.l_whence = 0;               /* SEEK_SET */
9226         flk.l_start = args->offset;
9227         flk.l_len = posix_length;
9228         flk.l_sysid = sysid;
9229         flk.l_pid = pid;
9230         flag |= F_REMOTELOCK;
9231
9232         LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9233
9234         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9235         if (flk.l_len < 0 || flk.l_start < 0) {
9236                 resp->status = NFS4ERR_INVAL;
9237                 goto err;
9238         }
9239         error = fop_frlock(cs->vp, F_GETLK, &flk, flag, 0,
9240             NULL, cs->cr, NULL);
9241
9242         /*
9243          * N.B. We map error values to nfsv4 errors. This is differrent
9244          * than puterrno4 routine.
9245          */
9246         switch (error) {
9247         case 0:
9248                 if (flk.l_type == F_UNLCK)
9249                         resp->status = NFS4_OK;
9250                 else {
9251                         if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9252                                 goto retry;
9253                         resp->status = NFS4ERR_DENIED;
9254                 }
9255                 break;
9256         case EOVERFLOW:
9257                 resp->status = NFS4ERR_INVAL;
9258                 break;
9259         case EINVAL:
9260                 resp->status = NFS4ERR_NOTSUPP;
9261                 break;
9262         default:
9263                 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9264                     error);
9265                 resp->status = NFS4ERR_SERVERFAULT;
9266                 break;
9267         }
9268
9269 err:
9270         if (lo)
9271                 rfs4_lockowner_rele(lo);
9272         *cs->statusp = resp->status;
9273 out:
9274         DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9275             LOCKT4res *, resp);
9276 }
9277
9278 int
9279 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9280 {
9281         int err;
9282         int cmd;
9283         vnode_t *vp;
9284         struct shrlock shr;
9285         struct shr_locowner shr_loco;
9286         int fflags = 0;
9287
9288         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9289         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9290
9291         if (sp->rs_closed)
9292                 return (NFS4ERR_OLD_STATEID);
9293
9294         vp = sp->rs_finfo->rf_vp;
9295         ASSERT(vp);
9296
9297         shr.s_access = shr.s_deny = 0;
9298
9299         if (access & OPEN4_SHARE_ACCESS_READ) {
9300                 fflags |= FREAD;
9301                 shr.s_access |= F_RDACC;
9302         }
9303         if (access & OPEN4_SHARE_ACCESS_WRITE) {
9304                 fflags |= FWRITE;
9305                 shr.s_access |= F_WRACC;
9306         }
9307         ASSERT(shr.s_access);
9308
9309         if (deny & OPEN4_SHARE_DENY_READ)
9310                 shr.s_deny |= F_RDDNY;
9311         if (deny & OPEN4_SHARE_DENY_WRITE)
9312                 shr.s_deny |= F_WRDNY;
9313
9314         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9315         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9316         shr_loco.sl_pid = shr.s_pid;
9317         shr_loco.sl_id = shr.s_sysid;
9318         shr.s_owner = (caddr_t)&shr_loco;
9319         shr.s_own_len = sizeof (shr_loco);
9320
9321         cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9322
9323         err = fop_shrlock(vp, cmd, &shr, fflags, CRED(), NULL);
9324         if (err != 0) {
9325                 if (err == EAGAIN)
9326                         err = NFS4ERR_SHARE_DENIED;
9327                 else
9328                         err = puterrno4(err);
9329                 return (err);
9330         }
9331
9332         sp->rs_share_access |= access;
9333         sp->rs_share_deny |= deny;
9334
9335         return (0);
9336 }
9337
9338 int
9339 rfs4_unshare(rfs4_state_t *sp)
9340 {
9341         int err;
9342         struct shrlock shr;
9343         struct shr_locowner shr_loco;
9344
9345         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9346
9347         if (sp->rs_closed || sp->rs_share_access == 0)
9348                 return (0);
9349
9350         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9351         ASSERT(sp->rs_finfo->rf_vp);
9352
9353         shr.s_access = shr.s_deny = 0;
9354         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9355         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9356         shr_loco.sl_pid = shr.s_pid;
9357         shr_loco.sl_id = shr.s_sysid;
9358         shr.s_owner = (caddr_t)&shr_loco;
9359         shr.s_own_len = sizeof (shr_loco);
9360
9361         err = fop_shrlock(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9362             NULL);
9363         if (err != 0) {
9364                 err = puterrno4(err);
9365                 return (err);
9366         }
9367
9368         sp->rs_share_access = 0;
9369         sp->rs_share_deny = 0;
9370
9371         return (0);
9372
9373 }
9374
9375 static int
9376 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9377 {
9378         struct clist    *wcl;
9379         count4          count = rok->data_len;
9380         int             wlist_len;
9381
9382         wcl = args->wlist;
9383         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9384                 return (FALSE);
9385         }
9386         wcl = args->wlist;
9387         rok->wlist_len = wlist_len;
9388         rok->wlist = wcl;
9389         return (TRUE);
9390 }
9391
9392 /* tunable: nonzero disables server referrals (see vn_is_nfs_reparse) */
9393 int rfs4_no_referrals = 0;
9394
9395 /*
9396  * Find an NFS record in reparse point data.
9397  * Returns 0 for success and <0 or an errno value on failure.
9398  */
9399 int
9400 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9401 {
9402         int err;
9403         char *stype, *val;
9404         nvlist_t *nvl;
9405         nvpair_t *curr;
9406
9407         if ((nvl = reparse_init()) == NULL)
9408                 return (-1);
9409
9410         if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9411                 reparse_free(nvl);
9412                 return (err);
9413         }
9414
9415         curr = NULL;
9416         while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9417                 if ((stype = nvpair_name(curr)) == NULL) {
9418                         reparse_free(nvl);
9419                         return (-2);
9420                 }
9421                 if (strncasecmp(stype, "NFS", 3) == 0)
9422                         break;
9423         }
9424
9425         if ((curr == NULL) ||
9426             (nvpair_value_string(curr, &val))) {
9427                 reparse_free(nvl);
9428                 return (-3);
9429         }
9430         *nvlp = nvl;
9431         *svcp = stype;
9432         *datap = val;
9433         return (0);
9434 }
9435
9436 int
9437 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9438 {
9439         nvlist_t *nvl;
9440         char *s, *d;
9441
9442         if (rfs4_no_referrals != 0)
9443                 return (B_FALSE);
9444
9445         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9446                 return (B_FALSE);
9447
9448         if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9449                 return (B_FALSE);
9450
9451         reparse_free(nvl);
9452
9453         return (B_TRUE);
9454 }
9455
9456 /*
9457  * There is a user-level copy of this routine in ref_subr.c.
9458  * Changes should be kept in sync.
9459  */
9460 static int
9461 nfs4_create_components(char *path, component4 *comp4)
9462 {
9463         int slen, plen, ncomp;
9464         char *ori_path, *nxtc, buf[MAXNAMELEN];
9465
9466         if (path == NULL)
9467                 return (0);
9468
9469         plen = strlen(path) + 1;        /* include the terminator */
9470         ori_path = path;
9471         ncomp = 0;
9472
9473         /* count number of components in the path */
9474         for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9475                 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9476                         if ((slen = nxtc - path) == 0) {
9477                                 path = nxtc + 1;
9478                                 continue;
9479                         }
9480
9481                         if (comp4 != NULL) {
9482                                 bcopy(path, buf, slen);
9483                                 buf[slen] = '\0';
9484                                 (void) str_to_utf8(buf, &comp4[ncomp]);
9485                         }
9486
9487                         ncomp++;        /* 1 valid component */
9488                         path = nxtc + 1;
9489                 }
9490                 if (*nxtc == '\0' || *nxtc == '\n')
9491                         break;
9492         }
9493
9494         return (ncomp);
9495 }
9496
9497 /*
9498  * There is a user-level copy of this routine in ref_subr.c.
9499  * Changes should be kept in sync.
9500  */
9501 static int
9502 make_pathname4(char *path, pathname4 *pathname)
9503 {
9504         int ncomp;
9505         component4 *comp4;
9506
9507         if (pathname == NULL)
9508                 return (0);
9509
9510         if (path == NULL) {
9511                 pathname->pathname4_val = NULL;
9512                 pathname->pathname4_len = 0;
9513                 return (0);
9514         }
9515
9516         /* count number of components to alloc buffer */
9517         if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9518                 pathname->pathname4_val = NULL;
9519                 pathname->pathname4_len = 0;
9520                 return (0);
9521         }
9522         comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9523
9524         /* copy components into allocated buffer */
9525         ncomp = nfs4_create_components(path, comp4);
9526
9527         pathname->pathname4_val = comp4;
9528         pathname->pathname4_len = ncomp;
9529
9530         return (ncomp);
9531 }
9532
9533 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9534
9535 fs_locations4 *
9536 fetch_referral(vnode_t *vp, cred_t *cr)
9537 {
9538         nvlist_t *nvl;
9539         char *stype, *sdata;
9540         fs_locations4 *result;
9541         char buf[1024];
9542         size_t bufsize;
9543         XDR xdr;
9544         int err;
9545
9546         /*
9547          * Check attrs to ensure it's a reparse point
9548          */
9549         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9550                 return (NULL);
9551
9552         /*
9553          * Look for an NFS record and get the type and data
9554          */
9555         if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9556                 return (NULL);
9557
9558         /*
9559          * With the type and data, upcall to get the referral
9560          */
9561         bufsize = sizeof (buf);
9562         bzero(buf, sizeof (buf));
9563         err = reparse_kderef((const char *)stype, (const char *)sdata,
9564             buf, &bufsize);
9565         reparse_free(nvl);
9566
9567         DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9568             char *, stype, char *, sdata, char *, buf, int, err);
9569         if (err) {
9570                 cmn_err(CE_NOTE,
9571                     "reparsed daemon not running: unable to get referral (%d)",
9572                     err);
9573                 return (NULL);
9574         }
9575
9576         /*
9577          * We get an XDR'ed record back from the kderef call
9578          */
9579         xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9580         result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9581         err = xdr_fs_locations4(&xdr, result);
9582         XDR_DESTROY(&xdr);
9583         if (err != TRUE) {
9584                 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9585                     int, err);
9586                 return (NULL);
9587         }
9588
9589         /*
9590          * Look at path to recover fs_root, ignoring the leading '/'
9591          */
9592         (void) make_pathname4(vp->v_path, &result->fs_root);
9593
9594         return (result);
9595 }
9596
9597 char *
9598 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9599 {
9600         fs_locations4 *fsl;
9601         fs_location4 *fs;
9602         char *server, *path, *symbuf;
9603         static char *prefix = "/net/";
9604         int i, size, npaths;
9605         uint_t len;
9606
9607         /* Get the referral */
9608         if ((fsl = fetch_referral(vp, cr)) == NULL)
9609                 return (NULL);
9610
9611         /* Deal with only the first location and first server */
9612         fs = &fsl->locations_val[0];
9613         server = utf8_to_str(&fs->server_val[0], &len, NULL);
9614         if (server == NULL) {
9615                 rfs4_free_fs_locations4(fsl);
9616                 kmem_free(fsl, sizeof (fs_locations4));
9617                 return (NULL);
9618         }
9619
9620         /* Figure out size for "/net/" + host + /path/path/path + NULL */
9621         size = strlen(prefix) + len;
9622         for (i = 0; i < fs->rootpath.pathname4_len; i++)
9623                 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9624
9625         /* Allocate the symlink buffer and fill it */
9626         symbuf = kmem_zalloc(size, KM_SLEEP);
9627         (void) strcat(symbuf, prefix);
9628         (void) strcat(symbuf, server);
9629         kmem_free(server, len);
9630
9631         npaths = 0;
9632         for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9633                 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9634                 if (path == NULL)
9635                         continue;
9636                 (void) strcat(symbuf, "/");
9637                 (void) strcat(symbuf, path);
9638                 npaths++;
9639                 kmem_free(path, len);
9640         }
9641
9642         rfs4_free_fs_locations4(fsl);
9643         kmem_free(fsl, sizeof (fs_locations4));
9644
9645         if (strsz != NULL)
9646                 *strsz = size;
9647         return (symbuf);
9648 }
9649
9650 /*
9651  * Check to see if we have a downrev Solaris client, so that we
9652  * can send it a symlink instead of a referral.
9653  */
9654 int
9655 client_is_downrev(struct svc_req *req)
9656 {
9657         struct sockaddr *ca;
9658         rfs4_clntip_t *ci;
9659         bool_t create = FALSE;
9660         int is_downrev;
9661
9662         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9663         ASSERT(ca);
9664         ci = rfs4_find_clntip(ca, &create);
9665         if (ci == NULL)
9666                 return (0);
9667         is_downrev = ci->ri_no_referrals;
9668         rfs4_dbe_rele(ci->ri_dbe);
9669         return (is_downrev);
9670 }