kernel/fs/nfs/nfs4_srv.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  24  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  25  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  26  */
  27
  28 /*
  29  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  30  *      All Rights Reserved
  31  */
  32
  33 #include <sys/param.h>
  34 #include <sys/types.h>
  35 #include <sys/systm.h>
  36 #include <sys/cred.h>
  37 #include <sys/buf.h>
  38 #include <sys/vfs.h>
  39 #include <sys/vnode.h>
  40 #include <sys/uio.h>
  41 #include <sys/errno.h>
  42 #include <sys/sysmacros.h>
  43 #include <sys/statvfs.h>
  44 #include <sys/kmem.h>
  45 #include <sys/dirent.h>
  46 #include <sys/cmn_err.h>
  47 #include <sys/debug.h>
  48 #include <sys/systeminfo.h>
  49 #include <sys/flock.h>
  50 #include <sys/pathname.h>
  51 #include <sys/nbmlock.h>
  52 #include <sys/share.h>
  53 #include <sys/atomic.h>
  54 #include <sys/policy.h>
  55 #include <sys/fem.h>
  56 #include <sys/sdt.h>
  57 #include <sys/ddi.h>
  58 #include <sys/zone.h>
  59
  60 #include <sys/fs_reparse.h>
  61
  62 #include <rpc/types.h>
  63 #include <rpc/auth.h>
  64 #include <rpc/rpcsec_gss.h>
  65 #include <rpc/svc.h>
  66
  67 #include <nfs/nfs.h>
  68 #include <nfs/export.h>
  69 #include <nfs/nfs_cmd.h>
  70 #include <nfs/lm.h>
  71 #include <nfs/nfs4.h>
  72
  73 #include <sys/strsubr.h>
  74 #include <sys/strsun.h>
  75
  76 #include <inet/common.h>
  77 #include <inet/ip.h>
  78 #include <inet/ip6.h>
  79
  80 #define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
  81 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
  82 #define RFS4_LOCK_DELAY 10      /* Milliseconds */
  83 static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
  84 extern struct svc_ops rdma_svc_ops;
  85 extern int nfs_loaned_buffers;
  86 /* End of Tunables */
  87
  88 static int rdma_setup_read_data4(READ4args *, READ4res *);
  89
  90 /*
  91  * Used to bump the stateid4.seqid value and show changes in the stateid
  92  */
  93 #define next_stateid(sp) (++(sp)->bits.chgseq)
  94
  95 /*
  96  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
  97  *      This is used to return NFS4ERR_TOOSMALL when clients specify
  98  *      maxcount that isn't large enough to hold the smallest possible
  99  *      XDR encoded dirent.
 100  *
 101  *          sizeof cookie (8 bytes) +
 102  *          sizeof name_len (4 bytes) +
 103  *          sizeof smallest (padded) name (4 bytes) +
 104  *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 105  *          sizeof attrlist4_len (4 bytes) +
 106  *          sizeof next boolean (4 bytes)
 107  *
 108  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 109  * the smallest possible entry4 (assumes no attrs requested).
 110  *      sizeof nfsstat4 (4 bytes) +
 111  *      sizeof verifier4 (8 bytes) +
 112  *      sizeof entry4list bool (4 bytes) +
 113  *      sizeof entry4   (36 bytes) +
 114  *      sizeof eof bool  (4 bytes)
 115  *
 116  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 117  *      fop_readdir.  Its value is the size of the maximum possible dirent
 118  *      for solaris.  The DIRENT_RECLEN macro returns   the size of dirent
 119  *      required for a given name length.  MAXNAMELEN is the maximum
 120  *      filename length allowed in Solaris.  The first two DIRENT_RECLEN()
 121  *      macros are to allow for . and .. entries -- just a minor tweak to try
 122  *      and guarantee that buffer we give to fop_readdir will be large enough
 123  *      to hold ., .., and the largest possible solaris dirent64.
 124  */
 125 #define RFS4_MINLEN_ENTRY4 36
 126 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
 127 #define RFS4_MINLEN_RDDIR_BUF \
 128         (DIRENT_RECLEN(1) + DIRENT_RECLEN(2) + DIRENT_RECLEN(MAXNAMELEN))
 129
 130 /*
 131  * It would be better to pad to 4 bytes since that's what XDR would do,
 132  * but the dirents UFS gives us are already padded to 8, so just take
 133  * what we're given.  Dircount is only a hint anyway.  Currently the
 134  * solaris kernel is ASCII only, so there's no point in calling the
 135  * UTF8 functions.
 136  *
 137  * dirent64: named padded to provide 8 byte struct alignment
 138  *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 139  *
 140  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 141  *
 142  */
 143 #define DIRENT64_TO_DIRCOUNT(dp) \
 144         (3 * BYTES_PER_XDR_UNIT + DIRENT_NAMELEN((dp)->d_reclen))
 145
 146 time_t rfs4_start_time;                 /* Initialized in rfs4_srvrinit */
 147
 148 static sysid_t lockt_sysid;             /* dummy sysid for all LOCKT calls */
 149
 150 u_longlong_t    nfs4_srv_caller_id;
 151 uint_t          nfs4_srv_vkey = 0;
 152
 153 verifier4       Write4verf;
 154 verifier4       Readdir4verf;
 155
 156 void    rfs4_init_compound_state(struct compound_state *);
 157
 158 static void     nullfree(caddr_t);
 159 static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 160                         struct compound_state *);
 161 static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 162                         struct compound_state *);
 163 static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 164                         struct compound_state *);
 165 static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 166                         struct compound_state *);
 167 static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 168                         struct compound_state *);
 169 static void     rfs4_op_create_free(nfs_resop4 *resop);
 170 static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 171                         struct svc_req *, struct compound_state *);
 172 static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 173                         struct svc_req *, struct compound_state *);
 174 static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 175                         struct compound_state *);
 176 static void     rfs4_op_getattr_free(nfs_resop4 *);
 177 static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 178                         struct compound_state *);
 179 static void     rfs4_op_getfh_free(nfs_resop4 *);
 180 static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 181                         struct compound_state *);
 182 static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 183                         struct compound_state *);
 184 static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 185                         struct compound_state *);
 186 static void     lock_denied_free(nfs_resop4 *);
 187 static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 188                         struct compound_state *);
 189 static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 190                         struct compound_state *);
 191 static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 192                         struct compound_state *);
 193 static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 194                         struct compound_state *);
 195 static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
 196                                 struct svc_req *req, struct compound_state *cs);
 197 static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 198                         struct compound_state *);
 199 static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 200                         struct compound_state *);
 201 static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
 202                         struct svc_req *, struct compound_state *);
 203 static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
 204                         struct svc_req *, struct compound_state *);
 205 static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 206                         struct compound_state *);
 207 static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 208                         struct compound_state *);
 209 static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 210                         struct compound_state *);
 211 static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 212                         struct compound_state *);
 213 static void     rfs4_op_read_free(nfs_resop4 *);
 214 static void     rfs4_op_readdir_free(nfs_resop4 *resop);
 215 static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 216                         struct compound_state *);
 217 static void     rfs4_op_readlink_free(nfs_resop4 *);
 218 static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
 219                         struct svc_req *, struct compound_state *);
 220 static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 221                         struct compound_state *);
 222 static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 223                         struct compound_state *);
 224 static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 225                         struct compound_state *);
 226 static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 227                         struct compound_state *);
 228 static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 229                         struct compound_state *);
 230 static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 231                         struct compound_state *);
 232 static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 233                         struct compound_state *);
 234 static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 235                         struct compound_state *);
 236 static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 237                         struct svc_req *, struct compound_state *);
 238 static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 239                         struct svc_req *req, struct compound_state *);
 240 static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 241                         struct compound_state *);
 242 static void     rfs4_op_secinfo_free(nfs_resop4 *);
 243
 244 static nfsstat4 check_open_access(uint32_t,
 245                                 struct compound_state *, struct svc_req *);
 246 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 247 void rfs4_ss_clid(rfs4_client_t *);
 248
 249 /*
 250  * translation table for attrs
 251  */
 252 struct nfs4_ntov_table {
 253         union nfs4_attr_u *na;
 254         uint8_t amap[NFS4_MAXNUM_ATTRS];
 255         int attrcnt;
 256         bool_t vfsstat;
 257 };
 258
 259 static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 260 static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 261                                     struct nfs4_svgetit_arg *sargp);
 262
 263 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 264                     struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 265                     struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 266
 267 rfs4_servinst_t *rfs4_cur_servinst = NULL;      /* current server instance */
 268 kmutex_t        rfs4_servinst_lock;     /* protects linked list */
 269 int             rfs4_seen_first_compound;       /* set first time we see one */
 270
 271 /*
 272  * NFS4 op dispatch table
 273  */
 274
 275 struct rfsv4disp {
 276         void    (*dis_proc)();          /* proc to call */
 277         void    (*dis_resfree)();       /* frees space allocated by proc */
 278         int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
 279 };
 280
 281 static struct rfsv4disp rfsv4disptab[] = {
 282         /*
 283          * NFS VERSION 4
 284          */
 285
 286         /* RFS_NULL = 0 */
 287         {rfs4_op_illegal, nullfree, 0},
 288
 289         /* UNUSED = 1 */
 290         {rfs4_op_illegal, nullfree, 0},
 291
 292         /* UNUSED = 2 */
 293         {rfs4_op_illegal, nullfree, 0},
 294
 295         /* OP_ACCESS = 3 */
 296         {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
 297
 298         /* OP_CLOSE = 4 */
 299         {rfs4_op_close, nullfree, 0},
 300
 301         /* OP_COMMIT = 5 */
 302         {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
 303
 304         /* OP_CREATE = 6 */
 305         {rfs4_op_create, nullfree, 0},
 306
 307         /* OP_DELEGPURGE = 7 */
 308         {rfs4_op_delegpurge, nullfree, 0},
 309
 310         /* OP_DELEGRETURN = 8 */
 311         {rfs4_op_delegreturn, nullfree, 0},
 312
 313         /* OP_GETATTR = 9 */
 314         {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
 315
 316         /* OP_GETFH = 10 */
 317         {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
 318
 319         /* OP_LINK = 11 */
 320         {rfs4_op_link, nullfree, 0},
 321
 322         /* OP_LOCK = 12 */
 323         {rfs4_op_lock, lock_denied_free, 0},
 324
 325         /* OP_LOCKT = 13 */
 326         {rfs4_op_lockt, lock_denied_free, 0},
 327
 328         /* OP_LOCKU = 14 */
 329         {rfs4_op_locku, nullfree, 0},
 330
 331         /* OP_LOOKUP = 15 */
 332         {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 333
 334         /* OP_LOOKUPP = 16 */
 335         {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 336
 337         /* OP_NVERIFY = 17 */
 338         {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
 339
 340         /* OP_OPEN = 18 */
 341         {rfs4_op_open, rfs4_free_reply, 0},
 342
 343         /* OP_OPENATTR = 19 */
 344         {rfs4_op_openattr, nullfree, 0},
 345
 346         /* OP_OPEN_CONFIRM = 20 */
 347         {rfs4_op_open_confirm, nullfree, 0},
 348
 349         /* OP_OPEN_DOWNGRADE = 21 */
 350         {rfs4_op_open_downgrade, nullfree, 0},
 351
 352         /* OP_OPEN_PUTFH = 22 */
 353         {rfs4_op_putfh, nullfree, RPC_ALL},
 354
 355         /* OP_PUTPUBFH = 23 */
 356         {rfs4_op_putpubfh, nullfree, RPC_ALL},
 357
 358         /* OP_PUTROOTFH = 24 */
 359         {rfs4_op_putrootfh, nullfree, RPC_ALL},
 360
 361         /* OP_READ = 25 */
 362         {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
 363
 364         /* OP_READDIR = 26 */
 365         {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
 366
 367         /* OP_READLINK = 27 */
 368         {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
 369
 370         /* OP_REMOVE = 28 */
 371         {rfs4_op_remove, nullfree, 0},
 372
 373         /* OP_RENAME = 29 */
 374         {rfs4_op_rename, nullfree, 0},
 375
 376         /* OP_RENEW = 30 */
 377         {rfs4_op_renew, nullfree, 0},
 378
 379         /* OP_RESTOREFH = 31 */
 380         {rfs4_op_restorefh, nullfree, RPC_ALL},
 381
 382         /* OP_SAVEFH = 32 */
 383         {rfs4_op_savefh, nullfree, RPC_ALL},
 384
 385         /* OP_SECINFO = 33 */
 386         {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
 387
 388         /* OP_SETATTR = 34 */
 389         {rfs4_op_setattr, nullfree, 0},
 390
 391         /* OP_SETCLIENTID = 35 */
 392         {rfs4_op_setclientid, nullfree, 0},
 393
 394         /* OP_SETCLIENTID_CONFIRM = 36 */
 395         {rfs4_op_setclientid_confirm, nullfree, 0},
 396
 397         /* OP_VERIFY = 37 */
 398         {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
 399
 400         /* OP_WRITE = 38 */
 401         {rfs4_op_write, nullfree, 0},
 402
 403         /* OP_RELEASE_LOCKOWNER = 39 */
 404         {rfs4_op_release_lockowner, nullfree, 0},
 405 };
 406
 407 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
 408
 409 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 410
 411 #ifdef DEBUG
 412
 413 int             rfs4_fillone_debug = 0;
 414 int             rfs4_no_stub_access = 1;
 415 int             rfs4_rddir_debug = 0;
 416
 417 static char    *rfs4_op_string[] = {
 418         "rfs4_op_null",
 419         "rfs4_op_1 unused",
 420         "rfs4_op_2 unused",
 421         "rfs4_op_access",
 422         "rfs4_op_close",
 423         "rfs4_op_commit",
 424         "rfs4_op_create",
 425         "rfs4_op_delegpurge",
 426         "rfs4_op_delegreturn",
 427         "rfs4_op_getattr",
 428         "rfs4_op_getfh",
 429         "rfs4_op_link",
 430         "rfs4_op_lock",
 431         "rfs4_op_lockt",
 432         "rfs4_op_locku",
 433         "rfs4_op_lookup",
 434         "rfs4_op_lookupp",
 435         "rfs4_op_nverify",
 436         "rfs4_op_open",
 437         "rfs4_op_openattr",
 438         "rfs4_op_open_confirm",
 439         "rfs4_op_open_downgrade",
 440         "rfs4_op_putfh",
 441         "rfs4_op_putpubfh",
 442         "rfs4_op_putrootfh",
 443         "rfs4_op_read",
 444         "rfs4_op_readdir",
 445         "rfs4_op_readlink",
 446         "rfs4_op_remove",
 447         "rfs4_op_rename",
 448         "rfs4_op_renew",
 449         "rfs4_op_restorefh",
 450         "rfs4_op_savefh",
 451         "rfs4_op_secinfo",
 452         "rfs4_op_setattr",
 453         "rfs4_op_setclientid",
 454         "rfs4_op_setclient_confirm",
 455         "rfs4_op_verify",
 456         "rfs4_op_write",
 457         "rfs4_op_release_lockowner",
 458         "rfs4_op_illegal"
 459 };
 460 #endif
 461
 462 void    rfs4_ss_chkclid(rfs4_client_t *);
 463
 464 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 465
 466 extern void     rfs4_free_fs_locations4(fs_locations4 *);
 467
 468 #ifdef  nextdp
 469 #undef nextdp
 470 #endif
 471 #define nextdp(dp)      ((struct dirent *)((char *)(dp) + (dp)->d_reclen))
 472
 473 fem_t deleg_rdops = {
 474         .name = "deleg_rdops",
 475         .femop_open = deleg_rd_open,
 476         .femop_write = deleg_rd_write,
 477         .femop_setattr = deleg_rd_setattr,
 478         .femop_rwlock = deleg_rd_rwlock,
 479         .femop_space = deleg_rd_space,
 480         .femop_setsecattr = deleg_rd_setsecattr,
 481         .femop_vnevent = deleg_rd_vnevent,
 482 };
 483
 484 fem_t deleg_wrops = {
 485         .name = "deleg_wrops",
 486         .femop_open = deleg_wr_open,
 487         .femop_read = deleg_wr_read,
 488         .femop_write = deleg_wr_write,
 489         .femop_setattr = deleg_wr_setattr,
 490         .femop_rwlock = deleg_wr_rwlock,
 491         .femop_space = deleg_wr_space,
 492         .femop_setsecattr = deleg_wr_setsecattr,
 493         .femop_vnevent = deleg_wr_vnevent,
 494 };
 495
 496 int
 497 rfs4_srvrinit(void)
 498 {
 499         timespec32_t verf;
 500         int error;
 501         extern void rfs4_attr_init();
 502         extern krwlock_t rfs4_deleg_policy_lock;
 503
 504         /*
 505          * The following algorithm attempts to find a unique verifier
 506          * to be used as the write verifier returned from the server
 507          * to the client.  It is important that this verifier change
 508          * whenever the server reboots.  Of secondary importance, it
 509          * is important for the verifier to be unique between two
 510          * different servers.
 511          *
 512          * Thus, an attempt is made to use the system hostid and the
 513          * current time in seconds when the nfssrv kernel module is
 514          * loaded.  It is assumed that an NFS server will not be able
 515          * to boot and then to reboot in less than a second.  If the
 516          * hostid has not been set, then the current high resolution
 517          * time is used.  This will ensure different verifiers each
 518          * time the server reboots and minimize the chances that two
 519          * different servers will have the same verifier.
 520          * XXX - this is broken on LP64 kernels.
 521          */
 522         verf.tv_sec = (time_t)zone_get_hostid(NULL);
 523         if (verf.tv_sec != 0) {
 524                 verf.tv_nsec = gethrestime_sec();
 525         } else {
 526                 timespec_t tverf;
 527
 528                 gethrestime(&tverf);
 529                 verf.tv_sec = (time_t)tverf.tv_sec;
 530                 verf.tv_nsec = tverf.tv_nsec;
 531         }
 532
 533         Write4verf = *(uint64_t *)&verf;
 534
 535         rfs4_attr_init();
 536         mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 537
 538         /* Used to manage create/destroy of server state */
 539         mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
 540
 541         /* Used to manage access to server instance linked list */
 542         mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 543
 544         /* Used to manage access to rfs4_deleg_policy */
 545         rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 546
 547         nfs4_srv_caller_id = fs_new_caller_id();
 548
 549         lockt_sysid = lm_alloc_sysidt();
 550
 551         vsd_create(&nfs4_srv_vkey, NULL);
 552
 553         return (0);
 554 }
 555
 556 void
 557 rfs4_srvrfini(void)
 558 {
 559         extern krwlock_t rfs4_deleg_policy_lock;
 560
 561         if (lockt_sysid != LM_NOSYSID) {
 562                 lm_free_sysidt(lockt_sysid);
 563                 lockt_sysid = LM_NOSYSID;
 564         }
 565
 566         mutex_destroy(&rfs4_deleg_lock);
 567         mutex_destroy(&rfs4_state_lock);
 568         rw_destroy(&rfs4_deleg_policy_lock);
 569 }
 570
 571 void
 572 rfs4_init_compound_state(struct compound_state *cs)
 573 {
 574         bzero(cs, sizeof (*cs));
 575         cs->cont = TRUE;
 576         cs->access = CS_ACCESS_DENIED;
 577         cs->deleg = FALSE;
 578         cs->mandlock = FALSE;
 579         cs->fh.nfs_fh4_val = cs->fhbuf;
 580 }
 581
 582 void
 583 rfs4_grace_start(rfs4_servinst_t *sip)
 584 {
 585         rw_enter(&sip->rwlock, RW_WRITER);
 586         sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 587         sip->grace_period = rfs4_grace_period;
 588         rw_exit(&sip->rwlock);
 589 }
 590
 591 /*
 592  * returns true if the instance's grace period has never been started
 593  */
 594 int
 595 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 596 {
 597         time_t start_time;
 598
 599         rw_enter(&sip->rwlock, RW_READER);
 600         start_time = sip->start_time;
 601         rw_exit(&sip->rwlock);
 602
 603         return (start_time == 0);
 604 }
 605
 606 /*
 607  * Indicates if server instance is within the
 608  * grace period.
 609  */
 610 int
 611 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 612 {
 613         time_t grace_expiry;
 614
 615         rw_enter(&sip->rwlock, RW_READER);
 616         grace_expiry = sip->start_time + sip->grace_period;
 617         rw_exit(&sip->rwlock);
 618
 619         return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 620 }
 621
 622 int
 623 rfs4_clnt_in_grace(rfs4_client_t *cp)
 624 {
 625         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 626
 627         return (rfs4_servinst_in_grace(cp->rc_server_instance));
 628 }
 629
 630 /*
 631  * reset all currently active grace periods
 632  */
 633 void
 634 rfs4_grace_reset_all(void)
 635 {
 636         rfs4_servinst_t *sip;
 637
 638         mutex_enter(&rfs4_servinst_lock);
 639         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 640                 if (rfs4_servinst_in_grace(sip))
 641                         rfs4_grace_start(sip);
 642         mutex_exit(&rfs4_servinst_lock);
 643 }
 644
 645 /*
 646  * start any new instances' grace periods
 647  */
 648 void
 649 rfs4_grace_start_new(void)
 650 {
 651         rfs4_servinst_t *sip;
 652
 653         mutex_enter(&rfs4_servinst_lock);
 654         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 655                 if (rfs4_servinst_grace_new(sip))
 656                         rfs4_grace_start(sip);
 657         mutex_exit(&rfs4_servinst_lock);
 658 }
 659
 660 static rfs4_dss_path_t *
 661 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
 662 {
 663         size_t len;
 664         rfs4_dss_path_t *dss_path;
 665
 666         dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 667
 668         /*
 669          * Take a copy of the string, since the original may be overwritten.
 670          * Sadly, no strdup() in the kernel.
 671          */
 672         /* allow for NUL */
 673         len = strlen(path) + 1;
 674         dss_path->path = kmem_alloc(len, KM_SLEEP);
 675         (void) strlcpy(dss_path->path, path, len);
 676
 677         /* associate with servinst */
 678         dss_path->sip = sip;
 679         dss_path->index = index;
 680
 681         /*
 682          * Add to list of served paths.
 683          * No locking required, as we're only ever called at startup.
 684          */
 685         if (rfs4_dss_pathlist == NULL) {
 686                 /* this is the first dss_path_t */
 687
 688                 /* needed for insque/remque */
 689                 dss_path->next = dss_path->prev = dss_path;
 690
 691                 rfs4_dss_pathlist = dss_path;
 692         } else {
 693                 insque(dss_path, rfs4_dss_pathlist);
 694         }
 695
 696         return (dss_path);
 697 }
 698
 699 /*
 700  * Create a new server instance, and make it the currently active instance.
 701  * Note that starting the grace period too early will reduce the clients'
 702  * recovery window.
 703  */
 704 void
 705 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
 706 {
 707         unsigned i;
 708         rfs4_servinst_t *sip;
 709         rfs4_oldstate_t *oldstate;
 710
 711         sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 712         rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 713
 714         sip->start_time = (time_t)0;
 715         sip->grace_period = (time_t)0;
 716         sip->next = NULL;
 717         sip->prev = NULL;
 718
 719         rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 720         /*
 721          * This initial dummy entry is required to setup for insque/remque.
 722          * It must be skipped over whenever the list is traversed.
 723          */
 724         oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 725         /* insque/remque require initial list entry to be self-terminated */
 726         oldstate->next = oldstate;
 727         oldstate->prev = oldstate;
 728         sip->oldstate = oldstate;
 729
 730
 731         sip->dss_npaths = dss_npaths;
 732         sip->dss_paths = kmem_alloc(dss_npaths *
 733             sizeof (rfs4_dss_path_t *), KM_SLEEP);
 734
 735         for (i = 0; i < dss_npaths; i++) {
 736                 sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
 737         }
 738
 739         mutex_enter(&rfs4_servinst_lock);
 740         if (rfs4_cur_servinst != NULL) {
 741                 /* add to linked list */
 742                 sip->prev = rfs4_cur_servinst;
 743                 rfs4_cur_servinst->next = sip;
 744         }
 745         if (start_grace)
 746                 rfs4_grace_start(sip);
 747         /* make the new instance "current" */
 748         rfs4_cur_servinst = sip;
 749
 750         mutex_exit(&rfs4_servinst_lock);
 751 }
 752
 753 /*
 754  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 755  * all instances directly.
 756  */
 757 void
 758 rfs4_servinst_destroy_all(void)
 759 {
 760         rfs4_servinst_t *sip, *prev, *current;
 761 #ifdef DEBUG
 762         int n = 0;
 763 #endif
 764
 765         mutex_enter(&rfs4_servinst_lock);
 766         ASSERT(rfs4_cur_servinst != NULL);
 767         current = rfs4_cur_servinst;
 768         rfs4_cur_servinst = NULL;
 769         for (sip = current; sip != NULL; sip = prev) {
 770                 prev = sip->prev;
 771                 rw_destroy(&sip->rwlock);
 772                 if (sip->oldstate)
 773                         kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 774                 if (sip->dss_paths)
 775                         kmem_free(sip->dss_paths,
 776                             sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 777                 kmem_free(sip, sizeof (rfs4_servinst_t));
 778 #ifdef DEBUG
 779                 n++;
 780 #endif
 781         }
 782         mutex_exit(&rfs4_servinst_lock);
 783 }
 784
 785 /*
 786  * Assign the current server instance to a client_t.
 787  * Should be called with cp->rc_dbe held.
 788  */
 789 void
 790 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
 791 {
 792         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 793
 794         /*
 795          * The lock ensures that if the current instance is in the process
 796          * of changing, we will see the new one.
 797          */
 798         mutex_enter(&rfs4_servinst_lock);
 799         cp->rc_server_instance = sip;
 800         mutex_exit(&rfs4_servinst_lock);
 801 }
 802
 803 rfs4_servinst_t *
 804 rfs4_servinst(rfs4_client_t *cp)
 805 {
 806         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 807
 808         return (cp->rc_server_instance);
 809 }
 810
 811 /* ARGSUSED */
 812 static void
 813 nullfree(caddr_t resop)
 814 {
 815 }
 816
 817 /*
 818  * This is a fall-through for invalid or not implemented (yet) ops
 819  */
 820 /* ARGSUSED */
 821 static void
 822 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 823     struct compound_state *cs)
 824 {
 825         *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 826 }
 827
 828 /*
 829  * Check if the security flavor, nfsnum, is in the flavor_list.
 830  */
 831 bool_t
 832 in_flavor_list(int nfsnum, int *flavor_list, int count)
 833 {
 834         int i;
 835
 836         for (i = 0; i < count; i++) {
 837                 if (nfsnum == flavor_list[i])
 838                         return (TRUE);
 839         }
 840         return (FALSE);
 841 }
 842
 843 /*
 844  * Used by rfs4_op_secinfo to get the security information from the
 845  * export structure associated with the component.
 846  */
 847 /* ARGSUSED */
 848 static nfsstat4
 849 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 850 {
 851         int error, different_export = 0;
 852         vnode_t *dvp, *vp;
 853         struct exportinfo *exi = NULL;
 854         fid_t fid;
 855         uint_t count, i;
 856         secinfo4 *resok_val;
 857         struct secinfo *secp;
 858         seconfig_t *si;
 859         bool_t did_traverse = FALSE;
 860         int dotdot, walk;
 861
 862         dvp = cs->vp;
 863         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 864
 865         /*
 866          * If dotdotting, then need to check whether it's above the
 867          * root of a filesystem, or above an export point.
 868          */
 869         if (dotdot) {
 870
 871                 /*
 872                  * If dotdotting at the root of a filesystem, then
 873                  * need to traverse back to the mounted-on filesystem
 874                  * and do the dotdot lookup there.
 875                  */
 876                 if (cs->vp->v_flag & VROOT) {
 877
 878                         /*
 879                          * If at the system root, then can
 880                          * go up no further.
 881                          */
 882                         if (VN_CMP(dvp, rootdir))
 883                                 return (puterrno4(ENOENT));
 884
 885                         /*
 886                          * Traverse back to the mounted-on filesystem
 887                          */
 888                         dvp = untraverse(cs->vp);
 889
 890                         /*
 891                          * Set the different_export flag so we remember
 892                          * to pick up a new exportinfo entry for
 893                          * this new filesystem.
 894                          */
 895                         different_export = 1;
 896                 } else {
 897
 898                         /*
 899                          * If dotdotting above an export point then set
 900                          * the different_export to get new export info.
 901                          */
 902                         different_export = nfs_exported(cs->exi, cs->vp);
 903                 }
 904         }
 905
 906         /*
 907          * Get the vnode for the component "nm".
 908          */
 909         error = fop_lookup(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
 910             NULL, NULL, NULL);
 911         if (error)
 912                 return (puterrno4(error));
 913
 914         /*
 915          * If the vnode is in a pseudo filesystem, or if the security flavor
 916          * used in the request is valid but not an explicitly shared flavor,
 917          * or the access bit indicates that this is a limited access,
 918          * check whether this vnode is visible.
 919          */
 920         if (!different_export &&
 921             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
 922             cs->access & CS_ACCESS_LIMITED)) {
 923                 if (! nfs_visible(cs->exi, vp, &different_export)) {
 924                         VN_RELE(vp);
 925                         return (puterrno4(ENOENT));
 926                 }
 927         }
 928
 929         /*
 930          * If it's a mountpoint, then traverse it.
 931          */
 932         if (vn_ismntpt(vp)) {
 933                 if ((error = traverse(&vp)) != 0) {
 934                         VN_RELE(vp);
 935                         return (puterrno4(error));
 936                 }
 937                 /* remember that we had to traverse mountpoint */
 938                 did_traverse = TRUE;
 939                 different_export = 1;
 940         } else if (vp->v_vfsp != dvp->v_vfsp) {
 941                 /*
 942                  * If vp isn't a mountpoint and the vfs ptrs aren't the same,
 943                  * then vp is probably an LOFS object.  We don't need the
 944                  * realvp, we just need to know that we might have crossed
 945                  * a server fs boundary and need to call checkexport4.
 946                  * (LOFS lookup hides server fs mountpoints, and actually calls
 947                  * traverse)
 948                  */
 949                 different_export = 1;
 950         }
 951
 952         /*
 953          * Get the export information for it.
 954          */
 955         if (different_export) {
 956
 957                 bzero(&fid, sizeof (fid));
 958                 fid.fid_len = MAXFIDSZ;
 959                 error = vop_fid_pseudo(vp, &fid);
 960                 if (error) {
 961                         VN_RELE(vp);
 962                         return (puterrno4(error));
 963                 }
 964
 965                 if (dotdot)
 966                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
 967                 else
 968                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
 969
 970                 if (exi == NULL) {
 971                         if (did_traverse == TRUE) {
 972                                 /*
 973                                  * If this vnode is a mounted-on vnode,
 974                                  * but the mounted-on file system is not
 975                                  * exported, send back the secinfo for
 976                                  * the exported node that the mounted-on
 977                                  * vnode lives in.
 978                                  */
 979                                 exi = cs->exi;
 980                         } else {
 981                                 VN_RELE(vp);
 982                                 return (puterrno4(EACCES));
 983                         }
 984                 }
 985         } else {
 986                 exi = cs->exi;
 987         }
 988         ASSERT(exi != NULL);
 989
 990
 991         /*
 992          * Create the secinfo result based on the security information
 993          * from the exportinfo structure (exi).
 994          *
 995          * Return all flavors for a pseudo node.
 996          * For a real export node, return the flavor that the client
 997          * has access with.
 998          */
 999         ASSERT(RW_LOCK_HELD(&exported_lock));
1000         if (PSEUDO(exi)) {
1001                 count = exi->exi_export.ex_seccnt; /* total sec count */
1002                 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1003                 secp = exi->exi_export.ex_secinfo;
1004
1005                 for (i = 0; i < count; i++) {
1006                         si = &secp[i].s_secinfo;
1007                         resok_val[i].flavor = si->sc_rpcnum;
1008                         if (resok_val[i].flavor == RPCSEC_GSS) {
1009                                 rpcsec_gss_info *info;
1010
1011                                 info = &resok_val[i].flavor_info;
1012                                 info->qop = si->sc_qop;
1013                                 info->service = (rpc_gss_svc_t)si->sc_service;
1014
1015                                 /* get oid opaque data */
1016                                 info->oid.sec_oid4_len =
1017                                     si->sc_gss_mech_type->length;
1018                                 info->oid.sec_oid4_val = kmem_alloc(
1019                                     si->sc_gss_mech_type->length, KM_SLEEP);
1020                                 bcopy(
1021                                     si->sc_gss_mech_type->elements,
1022                                     info->oid.sec_oid4_val,
1023                                     info->oid.sec_oid4_len);
1024                         }
1025                 }
1026                 resp->SECINFO4resok_len = count;
1027                 resp->SECINFO4resok_val = resok_val;
1028         } else {
1029                 int ret_cnt = 0, k = 0;
1030                 int *flavor_list;
1031
1032                 count = exi->exi_export.ex_seccnt; /* total sec count */
1033                 secp = exi->exi_export.ex_secinfo;
1034
1035                 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1036                 /* find out which flavors to return */
1037                 for (i = 0; i < count; i ++) {
1038                         int access, flavor, perm;
1039
1040                         flavor = secp[i].s_secinfo.sc_nfsnum;
1041                         perm = secp[i].s_flags;
1042
1043                         access = nfsauth4_secinfo_access(exi, cs->req,
1044                             flavor, perm, cs->basecr);
1045
1046                         if (! (access & NFSAUTH_DENIED) &&
1047                             ! (access & NFSAUTH_WRONGSEC)) {
1048                                 flavor_list[ret_cnt] = flavor;
1049                                 ret_cnt++;
1050                         }
1051                 }
1052
1053                 /* Create the returning SECINFO value */
1054                 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1055
1056                 for (i = 0; i < count; i++) {
1057                         /*
1058                          * If the flavor is in the flavor list,
1059                          * fill in resok_val.
1060                          */
1061                         si = &secp[i].s_secinfo;
1062                         if (in_flavor_list(si->sc_nfsnum,
1063                             flavor_list, ret_cnt)) {
1064                                 resok_val[k].flavor = si->sc_rpcnum;
1065                                 if (resok_val[k].flavor == RPCSEC_GSS) {
1066                                         rpcsec_gss_info *info;
1067
1068                                         info = &resok_val[k].flavor_info;
1069                                         info->qop = si->sc_qop;
1070                                         info->service = (rpc_gss_svc_t)
1071                                             si->sc_service;
1072
1073                                         /* get oid opaque data */
1074                                         info->oid.sec_oid4_len =
1075                                             si->sc_gss_mech_type->length;
1076                                         info->oid.sec_oid4_val = kmem_alloc(
1077                                             si->sc_gss_mech_type->length,
1078                                             KM_SLEEP);
1079                                         bcopy(si->sc_gss_mech_type->elements,
1080                                             info->oid.sec_oid4_val,
1081                                             info->oid.sec_oid4_len);
1082                                 }
1083                                 k++;
1084                         }
1085                         if (k >= ret_cnt)
1086                                 break;
1087                 }
1088                 resp->SECINFO4resok_len = ret_cnt;
1089                 resp->SECINFO4resok_val = resok_val;
1090                 kmem_free(flavor_list, count * sizeof (int));
1091         }
1092
1093         VN_RELE(vp);
1094         return (NFS4_OK);
1095 }
1096
1097 /*
1098  * SECINFO (Operation 33): Obtain required security information on
1099  * the component name in the format of (security-mechanism-oid, qop, service)
1100  * triplets.
1101  */
1102 /* ARGSUSED */
1103 static void
1104 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1105     struct compound_state *cs)
1106 {
1107         SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1108         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1109         utf8string *utfnm = &args->name;
1110         uint_t len;
1111         char *nm;
1112         struct sockaddr *ca;
1113         char *name = NULL;
1114         nfsstat4 status = NFS4_OK;
1115
1116         DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1117             SECINFO4args *, args);
1118
1119         /*
1120          * Current file handle (cfh) should have been set before getting
1121          * into this function. If not, return error.
1122          */
1123         if (cs->vp == NULL) {
1124                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1125                 goto out;
1126         }
1127
1128         if (cs->vp->v_type != VDIR) {
1129                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1130                 goto out;
1131         }
1132
1133         /*
1134          * Verify the component name. If failed, error out, but
1135          * do not error out if the component name is a "..".
1136          * SECINFO will return its parents secinfo data for SECINFO "..".
1137          */
1138         status = utf8_dir_verify(utfnm);
1139         if (status != NFS4_OK) {
1140                 if (utfnm->utf8string_len != 2 ||
1141                     utfnm->utf8string_val[0] != '.' ||
1142                     utfnm->utf8string_val[1] != '.') {
1143                         *cs->statusp = resp->status = status;
1144                         goto out;
1145                 }
1146         }
1147
1148         nm = utf8_to_str(utfnm, &len, NULL);
1149         if (nm == NULL) {
1150                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1151                 goto out;
1152         }
1153
1154         if (len > MAXNAMELEN) {
1155                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1156                 kmem_free(nm, len);
1157                 goto out;
1158         }
1159
1160         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1161         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1162             MAXPATHLEN  + 1);
1163
1164         if (name == NULL) {
1165                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1166                 kmem_free(nm, len);
1167                 goto out;
1168         }
1169
1170
1171         *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1172
1173         if (name != nm)
1174                 kmem_free(name, MAXPATHLEN + 1);
1175         kmem_free(nm, len);
1176
1177 out:
1178         DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1179             SECINFO4res *, resp);
1180 }
1181
1182 /*
1183  * Free SECINFO result.
1184  */
1185 /* ARGSUSED */
1186 static void
1187 rfs4_op_secinfo_free(nfs_resop4 *resop)
1188 {
1189         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1190         int count, i;
1191         secinfo4 *resok_val;
1192
1193         /* If this is not an Ok result, nothing to free. */
1194         if (resp->status != NFS4_OK) {
1195                 return;
1196         }
1197
1198         count = resp->SECINFO4resok_len;
1199         resok_val = resp->SECINFO4resok_val;
1200
1201         for (i = 0; i < count; i++) {
1202                 if (resok_val[i].flavor == RPCSEC_GSS) {
1203                         rpcsec_gss_info *info;
1204
1205                         info = &resok_val[i].flavor_info;
1206                         kmem_free(info->oid.sec_oid4_val,
1207                             info->oid.sec_oid4_len);
1208                 }
1209         }
1210         kmem_free(resok_val, count * sizeof (secinfo4));
1211         resp->SECINFO4resok_len = 0;
1212         resp->SECINFO4resok_val = NULL;
1213 }
1214
1215 /* ARGSUSED */
1216 static void
1217 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1218     struct compound_state *cs)
1219 {
1220         ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1221         ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1222         int error;
1223         vnode_t *vp;
1224         struct vattr va;
1225         int checkwriteperm;
1226         cred_t *cr = cs->cr;
1227
1228         DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1229             ACCESS4args *, args);
1230
1231 #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1232         if (cs->access == CS_ACCESS_DENIED) {
1233                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1234                 goto out;
1235         }
1236 #endif
1237         if (cs->vp == NULL) {
1238                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1239                 goto out;
1240         }
1241
1242         ASSERT(cr != NULL);
1243
1244         vp = cs->vp;
1245
1246         /*
1247          * If the file system is exported read only, it is not appropriate
1248          * to check write permissions for regular files and directories.
1249          * Special files are interpreted by the client, so the underlying
1250          * permissions are sent back to the client for interpretation.
1251          */
1252         if (rdonly4(req, cs) &&
1253             (vp->v_type == VREG || vp->v_type == VDIR))
1254                 checkwriteperm = 0;
1255         else
1256                 checkwriteperm = 1;
1257
1258         /*
1259          * XXX
1260          * We need the mode so that we can correctly determine access
1261          * permissions relative to a mandatory lock file.  Access to
1262          * mandatory lock files is denied on the server, so it might
1263          * as well be reflected to the server during the open.
1264          */
1265         va.va_mask = VATTR_MODE;
1266         error = fop_getattr(vp, &va, 0, cr, NULL);
1267         if (error) {
1268                 *cs->statusp = resp->status = puterrno4(error);
1269                 goto out;
1270         }
1271         resp->access = 0;
1272         resp->supported = 0;
1273
1274         if (args->access & ACCESS4_READ) {
1275                 error = fop_access(vp, VREAD, 0, cr, NULL);
1276                 if (!error && !MANDLOCK(vp, va.va_mode))
1277                         resp->access |= ACCESS4_READ;
1278                 resp->supported |= ACCESS4_READ;
1279         }
1280         if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1281                 error = fop_access(vp, VEXEC, 0, cr, NULL);
1282                 if (!error)
1283                         resp->access |= ACCESS4_LOOKUP;
1284                 resp->supported |= ACCESS4_LOOKUP;
1285         }
1286         if (checkwriteperm &&
1287             (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1288                 error = fop_access(vp, VWRITE, 0, cr, NULL);
1289                 if (!error && !MANDLOCK(vp, va.va_mode))
1290                         resp->access |=
1291                             (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1292                 resp->supported |=
1293                     resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1294         }
1295
1296         if (checkwriteperm &&
1297             (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1298                 error = fop_access(vp, VWRITE, 0, cr, NULL);
1299                 if (!error)
1300                         resp->access |= ACCESS4_DELETE;
1301                 resp->supported |= ACCESS4_DELETE;
1302         }
1303         if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1304                 error = fop_access(vp, VEXEC, 0, cr, NULL);
1305                 if (!error && !MANDLOCK(vp, va.va_mode))
1306                         resp->access |= ACCESS4_EXECUTE;
1307                 resp->supported |= ACCESS4_EXECUTE;
1308         }
1309
1310         *cs->statusp = resp->status = NFS4_OK;
1311 out:
1312         DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1313             ACCESS4res *, resp);
1314 }
1315
1316 /* ARGSUSED */
1317 static void
1318 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1319     struct compound_state *cs)
1320 {
1321         COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1322         COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1323         int error;
1324         vnode_t *vp = cs->vp;
1325         cred_t *cr = cs->cr;
1326         vattr_t va;
1327
1328         DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1329             COMMIT4args *, args);
1330
1331         if (vp == NULL) {
1332                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1333                 goto out;
1334         }
1335         if (cs->access == CS_ACCESS_DENIED) {
1336                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1337                 goto out;
1338         }
1339
1340         if (args->offset + args->count < args->offset) {
1341                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1342                 goto out;
1343         }
1344
1345         va.va_mask = VATTR_UID;
1346         error = fop_getattr(vp, &va, 0, cr, NULL);
1347
1348         /*
1349          * If we can't get the attributes, then we can't do the
1350          * right access checking.  So, we'll fail the request.
1351          */
1352         if (error) {
1353                 *cs->statusp = resp->status = puterrno4(error);
1354                 goto out;
1355         }
1356         if (rdonly4(req, cs)) {
1357                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1358                 goto out;
1359         }
1360
1361         if (vp->v_type != VREG) {
1362                 if (vp->v_type == VDIR)
1363                         resp->status = NFS4ERR_ISDIR;
1364                 else
1365                         resp->status = NFS4ERR_INVAL;
1366                 *cs->statusp = resp->status;
1367                 goto out;
1368         }
1369
1370         if (crgetuid(cr) != va.va_uid &&
1371             (error = fop_access(vp, VWRITE, 0, cs->cr, NULL))) {
1372                 *cs->statusp = resp->status = puterrno4(error);
1373                 goto out;
1374         }
1375
1376         error = fop_fsync(vp, FSYNC, cr, NULL);
1377
1378         if (error) {
1379                 *cs->statusp = resp->status = puterrno4(error);
1380                 goto out;
1381         }
1382
1383         *cs->statusp = resp->status = NFS4_OK;
1384         resp->writeverf = Write4verf;
1385 out:
1386         DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1387             COMMIT4res *, resp);
1388 }
1389
1390 /*
1391  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1392  * was completed. It does the nfsv4 create for special files.
1393  */
1394 /* ARGSUSED */
1395 static vnode_t *
1396 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1397     struct compound_state *cs, vattr_t *vap, char *nm)
1398 {
1399         int error;
1400         cred_t *cr = cs->cr;
1401         vnode_t *dvp = cs->vp;
1402         vnode_t *vp = NULL;
1403         int mode;
1404         enum vcexcl excl;
1405
1406         switch (args->type) {
1407         case NF4CHR:
1408         case NF4BLK:
1409                 if (secpolicy_sys_devices(cr) != 0) {
1410                         *cs->statusp = resp->status = NFS4ERR_PERM;
1411                         return (NULL);
1412                 }
1413                 if (args->type == NF4CHR)
1414                         vap->va_type = VCHR;
1415                 else
1416                         vap->va_type = VBLK;
1417                 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1418                     args->ftype4_u.devdata.specdata2);
1419                 vap->va_mask |= VATTR_RDEV;
1420                 break;
1421         case NF4SOCK:
1422                 vap->va_type = VSOCK;
1423                 break;
1424         case NF4FIFO:
1425                 vap->va_type = VFIFO;
1426                 break;
1427         default:
1428                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1429                 return (NULL);
1430         }
1431
1432         /*
1433          * Must specify the mode.
1434          */
1435         if (!(vap->va_mask & VATTR_MODE)) {
1436                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1437                 return (NULL);
1438         }
1439
1440         excl = EXCL;
1441
1442         mode = 0;
1443
1444         error = fop_create(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1445         if (error) {
1446                 *cs->statusp = resp->status = puterrno4(error);
1447                 return (NULL);
1448         }
1449         return (vp);
1450 }
1451
1452 /*
1453  * nfsv4 create is used to create non-regular files. For regular files,
1454  * use nfsv4 open.
1455  */
1456 /* ARGSUSED */
1457 static void
1458 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1459     struct compound_state *cs)
1460 {
1461         CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1462         CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1463         int error;
1464         struct vattr bva, iva, iva2, ava, *vap;
1465         cred_t *cr = cs->cr;
1466         vnode_t *dvp = cs->vp;
1467         vnode_t *vp = NULL;
1468         vnode_t *realvp;
1469         char *nm, *lnm;
1470         uint_t len, llen;
1471         int syncval = 0;
1472         struct nfs4_svgetit_arg sarg;
1473         struct nfs4_ntov_table ntov;
1474         struct statvfs64 sb;
1475         nfsstat4 status;
1476         struct sockaddr *ca;
1477         char *name = NULL;
1478         char *lname = NULL;
1479
1480         DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1481             CREATE4args *, args);
1482
1483         resp->attrset = 0;
1484
1485         if (dvp == NULL) {
1486                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1487                 goto out;
1488         }
1489
1490         /*
1491          * If there is an unshared filesystem mounted on this vnode,
1492          * do not allow to create an object in this directory.
1493          */
1494         if (vn_ismntpt(dvp)) {
1495                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1496                 goto out;
1497         }
1498
1499         /* Verify that type is correct */
1500         switch (args->type) {
1501         case NF4LNK:
1502         case NF4BLK:
1503         case NF4CHR:
1504         case NF4SOCK:
1505         case NF4FIFO:
1506         case NF4DIR:
1507                 break;
1508         default:
1509                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1510                 goto out;
1511         };
1512
1513         if (cs->access == CS_ACCESS_DENIED) {
1514                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1515                 goto out;
1516         }
1517         if (dvp->v_type != VDIR) {
1518                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1519                 goto out;
1520         }
1521         status = utf8_dir_verify(&args->objname);
1522         if (status != NFS4_OK) {
1523                 *cs->statusp = resp->status = status;
1524                 goto out;
1525         }
1526
1527         if (rdonly4(req, cs)) {
1528                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1529                 goto out;
1530         }
1531
1532         /*
1533          * Name of newly created object
1534          */
1535         nm = utf8_to_fn(&args->objname, &len, NULL);
1536         if (nm == NULL) {
1537                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1538                 goto out;
1539         }
1540
1541         if (len > MAXNAMELEN) {
1542                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1543                 kmem_free(nm, len);
1544                 goto out;
1545         }
1546
1547         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1548         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1549             MAXPATHLEN  + 1);
1550
1551         if (name == NULL) {
1552                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1553                 kmem_free(nm, len);
1554                 goto out;
1555         }
1556
1557         resp->attrset = 0;
1558
1559         sarg.sbp = &sb;
1560         sarg.is_referral = B_FALSE;
1561         nfs4_ntov_table_init(&ntov);
1562
1563         status = do_rfs4_set_attrs(&resp->attrset,
1564             &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1565
1566         if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1567                 status = NFS4ERR_INVAL;
1568
1569         if (status != NFS4_OK) {
1570                 *cs->statusp = resp->status = status;
1571                 if (name != nm)
1572                         kmem_free(name, MAXPATHLEN + 1);
1573                 kmem_free(nm, len);
1574                 nfs4_ntov_table_free(&ntov, &sarg);
1575                 resp->attrset = 0;
1576                 goto out;
1577         }
1578
1579         /* Get "before" change value */
1580         bva.va_mask = VATTR_CTIME|VATTR_SEQ|VATTR_MODE;
1581         error = fop_getattr(dvp, &bva, 0, cr, NULL);
1582         if (error) {
1583                 *cs->statusp = resp->status = puterrno4(error);
1584                 if (name != nm)
1585                         kmem_free(name, MAXPATHLEN + 1);
1586                 kmem_free(nm, len);
1587                 nfs4_ntov_table_free(&ntov, &sarg);
1588                 resp->attrset = 0;
1589                 goto out;
1590         }
1591         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1592
1593         vap = sarg.vap;
1594
1595         /*
1596          * Set the default initial values for attributes when the parent
1597          * directory does not have the VSUID/VSGID bit set and they have
1598          * not been specified in createattrs.
1599          */
1600         if (!(bva.va_mode & VSUID) && (vap->va_mask & VATTR_UID) == 0) {
1601                 vap->va_uid = crgetuid(cr);
1602                 vap->va_mask |= VATTR_UID;
1603         }
1604         if (!(bva.va_mode & VSGID) && (vap->va_mask & VATTR_GID) == 0) {
1605                 vap->va_gid = crgetgid(cr);
1606                 vap->va_mask |= VATTR_GID;
1607         }
1608
1609         vap->va_mask |= VATTR_TYPE;
1610         switch (args->type) {
1611         case NF4DIR:
1612                 vap->va_type = VDIR;
1613                 if ((vap->va_mask & VATTR_MODE) == 0) {
1614                         vap->va_mode = 0700;    /* default: owner rwx only */
1615                         vap->va_mask |= VATTR_MODE;
1616                 }
1617                 error = fop_mkdir(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1618                 if (error)
1619                         break;
1620
1621                 /*
1622                  * Get the initial "after" sequence number, if it fails,
1623                  * set to zero
1624                  */
1625                 iva.va_mask = VATTR_SEQ;
1626                 if (fop_getattr(dvp, &iva, 0, cs->cr, NULL))
1627                         iva.va_seq = 0;
1628                 break;
1629         case NF4LNK:
1630                 vap->va_type = VLNK;
1631                 if ((vap->va_mask & VATTR_MODE) == 0) {
1632                         vap->va_mode = 0700;    /* default: owner rwx only */
1633                         vap->va_mask |= VATTR_MODE;
1634                 }
1635
1636                 /*
1637                  * symlink names must be treated as data
1638                  */
1639                 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1640                     &llen, NULL);
1641
1642                 if (lnm == NULL) {
1643                         *cs->statusp = resp->status = NFS4ERR_INVAL;
1644                         if (name != nm)
1645                                 kmem_free(name, MAXPATHLEN + 1);
1646                         kmem_free(nm, len);
1647                         nfs4_ntov_table_free(&ntov, &sarg);
1648                         resp->attrset = 0;
1649                         goto out;
1650                 }
1651
1652                 if (llen > MAXPATHLEN) {
1653                         *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1654                         if (name != nm)
1655                                 kmem_free(name, MAXPATHLEN + 1);
1656                         kmem_free(nm, len);
1657                         kmem_free(lnm, llen);
1658                         nfs4_ntov_table_free(&ntov, &sarg);
1659                         resp->attrset = 0;
1660                         goto out;
1661                 }
1662
1663                 lname = nfscmd_convname(ca, cs->exi, lnm,
1664                     NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1665
1666                 if (lname == NULL) {
1667                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1668                         if (name != nm)
1669                                 kmem_free(name, MAXPATHLEN + 1);
1670                         kmem_free(nm, len);
1671                         kmem_free(lnm, llen);
1672                         nfs4_ntov_table_free(&ntov, &sarg);
1673                         resp->attrset = 0;
1674                         goto out;
1675                 }
1676
1677                 error = fop_symlink(dvp, name, vap, lname, cr, NULL, 0);
1678                 if (lname != lnm)
1679                         kmem_free(lname, MAXPATHLEN + 1);
1680                 kmem_free(lnm, llen);
1681                 if (error)
1682                         break;
1683
1684                 /*
1685                  * Get the initial "after" sequence number, if it fails,
1686                  * set to zero
1687                  */
1688                 iva.va_mask = VATTR_SEQ;
1689                 if (fop_getattr(dvp, &iva, 0, cs->cr, NULL))
1690                         iva.va_seq = 0;
1691
1692                 error = fop_lookup(dvp, name, &vp, NULL, 0, NULL, cr,
1693                     NULL, NULL, NULL);
1694                 if (error)
1695                         break;
1696
1697                 /*
1698                  * va_seq is not safe over VOP calls, check it again
1699                  * if it has changed zero out iva to force atomic = FALSE.
1700                  */
1701                 iva2.va_mask = VATTR_SEQ;
1702                 if (fop_getattr(dvp, &iva2, 0, cs->cr, NULL) ||
1703                     iva2.va_seq != iva.va_seq)
1704                         iva.va_seq = 0;
1705                 break;
1706         default:
1707                 /*
1708                  * probably a special file.
1709                  */
1710                 if ((vap->va_mask & VATTR_MODE) == 0) {
1711                         vap->va_mode = 0600;    /* default: owner rw only */
1712                         vap->va_mask |= VATTR_MODE;
1713                 }
1714                 syncval = FNODSYNC;
1715                 /*
1716                  * We know this will only generate one VOP call
1717                  */
1718                 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1719
1720                 if (vp == NULL) {
1721                         if (name != nm)
1722                                 kmem_free(name, MAXPATHLEN + 1);
1723                         kmem_free(nm, len);
1724                         nfs4_ntov_table_free(&ntov, &sarg);
1725                         resp->attrset = 0;
1726                         goto out;
1727                 }
1728
1729                 /*
1730                  * Get the initial "after" sequence number, if it fails,
1731                  * set to zero
1732                  */
1733                 iva.va_mask = VATTR_SEQ;
1734                 if (fop_getattr(dvp, &iva, 0, cs->cr, NULL))
1735                         iva.va_seq = 0;
1736
1737                 break;
1738         }
1739         if (name != nm)
1740                 kmem_free(name, MAXPATHLEN + 1);
1741         kmem_free(nm, len);
1742
1743         if (error) {
1744                 *cs->statusp = resp->status = puterrno4(error);
1745         }
1746
1747         /*
1748          * Force modified data and metadata out to stable storage.
1749          */
1750         (void) fop_fsync(dvp, 0, cr, NULL);
1751
1752         if (resp->status != NFS4_OK) {
1753                 if (vp != NULL)
1754                         VN_RELE(vp);
1755                 nfs4_ntov_table_free(&ntov, &sarg);
1756                 resp->attrset = 0;
1757                 goto out;
1758         }
1759
1760         /*
1761          * Finish setup of cinfo response, "before" value already set.
1762          * Get "after" change value, if it fails, simply return the
1763          * before value.
1764          */
1765         ava.va_mask = VATTR_CTIME|VATTR_SEQ;
1766         if (fop_getattr(dvp, &ava, 0, cr, NULL)) {
1767                 ava.va_ctime = bva.va_ctime;
1768                 ava.va_seq = 0;
1769         }
1770         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1771
1772         /*
1773          * True verification that object was created with correct
1774          * attrs is impossible.  The attrs could have been changed
1775          * immediately after object creation.  If attributes did
1776          * not verify, the only recourse for the server is to
1777          * destroy the object.  Maybe if some attrs (like gid)
1778          * are set incorrectly, the object should be destroyed;
1779          * however, seems bad as a default policy.  Do we really
1780          * want to destroy an object over one of the times not
1781          * verifying correctly?  For these reasons, the server
1782          * currently sets bits in attrset for createattrs
1783          * that were set; however, no verification is done.
1784          *
1785          * vmask_to_nmask accounts for vattr bits set on create
1786          *      [do_rfs4_set_attrs() only sets resp bits for
1787          *       non-vattr/vfs bits.]
1788          * Mask off any bits set by default so as not to return
1789          * more attrset bits than were requested in createattrs
1790          */
1791         nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1792         resp->attrset &= args->createattrs.attrmask;
1793         nfs4_ntov_table_free(&ntov, &sarg);
1794
1795         error = makefh4(&cs->fh, vp, cs->exi);
1796         if (error) {
1797                 *cs->statusp = resp->status = puterrno4(error);
1798         }
1799
1800         /*
1801          * The cinfo.atomic = TRUE only if we got no errors, we have
1802          * non-zero va_seq's, and it has incremented by exactly one
1803          * during the creation and it didn't change during the fop_lookup
1804          * or fop_fsync.
1805          */
1806         if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1807             iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1808                 resp->cinfo.atomic = TRUE;
1809         else
1810                 resp->cinfo.atomic = FALSE;
1811
1812         /*
1813          * Force modified metadata out to stable storage.
1814          *
1815          * if a underlying vp exists, pass it to fop_fsync
1816          */
1817         if (fop_realvp(vp, &realvp, NULL) == 0)
1818                 (void) fop_fsync(realvp, syncval, cr, NULL);
1819         else
1820                 (void) fop_fsync(vp, syncval, cr, NULL);
1821
1822         if (resp->status != NFS4_OK) {
1823                 VN_RELE(vp);
1824                 goto out;
1825         }
1826         if (cs->vp)
1827                 VN_RELE(cs->vp);
1828
1829         cs->vp = vp;
1830         *cs->statusp = resp->status = NFS4_OK;
1831 out:
1832         DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1833             CREATE4res *, resp);
1834 }
1835
1836 /*ARGSUSED*/
1837 static void
1838 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1839     struct compound_state *cs)
1840 {
1841         DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1842             DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1843
1844         rfs4_op_inval(argop, resop, req, cs);
1845
1846         DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1847             DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1848 }
1849
1850 /*ARGSUSED*/
1851 static void
1852 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1853     struct compound_state *cs)
1854 {
1855         DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1856         DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1857         rfs4_deleg_state_t *dsp;
1858         nfsstat4 status;
1859
1860         DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1861             DELEGRETURN4args *, args);
1862
1863         status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1864         resp->status = *cs->statusp = status;
1865         if (status != NFS4_OK)
1866                 goto out;
1867
1868         /* Ensure specified filehandle matches */
1869         if (cs->vp != dsp->rds_finfo->rf_vp) {
1870                 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1871         } else
1872                 rfs4_return_deleg(dsp, FALSE);
1873
1874         rfs4_update_lease(dsp->rds_client);
1875
1876         rfs4_deleg_state_rele(dsp);
1877 out:
1878         DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
1879             DELEGRETURN4res *, resp);
1880 }
1881
1882 /*
1883  * Check to see if a given "flavor" is an explicitly shared flavor.
1884  * The assumption of this routine is the "flavor" is already a valid
1885  * flavor in the secinfo list of "exi".
1886  *
1887  *      e.g.
1888  *              # share -o sec=flavor1 /export
1889  *              # share -o sec=flavor2 /export/home
1890  *
1891  *              flavor2 is not an explicitly shared flavor for /export,
1892  *              however it is in the secinfo list for /export thru the
1893  *              server namespace setup.
1894  */
1895 int
1896 is_exported_sec(int flavor, struct exportinfo *exi)
1897 {
1898         int     i;
1899         struct secinfo *sp;
1900
1901         sp = exi->exi_export.ex_secinfo;
1902         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1903                 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1904                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1905                         return (SEC_REF_EXPORTED(&sp[i]));
1906                 }
1907         }
1908
1909         /* Should not reach this point based on the assumption */
1910         return (0);
1911 }
1912
1913 /*
1914  * Check if the security flavor used in the request matches what is
1915  * required at the export point or at the root pseudo node (exi_root).
1916  *
1917  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1918  *
1919  */
1920 static int
1921 secinfo_match_or_authnone(struct compound_state *cs)
1922 {
1923         int     i;
1924         struct secinfo *sp;
1925
1926         /*
1927          * Check cs->nfsflavor (from the request) against
1928          * the current export data in cs->exi.
1929          */
1930         sp = cs->exi->exi_export.ex_secinfo;
1931         for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1932                 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1933                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1934                         return (1);
1935         }
1936
1937         return (0);
1938 }
1939
1940 /*
1941  * Check the access authority for the client and return the correct error.
1942  */
1943 nfsstat4
1944 call_checkauth4(struct compound_state *cs, struct svc_req *req)
1945 {
1946         int     authres;
1947
1948         /*
1949          * First, check if the security flavor used in the request
1950          * are among the flavors set in the server namespace.
1951          */
1952         if (!secinfo_match_or_authnone(cs)) {
1953                 *cs->statusp = NFS4ERR_WRONGSEC;
1954                 return (*cs->statusp);
1955         }
1956
1957         authres = checkauth4(cs, req);
1958
1959         if (authres > 0) {
1960                 *cs->statusp = NFS4_OK;
1961                 if (! (cs->access & CS_ACCESS_LIMITED))
1962                         cs->access = CS_ACCESS_OK;
1963         } else if (authres == 0) {
1964                 *cs->statusp = NFS4ERR_ACCESS;
1965         } else if (authres == -2) {
1966                 *cs->statusp = NFS4ERR_WRONGSEC;
1967         } else {
1968                 *cs->statusp = NFS4ERR_DELAY;
1969         }
1970         return (*cs->statusp);
1971 }
1972
1973 /*
1974  * bitmap4_to_attrmask is called by getattr and readdir.
1975  * It sets up the vattr mask and determines whether vfsstat call is needed
1976  * based on the input bitmap.
1977  * Returns nfsv4 status.
1978  */
1979 static nfsstat4
1980 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
1981 {
1982         int i;
1983         uint_t  va_mask;
1984         struct statvfs64 *sbp = sargp->sbp;
1985
1986         sargp->sbp = NULL;
1987         sargp->flag = 0;
1988         sargp->rdattr_error = NFS4_OK;
1989         sargp->mntdfid_set = FALSE;
1990         if (sargp->cs->vp)
1991                 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
1992                     FH4_ATTRDIR | FH4_NAMEDATTR);
1993         else
1994                 sargp->xattr = 0;
1995
1996         /*
1997          * Set rdattr_error_req to true if return error per
1998          * failed entry rather than fail the readdir.
1999          */
2000         if (breq & FATTR4_RDATTR_ERROR_MASK)
2001                 sargp->rdattr_error_req = 1;
2002         else
2003                 sargp->rdattr_error_req = 0;
2004
2005         /*
2006          * generate the va_mask
2007          * Handle the easy cases first
2008          */
2009         switch (breq) {
2010         case NFS4_NTOV_ATTR_MASK:
2011                 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2012                 return (NFS4_OK);
2013
2014         case NFS4_FS_ATTR_MASK:
2015                 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2016                 sargp->sbp = sbp;
2017                 return (NFS4_OK);
2018
2019         case NFS4_NTOV_ATTR_CACHE_MASK:
2020                 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2021                 return (NFS4_OK);
2022
2023         case FATTR4_LEASE_TIME_MASK:
2024                 sargp->vap->va_mask = 0;
2025                 return (NFS4_OK);
2026
2027         default:
2028                 va_mask = 0;
2029                 for (i = 0; i < nfs4_ntov_map_size; i++) {
2030                         if ((breq & nfs4_ntov_map[i].fbit) &&
2031                             nfs4_ntov_map[i].vbit)
2032                                 va_mask |= nfs4_ntov_map[i].vbit;
2033                 }
2034
2035                 /*
2036                  * Check is vfsstat is needed
2037                  */
2038                 if (breq & NFS4_FS_ATTR_MASK)
2039                         sargp->sbp = sbp;
2040
2041                 sargp->vap->va_mask = va_mask;
2042                 return (NFS4_OK);
2043         }
2044         /* NOTREACHED */
2045 }
2046
2047 /*
2048  * bitmap4_get_sysattrs is called by getattr and readdir.
2049  * It calls both fop_getattr and VFS_STATVFS calls to get the attrs.
2050  * Returns nfsv4 status.
2051  */
2052 static nfsstat4
2053 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2054 {
2055         int error;
2056         struct compound_state *cs = sargp->cs;
2057         vnode_t *vp = cs->vp;
2058
2059         if (sargp->sbp != NULL) {
2060                 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2061                         sargp->sbp = NULL;      /* to identify error */
2062                         return (puterrno4(error));
2063                 }
2064         }
2065
2066         return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2067 }
2068
2069 static void
2070 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2071 {
2072         ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2073             KM_SLEEP);
2074         ntovp->attrcnt = 0;
2075         ntovp->vfsstat = FALSE;
2076 }
2077
2078 static void
2079 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2080     struct nfs4_svgetit_arg *sargp)
2081 {
2082         int i;
2083         union nfs4_attr_u *na;
2084         uint8_t *amap;
2085
2086         /*
2087          * XXX Should do the same checks for whether the bit is set
2088          */
2089         for (i = 0, na = ntovp->na, amap = ntovp->amap;
2090             i < ntovp->attrcnt; i++, na++, amap++) {
2091                 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2092                     NFS4ATTR_FREEIT, sargp, na);
2093         }
2094         if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2095                 /*
2096                  * xdr_free for getattr will be done later
2097                  */
2098                 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2099                     i < ntovp->attrcnt; i++, na++, amap++) {
2100                         xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2101                 }
2102         }
2103         kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2104 }
2105
2106 /*
2107  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2108  */
2109 static nfsstat4
2110 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2111     struct nfs4_svgetit_arg *sargp)
2112 {
2113         int error = 0;
2114         int i, k;
2115         struct nfs4_ntov_table ntov;
2116         XDR xdr;
2117         ulong_t xdr_size;
2118         char *xdr_attrs;
2119         nfsstat4 status = NFS4_OK;
2120         nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2121         union nfs4_attr_u *na;
2122         uint8_t *amap;
2123
2124         sargp->op = NFS4ATTR_GETIT;
2125         sargp->flag = 0;
2126
2127         fattrp->attrmask = 0;
2128         /* if no bits requested, then return empty fattr4 */
2129         if (breq == 0) {
2130                 fattrp->attrlist4_len = 0;
2131                 fattrp->attrlist4 = NULL;
2132                 return (NFS4_OK);
2133         }
2134
2135         /*
2136          * return NFS4ERR_INVAL when client requests write-only attrs
2137          */
2138         if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2139                 return (NFS4ERR_INVAL);
2140
2141         nfs4_ntov_table_init(&ntov);
2142         na = ntov.na;
2143         amap = ntov.amap;
2144
2145         /*
2146          * Now loop to get or verify the attrs
2147          */
2148         for (i = 0; i < nfs4_ntov_map_size; i++) {
2149                 if (breq & nfs4_ntov_map[i].fbit) {
2150                         if ((*nfs4_ntov_map[i].sv_getit)(
2151                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2152
2153                                 error = (*nfs4_ntov_map[i].sv_getit)(
2154                                     NFS4ATTR_GETIT, sargp, na);
2155
2156                                 /*
2157                                  * Possible error values:
2158                                  * >0 if sv_getit failed to
2159                                  * get the attr; 0 if succeeded;
2160                                  * <0 if rdattr_error and the
2161                                  * attribute cannot be returned.
2162                                  */
2163                                 if (error && !(sargp->rdattr_error_req))
2164                                         goto done;
2165                                 /*
2166                                  * If error then just for entry
2167                                  */
2168                                 if (error == 0) {
2169                                         fattrp->attrmask |=
2170                                             nfs4_ntov_map[i].fbit;
2171                                         *amap++ =
2172                                             (uint8_t)nfs4_ntov_map[i].nval;
2173                                         na++;
2174                                         (ntov.attrcnt)++;
2175                                 } else if ((error > 0) &&
2176                                     (sargp->rdattr_error == NFS4_OK)) {
2177                                         sargp->rdattr_error = puterrno4(error);
2178                                 }
2179                                 error = 0;
2180                         }
2181                 }
2182         }
2183
2184         /*
2185          * If rdattr_error was set after the return value for it was assigned,
2186          * update it.
2187          */
2188         if (prev_rdattr_error != sargp->rdattr_error) {
2189                 na = ntov.na;
2190                 amap = ntov.amap;
2191                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2192                         k = *amap;
2193                         if (k < FATTR4_RDATTR_ERROR) {
2194                                 continue;
2195                         }
2196                         if ((k == FATTR4_RDATTR_ERROR) &&
2197                             ((*nfs4_ntov_map[k].sv_getit)(
2198                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2199
2200                                 (void) (*nfs4_ntov_map[k].sv_getit)(
2201                                     NFS4ATTR_GETIT, sargp, na);
2202                         }
2203                         break;
2204                 }
2205         }
2206
2207         xdr_size = 0;
2208         na = ntov.na;
2209         amap = ntov.amap;
2210         for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2211                 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2212         }
2213
2214         fattrp->attrlist4_len = xdr_size;
2215         if (xdr_size) {
2216                 /* freed by rfs4_op_getattr_free() */
2217                 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2218
2219                 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2220
2221                 na = ntov.na;
2222                 amap = ntov.amap;
2223                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2224                         if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2225                                 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2226                                     int, *amap);
2227                                 status = NFS4ERR_SERVERFAULT;
2228                                 break;
2229                         }
2230                 }
2231                 /* xdrmem_destroy(&xdrs); */    /* NO-OP */
2232         } else {
2233                 fattrp->attrlist4 = NULL;
2234         }
2235 done:
2236
2237         nfs4_ntov_table_free(&ntov, sargp);
2238
2239         if (error != 0)
2240                 status = puterrno4(error);
2241
2242         return (status);
2243 }
2244
2245 /* ARGSUSED */
2246 static void
2247 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2248     struct compound_state *cs)
2249 {
2250         GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2251         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2252         struct nfs4_svgetit_arg sarg;
2253         struct statvfs64 sb;
2254         nfsstat4 status;
2255
2256         DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2257             GETATTR4args *, args);
2258
2259         if (cs->vp == NULL) {
2260                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2261                 goto out;
2262         }
2263
2264         if (cs->access == CS_ACCESS_DENIED) {
2265                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2266                 goto out;
2267         }
2268
2269         sarg.sbp = &sb;
2270         sarg.cs = cs;
2271         sarg.is_referral = B_FALSE;
2272
2273         status = bitmap4_to_attrmask(args->attr_request, &sarg);
2274         if (status == NFS4_OK) {
2275
2276                 status = bitmap4_get_sysattrs(&sarg);
2277                 if (status == NFS4_OK) {
2278
2279                         /* Is this a referral? */
2280                         if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2281                                 /* Older V4 Solaris client sees a link */
2282                                 if (client_is_downrev(req))
2283                                         sarg.vap->va_type = VLNK;
2284                                 else
2285                                         sarg.is_referral = B_TRUE;
2286                         }
2287
2288                         status = do_rfs4_op_getattr(args->attr_request,
2289                             &resp->obj_attributes, &sarg);
2290                 }
2291         }
2292         *cs->statusp = resp->status = status;
2293 out:
2294         DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2295             GETATTR4res *, resp);
2296 }
2297
2298 static void
2299 rfs4_op_getattr_free(nfs_resop4 *resop)
2300 {
2301         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2302
2303         nfs4_fattr4_free(&resp->obj_attributes);
2304 }
2305
2306 /* ARGSUSED */
2307 static void
2308 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2309     struct compound_state *cs)
2310 {
2311         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2312
2313         DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2314
2315         if (cs->vp == NULL) {
2316                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2317                 goto out;
2318         }
2319         if (cs->access == CS_ACCESS_DENIED) {
2320                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2321                 goto out;
2322         }
2323
2324         /* check for reparse point at the share point */
2325         if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2326                 /* it's all bad */
2327                 cs->exi->exi_moved = 1;
2328                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2329                 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2330                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2331                 return;
2332         }
2333
2334         /* check for reparse point at vp */
2335         if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2336                 /* it's not all bad */
2337                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2338                 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2339                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2340                 return;
2341         }
2342
2343         resp->object.nfs_fh4_val =
2344             kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2345         nfs_fh4_copy(&cs->fh, &resp->object);
2346         *cs->statusp = resp->status = NFS4_OK;
2347 out:
2348         DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2349             GETFH4res *, resp);
2350 }
2351
2352 static void
2353 rfs4_op_getfh_free(nfs_resop4 *resop)
2354 {
2355         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2356
2357         if (resp->status == NFS4_OK &&
2358             resp->object.nfs_fh4_val != NULL) {
2359                 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2360                 resp->object.nfs_fh4_val = NULL;
2361                 resp->object.nfs_fh4_len = 0;
2362         }
2363 }
2364
2365 /*
2366  * illegal: args: void
2367  *          res : status (NFS4ERR_OP_ILLEGAL)
2368  */
2369 /* ARGSUSED */
2370 static void
2371 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2372     struct svc_req *req, struct compound_state *cs)
2373 {
2374         ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2375
2376         resop->resop = OP_ILLEGAL;
2377         *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2378 }
2379
2380 /*
2381  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2382  *       res: status. If success - CURRENT_FH unchanged, return change_info
2383  */
2384 /* ARGSUSED */
2385 static void
2386 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2387     struct compound_state *cs)
2388 {
2389         LINK4args *args = &argop->nfs_argop4_u.oplink;
2390         LINK4res *resp = &resop->nfs_resop4_u.oplink;
2391         int error;
2392         vnode_t *vp;
2393         vnode_t *dvp;
2394         struct vattr bdva, idva, adva;
2395         char *nm;
2396         uint_t  len;
2397         struct sockaddr *ca;
2398         char *name = NULL;
2399         nfsstat4 status;
2400
2401         DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2402             LINK4args *, args);
2403
2404         /* SAVED_FH: source object */
2405         vp = cs->saved_vp;
2406         if (vp == NULL) {
2407                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2408                 goto out;
2409         }
2410
2411         /* CURRENT_FH: target directory */
2412         dvp = cs->vp;
2413         if (dvp == NULL) {
2414                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2415                 goto out;
2416         }
2417
2418         /*
2419          * If there is a non-shared filesystem mounted on this vnode,
2420          * do not allow to link any file in this directory.
2421          */
2422         if (vn_ismntpt(dvp)) {
2423                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2424                 goto out;
2425         }
2426
2427         if (cs->access == CS_ACCESS_DENIED) {
2428                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2429                 goto out;
2430         }
2431
2432         /* Check source object's type validity */
2433         if (vp->v_type == VDIR) {
2434                 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2435                 goto out;
2436         }
2437
2438         /* Check target directory's type */
2439         if (dvp->v_type != VDIR) {
2440                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2441                 goto out;
2442         }
2443
2444         if (cs->saved_exi != cs->exi) {
2445                 *cs->statusp = resp->status = NFS4ERR_XDEV;
2446                 goto out;
2447         }
2448
2449         status = utf8_dir_verify(&args->newname);
2450         if (status != NFS4_OK) {
2451                 *cs->statusp = resp->status = status;
2452                 goto out;
2453         }
2454
2455         nm = utf8_to_fn(&args->newname, &len, NULL);
2456         if (nm == NULL) {
2457                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2458                 goto out;
2459         }
2460
2461         if (len > MAXNAMELEN) {
2462                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2463                 kmem_free(nm, len);
2464                 goto out;
2465         }
2466
2467         if (rdonly4(req, cs)) {
2468                 *cs->statusp = resp->status = NFS4ERR_ROFS;
2469                 kmem_free(nm, len);
2470                 goto out;
2471         }
2472
2473         /* Get "before" change value */
2474         bdva.va_mask = VATTR_CTIME|VATTR_SEQ;
2475         error = fop_getattr(dvp, &bdva, 0, cs->cr, NULL);
2476         if (error) {
2477                 *cs->statusp = resp->status = puterrno4(error);
2478                 kmem_free(nm, len);
2479                 goto out;
2480         }
2481
2482         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2483         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2484             MAXPATHLEN  + 1);
2485
2486         if (name == NULL) {
2487                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2488                 kmem_free(nm, len);
2489                 goto out;
2490         }
2491
2492         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2493
2494         error = fop_link(dvp, vp, name, cs->cr, NULL, 0);
2495
2496         if (nm != name)
2497                 kmem_free(name, MAXPATHLEN + 1);
2498         kmem_free(nm, len);
2499
2500         /*
2501          * Get the initial "after" sequence number, if it fails, set to zero
2502          */
2503         idva.va_mask = VATTR_SEQ;
2504         if (fop_getattr(dvp, &idva, 0, cs->cr, NULL))
2505                 idva.va_seq = 0;
2506
2507         /*
2508          * Force modified data and metadata out to stable storage.
2509          */
2510         (void) fop_fsync(vp, FNODSYNC, cs->cr, NULL);
2511         (void) fop_fsync(dvp, 0, cs->cr, NULL);
2512
2513         if (error) {
2514                 *cs->statusp = resp->status = puterrno4(error);
2515                 goto out;
2516         }
2517
2518         /*
2519          * Get "after" change value, if it fails, simply return the
2520          * before value.
2521          */
2522         adva.va_mask = VATTR_CTIME|VATTR_SEQ;
2523         if (fop_getattr(dvp, &adva, 0, cs->cr, NULL)) {
2524                 adva.va_ctime = bdva.va_ctime;
2525                 adva.va_seq = 0;
2526         }
2527
2528         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2529
2530         /*
2531          * The cinfo.atomic = TRUE only if we have
2532          * non-zero va_seq's, and it has incremented by exactly one
2533          * during the fop_link and it didn't change during the fop_fsync.
2534          */
2535         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2536             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2537                 resp->cinfo.atomic = TRUE;
2538         else
2539                 resp->cinfo.atomic = FALSE;
2540
2541         *cs->statusp = resp->status = NFS4_OK;
2542 out:
2543         DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2544             LINK4res *, resp);
2545 }
2546
2547 /*
2548  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2549  */
2550
2551 /* ARGSUSED */
2552 static nfsstat4
2553 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2554 {
2555         int error;
2556         int different_export = 0;
2557         vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2558         struct exportinfo *exi = NULL, *pre_exi = NULL;
2559         nfsstat4 stat;
2560         fid_t fid;
2561         int attrdir, dotdot, walk;
2562         bool_t is_newvp = FALSE;
2563
2564         if (cs->vp->v_flag & V_XATTRDIR) {
2565                 attrdir = 1;
2566                 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2567         } else {
2568                 attrdir = 0;
2569                 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2570         }
2571
2572         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2573
2574         /*
2575          * If dotdotting, then need to check whether it's
2576          * above the root of a filesystem, or above an
2577          * export point.
2578          */
2579         if (dotdot) {
2580
2581                 /*
2582                  * If dotdotting at the root of a filesystem, then
2583                  * need to traverse back to the mounted-on filesystem
2584                  * and do the dotdot lookup there.
2585                  */
2586                 if (cs->vp->v_flag & VROOT) {
2587
2588                         /*
2589                          * If at the system root, then can
2590                          * go up no further.
2591                          */
2592                         if (VN_CMP(cs->vp, rootdir))
2593                                 return (puterrno4(ENOENT));
2594
2595                         /*
2596                          * Traverse back to the mounted-on filesystem
2597                          */
2598                         cs->vp = untraverse(cs->vp);
2599
2600                         /*
2601                          * Set the different_export flag so we remember
2602                          * to pick up a new exportinfo entry for
2603                          * this new filesystem.
2604                          */
2605                         different_export = 1;
2606                 } else {
2607
2608                         /*
2609                          * If dotdotting above an export point then set
2610                          * the different_export to get new export info.
2611                          */
2612                         different_export = nfs_exported(cs->exi, cs->vp);
2613                 }
2614         }
2615
2616         error = fop_lookup(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2617             NULL, NULL, NULL);
2618         if (error)
2619                 return (puterrno4(error));
2620
2621         /*
2622          * If the vnode is in a pseudo filesystem, check whether it is visible.
2623          *
2624          * XXX if the vnode is a symlink and it is not visible in
2625          * a pseudo filesystem, return ENOENT (not following symlink).
2626          * V4 client can not mount such symlink. This is a regression
2627          * from V2/V3.
2628          *
2629          * In the same exported filesystem, if the security flavor used
2630          * is not an explicitly shared flavor, limit the view to the visible
2631          * list entries only. This is not a WRONGSEC case because it's already
2632          * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2633          */
2634         if (!different_export &&
2635             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2636             cs->access & CS_ACCESS_LIMITED)) {
2637                 if (! nfs_visible(cs->exi, vp, &different_export)) {
2638                         VN_RELE(vp);
2639                         return (puterrno4(ENOENT));
2640                 }
2641         }
2642
2643         /*
2644          * If it's a mountpoint, then traverse it.
2645          */
2646         if (vn_ismntpt(vp)) {
2647                 pre_exi = cs->exi;      /* save pre-traversed exportinfo */
2648                 pre_tvp = vp;           /* save pre-traversed vnode     */
2649
2650                 /*
2651                  * hold pre_tvp to counteract rele by traverse.  We will
2652                  * need pre_tvp below if checkexport4 fails
2653                  */
2654                 VN_HOLD(pre_tvp);
2655                 if ((error = traverse(&vp)) != 0) {
2656                         VN_RELE(vp);
2657                         VN_RELE(pre_tvp);
2658                         return (puterrno4(error));
2659                 }
2660                 different_export = 1;
2661         } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2662                 /*
2663                  * The vfsp comparison is to handle the case where
2664                  * a LOFS mount is shared.  lo_lookup traverses mount points,
2665                  * and NFS is unaware of local fs transistions because
2666                  * v_vfsmountedhere isn't set.  For this special LOFS case,
2667                  * the dir and the obj returned by lookup will have different
2668                  * vfs ptrs.
2669                  */
2670                 different_export = 1;
2671         }
2672
2673         if (different_export) {
2674
2675                 bzero(&fid, sizeof (fid));
2676                 fid.fid_len = MAXFIDSZ;
2677                 error = vop_fid_pseudo(vp, &fid);
2678                 if (error) {
2679                         VN_RELE(vp);
2680                         if (pre_tvp)
2681                                 VN_RELE(pre_tvp);
2682                         return (puterrno4(error));
2683                 }
2684
2685                 if (dotdot)
2686                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2687                 else
2688                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2689
2690                 if (exi == NULL) {
2691                         if (pre_tvp) {
2692                                 /*
2693                                  * If this vnode is a mounted-on vnode,
2694                                  * but the mounted-on file system is not
2695                                  * exported, send back the filehandle for
2696                                  * the mounted-on vnode, not the root of
2697                                  * the mounted-on file system.
2698                                  */
2699                                 VN_RELE(vp);
2700                                 vp = pre_tvp;
2701                                 exi = pre_exi;
2702                         } else {
2703                                 VN_RELE(vp);
2704                                 return (puterrno4(EACCES));
2705                         }
2706                 } else if (pre_tvp) {
2707                         /* we're done with pre_tvp now. release extra hold */
2708                         VN_RELE(pre_tvp);
2709                 }
2710
2711                 cs->exi = exi;
2712
2713                 /*
2714                  * Now we do a checkauth4. The reason is that
2715                  * this client/user may not have access to the new
2716                  * exported file system, and if they do,
2717                  * the client/user may be mapped to a different uid.
2718                  *
2719                  * We start with a new cr, because the checkauth4 done
2720                  * in the PUT*FH operation over wrote the cred's uid,
2721                  * gid, etc, and we want the real thing before calling
2722                  * checkauth4()
2723                  */
2724                 crfree(cs->cr);
2725                 cs->cr = crdup(cs->basecr);
2726
2727                 oldvp = cs->vp;
2728                 cs->vp = vp;
2729                 is_newvp = TRUE;
2730
2731                 stat = call_checkauth4(cs, req);
2732                 if (stat != NFS4_OK) {
2733                         VN_RELE(cs->vp);
2734                         cs->vp = oldvp;
2735                         return (stat);
2736                 }
2737         }
2738
2739         error = makefh4(&cs->fh, vp, cs->exi);
2740
2741 err_out:
2742         if (error) {
2743                 if (is_newvp) {
2744                         VN_RELE(cs->vp);
2745                         cs->vp = oldvp;
2746                 } else
2747                         VN_RELE(vp);
2748                 return (puterrno4(error));
2749         }
2750
2751         if (!is_newvp) {
2752                 if (cs->vp)
2753                         VN_RELE(cs->vp);
2754                 cs->vp = vp;
2755         } else if (oldvp)
2756                 VN_RELE(oldvp);
2757
2758         /*
2759          * if did lookup on attrdir and didn't lookup .., set named
2760          * attr fh flag
2761          */
2762         if (attrdir && ! dotdot)
2763                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2764
2765         /* Assume false for now, open proc will set this */
2766         cs->mandlock = FALSE;
2767
2768         return (NFS4_OK);
2769 }
2770
2771 /* ARGSUSED */
2772 static void
2773 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2774     struct compound_state *cs)
2775 {
2776         LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2777         LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2778         char *nm;
2779         uint_t len;
2780         struct sockaddr *ca;
2781         char *name = NULL;
2782         nfsstat4 status;
2783
2784         DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2785             LOOKUP4args *, args);
2786
2787         if (cs->vp == NULL) {
2788                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2789                 goto out;
2790         }
2791
2792         if (cs->vp->v_type == VLNK) {
2793                 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2794                 goto out;
2795         }
2796
2797         if (cs->vp->v_type != VDIR) {
2798                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2799                 goto out;
2800         }
2801
2802         status = utf8_dir_verify(&args->objname);
2803         if (status != NFS4_OK) {
2804                 *cs->statusp = resp->status = status;
2805                 goto out;
2806         }
2807
2808         nm = utf8_to_str(&args->objname, &len, NULL);
2809         if (nm == NULL) {
2810                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2811                 goto out;
2812         }
2813
2814         if (len > MAXNAMELEN) {
2815                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2816                 kmem_free(nm, len);
2817                 goto out;
2818         }
2819
2820         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2821         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2822             MAXPATHLEN  + 1);
2823
2824         if (name == NULL) {
2825                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2826                 kmem_free(nm, len);
2827                 goto out;
2828         }
2829
2830         *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
2831
2832         if (name != nm)
2833                 kmem_free(name, MAXPATHLEN + 1);
2834         kmem_free(nm, len);
2835
2836 out:
2837         DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
2838             LOOKUP4res *, resp);
2839 }
2840
2841 /* ARGSUSED */
2842 static void
2843 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2844     struct compound_state *cs)
2845 {
2846         LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2847
2848         DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
2849
2850         if (cs->vp == NULL) {
2851                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2852                 goto out;
2853         }
2854
2855         if (cs->vp->v_type != VDIR) {
2856                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2857                 goto out;
2858         }
2859
2860         *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
2861
2862         /*
2863          * From NFSV4 Specification, LOOKUPP should not check for
2864          * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
2865          */
2866         if (resp->status == NFS4ERR_WRONGSEC) {
2867                 *cs->statusp = resp->status = NFS4_OK;
2868         }
2869
2870 out:
2871         DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
2872             LOOKUPP4res *, resp);
2873 }
2874
2875
2876 /*ARGSUSED2*/
2877 static void
2878 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2879     struct compound_state *cs)
2880 {
2881         OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
2882         OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
2883         vnode_t         *avp = NULL;
2884         int             lookup_flags = LOOKUP_XATTR, error;
2885         int             exp_ro = 0;
2886
2887         DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
2888             OPENATTR4args *, args);
2889
2890         if (cs->vp == NULL) {
2891                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2892                 goto out;
2893         }
2894
2895         if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
2896             !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
2897                 *cs->statusp = resp->status = puterrno4(ENOTSUP);
2898                 goto out;
2899         }
2900
2901         /*
2902          * If file system supports passing ACE mask to fop_access then
2903          * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
2904          */
2905
2906         if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
2907                 error = fop_access(cs->vp, ACE_READ_NAMED_ATTRS,
2908                     V_ACE_MASK, cs->cr, NULL);
2909         else
2910                 error = ((fop_access(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
2911                     (fop_access(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
2912                     (fop_access(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
2913
2914         if (error) {
2915                 *cs->statusp = resp->status = puterrno4(EACCES);
2916                 goto out;
2917         }
2918
2919         /*
2920          * The CREATE_XATTR_DIR VOP flag cannot be specified if
2921          * the file system is exported read-only -- regardless of
2922          * createdir flag.  Otherwise the attrdir would be created
2923          * (assuming server fs isn't mounted readonly locally).  If
2924          * fop_lookup returns ENOENT in this case, the error will
2925          * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
2926          * because specfs has no fop_lookup op, so the macro would
2927          * return ENOSYS.  EINVAL is returned by all (current)
2928          * Solaris file system implementations when any of their
2929          * restrictions are violated (xattr(dir) can't have xattrdir).
2930          * Returning NOTSUPP is more appropriate in this case
2931          * because the object will never be able to have an attrdir.
2932          */
2933         if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
2934                 lookup_flags |= CREATE_XATTR_DIR;
2935
2936         error = fop_lookup(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
2937             NULL, NULL, NULL);
2938
2939         if (error) {
2940                 if (error == ENOENT && args->createdir && exp_ro)
2941                         *cs->statusp = resp->status = puterrno4(EROFS);
2942                 else if (error == EINVAL || error == ENOSYS)
2943                         *cs->statusp = resp->status = puterrno4(ENOTSUP);
2944                 else
2945                         *cs->statusp = resp->status = puterrno4(error);
2946                 goto out;
2947         }
2948
2949         ASSERT(avp->v_flag & V_XATTRDIR);
2950
2951         error = makefh4(&cs->fh, avp, cs->exi);
2952
2953         if (error) {
2954                 VN_RELE(avp);
2955                 *cs->statusp = resp->status = puterrno4(error);
2956                 goto out;
2957         }
2958
2959         VN_RELE(cs->vp);
2960         cs->vp = avp;
2961
2962         /*
2963          * There is no requirement for an attrdir fh flag
2964          * because the attrdir has a vnode flag to distinguish
2965          * it from regular (non-xattr) directories.  The
2966          * FH4_ATTRDIR flag is set for future sanity checks.
2967          */
2968         set_fh4_flag(&cs->fh, FH4_ATTRDIR);
2969         *cs->statusp = resp->status = NFS4_OK;
2970
2971 out:
2972         DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
2973             OPENATTR4res *, resp);
2974 }
2975
2976 static int
2977 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
2978     caller_context_t *ct)
2979 {
2980         int error;
2981         int i;
2982         clock_t delaytime;
2983
2984         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
2985
2986         /*
2987          * Don't block on mandatory locks. If this routine returns
2988          * EAGAIN, the caller should return NFS4ERR_LOCKED.
2989          */
2990         uio->uio_fmode = FNONBLOCK;
2991
2992         for (i = 0; i < rfs4_maxlock_tries; i++) {
2993
2994
2995                 if (direction == FREAD) {
2996                         (void) fop_rwlock(vp, V_WRITELOCK_FALSE, ct);
2997                         error = fop_read(vp, uio, ioflag, cred, ct);
2998                         fop_rwunlock(vp, V_WRITELOCK_FALSE, ct);
2999                 } else {
3000                         (void) fop_rwlock(vp, V_WRITELOCK_TRUE, ct);
3001                         error = fop_write(vp, uio, ioflag, cred, ct);
3002                         fop_rwunlock(vp, V_WRITELOCK_TRUE, ct);
3003                 }
3004
3005                 if (error != EAGAIN)
3006                         break;
3007
3008                 if (i < rfs4_maxlock_tries - 1) {
3009                         delay(delaytime);
3010                         delaytime *= 2;
3011                 }
3012         }
3013
3014         return (error);
3015 }
3016
3017 /* ARGSUSED */
3018 static void
3019 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3020     struct compound_state *cs)
3021 {
3022         READ4args *args = &argop->nfs_argop4_u.opread;
3023         READ4res *resp = &resop->nfs_resop4_u.opread;
3024         int error;
3025         int verror;
3026         vnode_t *vp;
3027         struct vattr va;
3028         struct iovec iov, *iovp = NULL;
3029         int iovcnt;
3030         struct uio uio;
3031         uoff_t offset;
3032         bool_t *deleg = &cs->deleg;
3033         nfsstat4 stat;
3034         int in_crit = 0;
3035         mblk_t *mp = NULL;
3036         int alloc_err = 0;
3037         int rdma_used = 0;
3038         int loaned_buffers;
3039         caller_context_t ct;
3040         struct uio *uiop;
3041
3042         DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3043             READ4args, args);
3044
3045         vp = cs->vp;
3046         if (vp == NULL) {
3047                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3048                 goto out;
3049         }
3050         if (cs->access == CS_ACCESS_DENIED) {
3051                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3052                 goto out;
3053         }
3054
3055         if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3056             deleg, TRUE, &ct)) != NFS4_OK) {
3057                 *cs->statusp = resp->status = stat;
3058                 goto out;
3059         }
3060
3061         /*
3062          * Enter the critical region before calling fop_rwlock
3063          * to avoid a deadlock with write requests.
3064          */
3065         if (nbl_need_check(vp)) {
3066                 nbl_start_crit(vp, RW_READER);
3067                 in_crit = 1;
3068                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3069                     &ct)) {
3070                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
3071                         goto out;
3072                 }
3073         }
3074
3075         if (args->wlist) {
3076                 if (args->count > clist_len(args->wlist)) {
3077                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3078                         goto out;
3079                 }
3080                 rdma_used = 1;
3081         }
3082
3083         /* use loaned buffers for TCP */
3084         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3085
3086         va.va_mask = VATTR_MODE|VATTR_SIZE|VATTR_UID;
3087         verror = fop_getattr(vp, &va, 0, cs->cr, &ct);
3088
3089         /*
3090          * If we can't get the attributes, then we can't do the
3091          * right access checking.  So, we'll fail the request.
3092          */
3093         if (verror) {
3094                 *cs->statusp = resp->status = puterrno4(verror);
3095                 goto out;
3096         }
3097
3098         if (vp->v_type != VREG) {
3099                 *cs->statusp = resp->status =
3100                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3101                 goto out;
3102         }
3103
3104         if (crgetuid(cs->cr) != va.va_uid &&
3105             (error = fop_access(vp, VREAD, 0, cs->cr, &ct)) &&
3106             (error = fop_access(vp, VEXEC, 0, cs->cr, &ct))) {
3107                 *cs->statusp = resp->status = puterrno4(error);
3108                 goto out;
3109         }
3110
3111         if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3112                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3113                 goto out;
3114         }
3115
3116         offset = args->offset;
3117         if (offset >= va.va_size) {
3118                 *cs->statusp = resp->status = NFS4_OK;
3119                 resp->eof = TRUE;
3120                 resp->data_len = 0;
3121                 resp->data_val = NULL;
3122                 resp->mblk = NULL;
3123                 /* RDMA */
3124                 resp->wlist = args->wlist;
3125                 resp->wlist_len = resp->data_len;
3126                 *cs->statusp = resp->status = NFS4_OK;
3127                 if (resp->wlist)
3128                         clist_zero_len(resp->wlist);
3129                 goto out;
3130         }
3131
3132         if (args->count == 0) {
3133                 *cs->statusp = resp->status = NFS4_OK;
3134                 resp->eof = FALSE;
3135                 resp->data_len = 0;
3136                 resp->data_val = NULL;
3137                 resp->mblk = NULL;
3138                 /* RDMA */
3139                 resp->wlist = args->wlist;
3140                 resp->wlist_len = resp->data_len;
3141                 if (resp->wlist)
3142                         clist_zero_len(resp->wlist);
3143                 goto out;
3144         }
3145
3146         /*
3147          * Do not allocate memory more than maximum allowed
3148          * transfer size
3149          */
3150         if (args->count > rfs4_tsize(req))
3151                 args->count = rfs4_tsize(req);
3152
3153         if (loaned_buffers) {
3154                 uiop = (uio_t *)rfs_setup_xuio(vp);
3155                 ASSERT(uiop != NULL);
3156                 uiop->uio_segflg = UIO_SYSSPACE;
3157                 uiop->uio_loffset = args->offset;
3158                 uiop->uio_resid = args->count;
3159
3160                 /* Jump to do the read if successful */
3161                 if (!fop_reqzcbuf(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3162                         /*
3163                          * Need to hold the vnode until after fop_retzcbuf()
3164                          * is called.
3165                          */
3166                         VN_HOLD(vp);
3167                         goto doio_read;
3168                 }
3169
3170                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3171                     uiop->uio_loffset, int, uiop->uio_resid);
3172
3173                 uiop->uio_extflg = 0;
3174
3175                 /* failure to setup for zero copy */
3176                 rfs_free_xuio((void *)uiop);
3177                 loaned_buffers = 0;
3178         }
3179
3180         /*
3181          * If returning data via RDMA Write, then grab the chunk list. If we
3182          * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3183          */
3184         if (rdma_used) {
3185                 mp = NULL;
3186                 (void) rdma_get_wchunk(req, &iov, args->wlist);
3187                 uio.uio_iov = &iov;
3188                 uio.uio_iovcnt = 1;
3189         } else {
3190                 /*
3191                  * mp will contain the data to be sent out in the read reply.
3192                  * It will be freed after the reply has been sent.
3193                  */
3194                 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3195                 ASSERT(mp != NULL);
3196                 ASSERT(alloc_err == 0);
3197                 uio.uio_iov = iovp;
3198                 uio.uio_iovcnt = iovcnt;
3199         }
3200
3201         uio.uio_segflg = UIO_SYSSPACE;
3202         uio.uio_extflg = UIO_COPY_CACHED;
3203         uio.uio_loffset = args->offset;
3204         uio.uio_resid = args->count;
3205         uiop = &uio;
3206
3207 doio_read:
3208         error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3209
3210         va.va_mask = VATTR_SIZE;
3211         verror = fop_getattr(vp, &va, 0, cs->cr, &ct);
3212
3213         if (error) {
3214                 if (mp)
3215                         freemsg(mp);
3216                 *cs->statusp = resp->status = puterrno4(error);
3217                 goto out;
3218         }
3219
3220         /* make mblk using zc buffers */
3221         if (loaned_buffers) {
3222                 mp = uio_to_mblk(uiop);
3223                 ASSERT(mp != NULL);
3224         }
3225
3226         *cs->statusp = resp->status = NFS4_OK;
3227
3228         ASSERT(uiop->uio_resid >= 0);
3229         resp->data_len = args->count - uiop->uio_resid;
3230         if (mp) {
3231                 resp->data_val = (char *)mp->b_datap->db_base;
3232                 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3233         } else {
3234                 resp->data_val = (caddr_t)iov.iov_base;
3235         }
3236
3237         resp->mblk = mp;
3238
3239         if (!verror && offset + resp->data_len == va.va_size)
3240                 resp->eof = TRUE;
3241         else
3242                 resp->eof = FALSE;
3243
3244         if (rdma_used) {
3245                 if (!rdma_setup_read_data4(args, resp)) {
3246                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3247                 }
3248         } else {
3249                 resp->wlist = NULL;
3250         }
3251
3252 out:
3253         if (in_crit)
3254                 nbl_end_crit(vp);
3255
3256         if (iovp != NULL)
3257                 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3258
3259         DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3260             READ4res *, resp);
3261 }
3262
3263 static void
3264 rfs4_op_read_free(nfs_resop4 *resop)
3265 {
3266         READ4res        *resp = &resop->nfs_resop4_u.opread;
3267
3268         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3269                 freemsg(resp->mblk);
3270                 resp->mblk = NULL;
3271                 resp->data_val = NULL;
3272                 resp->data_len = 0;
3273         }
3274 }
3275
3276 static void
3277 rfs4_op_readdir_free(nfs_resop4 * resop)
3278 {
3279         READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3280
3281         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3282                 freeb(resp->mblk);
3283                 resp->mblk = NULL;
3284                 resp->data_len = 0;
3285         }
3286 }
3287
3288
3289 /* ARGSUSED */
3290 static void
3291 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3292     struct compound_state *cs)
3293 {
3294         PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3295         int             error;
3296         vnode_t         *vp;
3297         struct exportinfo *exi, *sav_exi;
3298         nfs_fh4_fmt_t   *fh_fmtp;
3299
3300         DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3301
3302         if (cs->vp) {
3303                 VN_RELE(cs->vp);
3304                 cs->vp = NULL;
3305         }
3306
3307         if (cs->cr)
3308                 crfree(cs->cr);
3309
3310         cs->cr = crdup(cs->basecr);
3311
3312         vp = exi_public->exi_vp;
3313         if (vp == NULL) {
3314                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3315                 goto out;
3316         }
3317
3318         error = makefh4(&cs->fh, vp, exi_public);
3319         if (error != 0) {
3320                 *cs->statusp = resp->status = puterrno4(error);
3321                 goto out;
3322         }
3323         sav_exi = cs->exi;
3324         if (exi_public == exi_root) {
3325                 /*
3326                  * No filesystem is actually shared public, so we default
3327                  * to exi_root. In this case, we must check whether root
3328                  * is exported.
3329                  */
3330                 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3331
3332                 /*
3333                  * if root filesystem is exported, the exportinfo struct that we
3334                  * should use is what checkexport4 returns, because root_exi is
3335                  * actually a mostly empty struct.
3336                  */
3337                 exi = checkexport4(&fh_fmtp->fh4_fsid,
3338                     (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3339                 cs->exi = ((exi != NULL) ? exi : exi_public);
3340         } else {
3341                 /*
3342                  * it's a properly shared filesystem
3343                  */
3344                 cs->exi = exi_public;
3345         }
3346
3347         VN_HOLD(vp);
3348         cs->vp = vp;
3349
3350         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3351                 VN_RELE(cs->vp);
3352                 cs->vp = NULL;
3353                 cs->exi = sav_exi;
3354                 goto out;
3355         }
3356
3357         *cs->statusp = resp->status = NFS4_OK;
3358 out:
3359         DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3360             PUTPUBFH4res *, resp);
3361 }
3362
3363 /*
3364  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3365  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3366  * or joe have restrictive search permissions, then we shouldn't let
3367  * the client get a file handle. This is easy to enforce. However, we
3368  * don't know what security flavor should be used until we resolve the
3369  * path name. Another complication is uid mapping. If root is
3370  * the user, then it will be mapped to the anonymous user by default,
3371  * but we won't know that till we've resolved the path name. And we won't
3372  * know what the anonymous user is.
3373  * Luckily, SECINFO is specified to take a full filename.
3374  * So what we will have to do in rfs4_op_lookup is check that flavor of
3375  * the target object matches that of the request, and if root was the
3376  * caller, check for the root= and anon= options, and if necessary,
3377  * repeat the lookup using the right cred_t. But that's not done yet.
3378  */
3379 /* ARGSUSED */
3380 static void
3381 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3382     struct compound_state *cs)
3383 {
3384         PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3385         PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3386         nfs_fh4_fmt_t *fh_fmtp;
3387
3388         DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3389             PUTFH4args *, args);
3390
3391         if (cs->vp) {
3392                 VN_RELE(cs->vp);
3393                 cs->vp = NULL;
3394         }
3395
3396         if (cs->cr) {
3397                 crfree(cs->cr);
3398                 cs->cr = NULL;
3399         }
3400
3401
3402         if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3403                 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3404                 goto out;
3405         }
3406
3407         fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3408         cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3409             NULL);
3410
3411         if (cs->exi == NULL) {
3412                 *cs->statusp = resp->status = NFS4ERR_STALE;
3413                 goto out;
3414         }
3415
3416         cs->cr = crdup(cs->basecr);
3417
3418         ASSERT(cs->cr != NULL);
3419
3420         if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3421                 *cs->statusp = resp->status;
3422                 goto out;
3423         }
3424
3425         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3426                 VN_RELE(cs->vp);
3427                 cs->vp = NULL;
3428                 goto out;
3429         }
3430
3431         nfs_fh4_copy(&args->object, &cs->fh);
3432         *cs->statusp = resp->status = NFS4_OK;
3433         cs->deleg = FALSE;
3434
3435 out:
3436         DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3437             PUTFH4res *, resp);
3438 }
3439
3440 /* ARGSUSED */
3441 static void
3442 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3443     struct compound_state *cs)
3444 {
3445         PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3446         int error;
3447         fid_t fid;
3448         struct exportinfo *exi, *sav_exi;
3449
3450         DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3451
3452         if (cs->vp) {
3453                 VN_RELE(cs->vp);
3454                 cs->vp = NULL;
3455         }
3456
3457         if (cs->cr)
3458                 crfree(cs->cr);
3459
3460         cs->cr = crdup(cs->basecr);
3461
3462         /*
3463          * Using rootdir, the system root vnode,
3464          * get its fid.
3465          */
3466         bzero(&fid, sizeof (fid));
3467         fid.fid_len = MAXFIDSZ;
3468         error = vop_fid_pseudo(rootdir, &fid);
3469         if (error != 0) {
3470                 *cs->statusp = resp->status = puterrno4(error);
3471                 goto out;
3472         }
3473
3474         /*
3475          * Then use the root fsid & fid it to find out if it's exported
3476          *
3477          * If the server root isn't exported directly, then
3478          * it should at least be a pseudo export based on
3479          * one or more exports further down in the server's
3480          * file tree.
3481          */
3482         exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3483         if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3484                 NFS4_DEBUG(rfs4_debug,
3485                     (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3486                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3487                 goto out;
3488         }
3489
3490         /*
3491          * Now make a filehandle based on the root
3492          * export and root vnode.
3493          */
3494         error = makefh4(&cs->fh, rootdir, exi);
3495         if (error != 0) {
3496                 *cs->statusp = resp->status = puterrno4(error);
3497                 goto out;
3498         }
3499
3500         sav_exi = cs->exi;
3501         cs->exi = exi;
3502
3503         VN_HOLD(rootdir);
3504         cs->vp = rootdir;
3505
3506         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3507                 VN_RELE(rootdir);
3508                 cs->vp = NULL;
3509                 cs->exi = sav_exi;
3510                 goto out;
3511         }
3512
3513         *cs->statusp = resp->status = NFS4_OK;
3514         cs->deleg = FALSE;
3515 out:
3516         DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3517             PUTROOTFH4res *, resp);
3518 }
3519
3520 /*
3521  * readlink: args: CURRENT_FH.
3522  *      res: status. If success - CURRENT_FH unchanged, return linktext.
3523  */
3524
3525 /* ARGSUSED */
3526 static void
3527 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3528     struct compound_state *cs)
3529 {
3530         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3531         int error;
3532         vnode_t *vp;
3533         struct iovec iov;
3534         struct vattr va;
3535         struct uio uio;
3536         char *data;
3537         struct sockaddr *ca;
3538         char *name = NULL;
3539         int is_referral;
3540
3541         DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3542
3543         /* CURRENT_FH: directory */
3544         vp = cs->vp;
3545         if (vp == NULL) {
3546                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3547                 goto out;
3548         }
3549
3550         if (cs->access == CS_ACCESS_DENIED) {
3551                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3552                 goto out;
3553         }
3554
3555         /* Is it a referral? */
3556         if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3557
3558                 is_referral = 1;
3559
3560         } else {
3561
3562                 is_referral = 0;
3563
3564                 if (vp->v_type == VDIR) {
3565                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
3566                         goto out;
3567                 }
3568
3569                 if (vp->v_type != VLNK) {
3570                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3571                         goto out;
3572                 }
3573
3574         }
3575
3576         va.va_mask = VATTR_MODE;
3577         error = fop_getattr(vp, &va, 0, cs->cr, NULL);
3578         if (error) {
3579                 *cs->statusp = resp->status = puterrno4(error);
3580                 goto out;
3581         }
3582
3583         if (MANDLOCK(vp, va.va_mode)) {
3584                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3585                 goto out;
3586         }
3587
3588         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3589
3590         if (is_referral) {
3591                 char *s;
3592                 size_t strsz;
3593
3594                 /* Get an artificial symlink based on a referral */
3595                 s = build_symlink(vp, cs->cr, &strsz);
3596                 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3597                 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3598                     vnode_t *, vp, char *, s);
3599                 if (s == NULL)
3600                         error = EINVAL;
3601                 else {
3602                         error = 0;
3603                         (void) strlcpy(data, s, MAXPATHLEN + 1);
3604                         kmem_free(s, strsz);
3605                 }
3606
3607         } else {
3608
3609                 iov.iov_base = data;
3610                 iov.iov_len = MAXPATHLEN;
3611                 uio.uio_iov = &iov;
3612                 uio.uio_iovcnt = 1;
3613                 uio.uio_segflg = UIO_SYSSPACE;
3614                 uio.uio_extflg = UIO_COPY_CACHED;
3615                 uio.uio_loffset = 0;
3616                 uio.uio_resid = MAXPATHLEN;
3617
3618                 error = fop_readlink(vp, &uio, cs->cr, NULL);
3619
3620                 if (!error)
3621                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3622         }
3623
3624         if (error) {
3625                 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3626                 *cs->statusp = resp->status = puterrno4(error);
3627                 goto out;
3628         }
3629
3630         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3631         name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3632             MAXPATHLEN  + 1);
3633
3634         if (name == NULL) {
3635                 /*
3636                  * Even though the conversion failed, we return
3637                  * something. We just don't translate it.
3638                  */
3639                 name = data;
3640         }
3641
3642         /*
3643          * treat link name as data
3644          */
3645         (void) str_to_utf8(name, (utf8string *)&resp->link);
3646
3647         if (name != data)
3648                 kmem_free(name, MAXPATHLEN + 1);
3649         kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3650         *cs->statusp = resp->status = NFS4_OK;
3651
3652 out:
3653         DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3654             READLINK4res *, resp);
3655 }
3656
3657 static void
3658 rfs4_op_readlink_free(nfs_resop4 *resop)
3659 {
3660         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3661         utf8string *symlink = (utf8string *)&resp->link;
3662
3663         if (symlink->utf8string_val) {
3664                 UTF8STRING_FREE(*symlink)
3665         }
3666 }
3667
3668 /*
3669  * release_lockowner:
3670  *      Release any state associated with the supplied
3671  *      lockowner. Note if any lo_state is holding locks we will not
3672  *      rele that lo_state and thus the lockowner will not be destroyed.
3673  *      A client using lock after the lock owner stateid has been released
3674  *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3675  *      to reissue the lock with new_lock_owner set to TRUE.
3676  *      args: lock_owner
3677  *      res:  status
3678  */
3679 /* ARGSUSED */
3680 static void
3681 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3682     struct svc_req *req, struct compound_state *cs)
3683 {
3684         RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3685         RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3686         rfs4_lockowner_t *lo;
3687         rfs4_openowner_t *oo;
3688         rfs4_state_t *sp;
3689         rfs4_lo_state_t *lsp;
3690         rfs4_client_t *cp;
3691         bool_t create = FALSE;
3692         locklist_t *llist;
3693         sysid_t sysid;
3694
3695         DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3696             cs, RELEASE_LOCKOWNER4args *, ap);
3697
3698         /* Make sure there is a clientid around for this request */
3699         cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3700
3701         if (cp == NULL) {
3702                 *cs->statusp = resp->status =
3703                     rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3704                 goto out;
3705         }
3706         rfs4_client_rele(cp);
3707
3708         lo = rfs4_findlockowner(&ap->lock_owner, &create);
3709         if (lo == NULL) {
3710                 *cs->statusp = resp->status = NFS4_OK;
3711                 goto out;
3712         }
3713         ASSERT(lo->rl_client != NULL);
3714
3715         /*
3716          * Check for EXPIRED client. If so will reap state with in a lease
3717          * period or on next set_clientid_confirm step
3718          */
3719         if (rfs4_lease_expired(lo->rl_client)) {
3720                 rfs4_lockowner_rele(lo);
3721                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3722                 goto out;
3723         }
3724
3725         /*
3726          * If no sysid has been assigned, then no locks exist; just return.
3727          */
3728         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3729         if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3730                 rfs4_lockowner_rele(lo);
3731                 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3732                 goto out;
3733         }
3734
3735         sysid = lo->rl_client->rc_sysidt;
3736         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3737
3738         /*
3739          * Mark the lockowner invalid.
3740          */
3741         rfs4_dbe_hide(lo->rl_dbe);
3742
3743         /*
3744          * sysid-pid pair should now not be used since the lockowner is
3745          * invalid. If the client were to instantiate the lockowner again
3746          * it would be assigned a new pid. Thus we can get the list of
3747          * current locks.
3748          */
3749
3750         llist = flk_get_active_locks(sysid, lo->rl_pid);
3751         /* If we are still holding locks fail */
3752         if (llist != NULL) {
3753
3754                 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3755
3756                 flk_free_locklist(llist);
3757                 /*
3758                  * We need to unhide the lockowner so the client can
3759                  * try it again. The bad thing here is if the client
3760                  * has a logic error that took it here in the first place
3761                  * they probably have lost accounting of the locks that it
3762                  * is holding. So we may have dangling state until the
3763                  * open owner state is reaped via close. One scenario
3764                  * that could possibly occur is that the client has
3765                  * sent the unlock request(s) in separate threads
3766                  * and has not waited for the replies before sending the
3767                  * RELEASE_LOCKOWNER request. Presumably, it would expect
3768                  * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3769                  * reissuing the request.
3770                  */
3771                 rfs4_dbe_unhide(lo->rl_dbe);
3772                 rfs4_lockowner_rele(lo);
3773                 goto out;
3774         }
3775
3776         /*
3777          * For the corresponding client we need to check each open
3778          * owner for any opens that have lockowner state associated
3779          * with this lockowner.
3780          */
3781
3782         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3783         for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3784             oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3785
3786                 rfs4_dbe_lock(oo->ro_dbe);
3787                 for (sp = list_head(&oo->ro_statelist); sp != NULL;
3788                     sp = list_next(&oo->ro_statelist, sp)) {
3789
3790                         rfs4_dbe_lock(sp->rs_dbe);
3791                         for (lsp = list_head(&sp->rs_lostatelist);
3792                             lsp != NULL;
3793                             lsp = list_next(&sp->rs_lostatelist, lsp)) {
3794                                 if (lsp->rls_locker == lo) {
3795                                         rfs4_dbe_lock(lsp->rls_dbe);
3796                                         rfs4_dbe_invalidate(lsp->rls_dbe);
3797                                         rfs4_dbe_unlock(lsp->rls_dbe);
3798                                 }
3799                         }
3800                         rfs4_dbe_unlock(sp->rs_dbe);
3801                 }
3802                 rfs4_dbe_unlock(oo->ro_dbe);
3803         }
3804         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3805
3806         rfs4_lockowner_rele(lo);
3807
3808         *cs->statusp = resp->status = NFS4_OK;
3809
3810 out:
3811         DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
3812             cs, RELEASE_LOCKOWNER4res *, resp);
3813 }
3814
3815 /*
3816  * short utility function to lookup a file and recall the delegation
3817  */
3818 static rfs4_file_t *
3819 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
3820     int *lkup_error, cred_t *cr)
3821 {
3822         vnode_t *vp;
3823         rfs4_file_t *fp = NULL;
3824         bool_t fcreate = FALSE;
3825         int error;
3826
3827         if (vpp)
3828                 *vpp = NULL;
3829
3830         if ((error = fop_lookup(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
3831             NULL)) == 0) {
3832                 if (vp->v_type == VREG)
3833                         fp = rfs4_findfile(vp, NULL, &fcreate);
3834                 if (vpp)
3835                         *vpp = vp;
3836                 else
3837                         VN_RELE(vp);
3838         }
3839
3840         if (lkup_error)
3841                 *lkup_error = error;
3842
3843         return (fp);
3844 }
3845
3846 /*
3847  * remove: args: CURRENT_FH: directory; name.
3848  *      res: status. If success - CURRENT_FH unchanged, return change_info
3849  *              for directory.
3850  */
3851 /* ARGSUSED */
3852 static void
3853 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3854     struct compound_state *cs)
3855 {
3856         REMOVE4args *args = &argop->nfs_argop4_u.opremove;
3857         REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
3858         int error;
3859         vnode_t *dvp, *vp;
3860         struct vattr bdva, idva, adva;
3861         char *nm;
3862         uint_t len;
3863         rfs4_file_t *fp;
3864         int in_crit = 0;
3865         struct sockaddr *ca;
3866         char *name = NULL;
3867         nfsstat4 status;
3868
3869         DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
3870             REMOVE4args *, args);
3871
3872         /* CURRENT_FH: directory */
3873         dvp = cs->vp;
3874         if (dvp == NULL) {
3875                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3876                 goto out;
3877         }
3878
3879         if (cs->access == CS_ACCESS_DENIED) {
3880                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3881                 goto out;
3882         }
3883
3884         /*
3885          * If there is an unshared filesystem mounted on this vnode,
3886          * Do not allow to remove anything in this directory.
3887          */
3888         if (vn_ismntpt(dvp)) {
3889                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3890                 goto out;
3891         }
3892
3893         if (dvp->v_type != VDIR) {
3894                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3895                 goto out;
3896         }
3897
3898         status = utf8_dir_verify(&args->target);
3899         if (status != NFS4_OK) {
3900                 *cs->statusp = resp->status = status;
3901                 goto out;
3902         }
3903
3904         /*
3905          * Lookup the file so that we can check if it's a directory
3906          */
3907         nm = utf8_to_fn(&args->target, &len, NULL);
3908         if (nm == NULL) {
3909                 *cs->statusp = resp->status = NFS4ERR_INVAL;
3910                 goto out;
3911         }
3912
3913         if (len > MAXNAMELEN) {
3914                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3915                 kmem_free(nm, len);
3916                 goto out;
3917         }
3918
3919         if (rdonly4(req, cs)) {
3920                 *cs->statusp = resp->status = NFS4ERR_ROFS;
3921                 kmem_free(nm, len);
3922                 goto out;
3923         }
3924
3925         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3926         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3927             MAXPATHLEN  + 1);
3928
3929         if (name == NULL) {
3930                 *cs->statusp = resp->status = NFS4ERR_INVAL;
3931                 kmem_free(nm, len);
3932                 goto out;
3933         }
3934
3935         /*
3936          * Lookup the file to determine type and while we are see if
3937          * there is a file struct around and check for delegation.
3938          * We don't need to acquire va_seq before this lookup, if
3939          * it causes an update, cinfo.before will not match, which will
3940          * trigger a cache flush even if atomic is TRUE.
3941          */
3942         if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
3943                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
3944                     NULL)) {
3945                         VN_RELE(vp);
3946                         rfs4_file_rele(fp);
3947                         *cs->statusp = resp->status = NFS4ERR_DELAY;
3948                         if (nm != name)
3949                                 kmem_free(name, MAXPATHLEN + 1);
3950                         kmem_free(nm, len);
3951                         goto out;
3952                 }
3953         }
3954
3955         /* Didn't find anything to remove */
3956         if (vp == NULL) {
3957                 *cs->statusp = resp->status = error;
3958                 if (nm != name)
3959                         kmem_free(name, MAXPATHLEN + 1);
3960                 kmem_free(nm, len);
3961                 goto out;
3962         }
3963
3964         if (nbl_need_check(vp)) {
3965                 nbl_start_crit(vp, RW_READER);
3966                 in_crit = 1;
3967                 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
3968                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
3969                         if (nm != name)
3970                                 kmem_free(name, MAXPATHLEN + 1);
3971                         kmem_free(nm, len);
3972                         nbl_end_crit(vp);
3973                         VN_RELE(vp);
3974                         if (fp) {
3975                                 rfs4_clear_dont_grant(fp);
3976                                 rfs4_file_rele(fp);
3977                         }
3978                         goto out;
3979                 }
3980         }
3981
3982         /* Get dir "before" change value */
3983         bdva.va_mask = VATTR_CTIME|VATTR_SEQ;
3984         error = fop_getattr(dvp, &bdva, 0, cs->cr, NULL);
3985         if (error) {
3986                 *cs->statusp = resp->status = puterrno4(error);
3987                 if (nm != name)
3988                         kmem_free(name, MAXPATHLEN + 1);
3989                 kmem_free(nm, len);
3990                 if (in_crit)
3991                         nbl_end_crit(vp);
3992                 VN_RELE(vp);
3993                 if (fp) {
3994                         rfs4_clear_dont_grant(fp);
3995                         rfs4_file_rele(fp);
3996                 }
3997                 goto out;
3998         }
3999         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4000
4001         /* Actually do the REMOVE operation */
4002         if (vp->v_type == VDIR) {
4003                 /*
4004                  * Can't remove a directory that has a mounted-on filesystem.
4005                  */
4006                 if (vn_ismntpt(vp)) {
4007                         error = EACCES;
4008                 } else {
4009                         /*
4010                          * System V defines rmdir to return EEXIST,
4011                          * not ENOTEMPTY, if the directory is not
4012                          * empty.  A System V NFS server needs to map
4013                          * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4014                          * transmit over the wire.
4015                          */
4016                         if ((error = fop_rmdir(dvp, name, rootdir, cs->cr,
4017                             NULL, 0)) == EEXIST)
4018                                 error = ENOTEMPTY;
4019                 }
4020         } else {
4021                 if ((error = fop_remove(dvp, name, cs->cr, NULL, 0)) == 0 &&
4022                     fp != NULL) {
4023                         struct vattr va;
4024                         vnode_t *tvp;
4025
4026                         rfs4_dbe_lock(fp->rf_dbe);
4027                         tvp = fp->rf_vp;
4028                         if (tvp)
4029                                 VN_HOLD(tvp);
4030                         rfs4_dbe_unlock(fp->rf_dbe);
4031
4032                         if (tvp) {
4033                                 /*
4034                                  * This is va_seq safe because we are not
4035                                  * manipulating dvp.
4036                                  */
4037                                 va.va_mask = VATTR_NLINK;
4038                                 if (!fop_getattr(tvp, &va, 0, cs->cr, NULL) &&
4039                                     va.va_nlink == 0) {
4040                                         /* Remove state on file remove */
4041                                         if (in_crit) {
4042                                                 nbl_end_crit(vp);
4043                                                 in_crit = 0;
4044                                         }
4045                                         rfs4_close_all_state(fp);
4046                                 }
4047                                 VN_RELE(tvp);
4048                         }
4049                 }
4050         }
4051
4052         if (in_crit)
4053                 nbl_end_crit(vp);
4054         VN_RELE(vp);
4055
4056         if (fp) {
4057                 rfs4_clear_dont_grant(fp);
4058                 rfs4_file_rele(fp);
4059         }
4060         if (nm != name)
4061                 kmem_free(name, MAXPATHLEN + 1);
4062         kmem_free(nm, len);
4063
4064         if (error) {
4065                 *cs->statusp = resp->status = puterrno4(error);
4066                 goto out;
4067         }
4068
4069         /*
4070          * Get the initial "after" sequence number, if it fails, set to zero
4071          */
4072         idva.va_mask = VATTR_SEQ;
4073         if (fop_getattr(dvp, &idva, 0, cs->cr, NULL))
4074                 idva.va_seq = 0;
4075
4076         /*
4077          * Force modified data and metadata out to stable storage.
4078          */
4079         (void) fop_fsync(dvp, 0, cs->cr, NULL);
4080
4081         /*
4082          * Get "after" change value, if it fails, simply return the
4083          * before value.
4084          */
4085         adva.va_mask = VATTR_CTIME|VATTR_SEQ;
4086         if (fop_getattr(dvp, &adva, 0, cs->cr, NULL)) {
4087                 adva.va_ctime = bdva.va_ctime;
4088                 adva.va_seq = 0;
4089         }
4090
4091         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4092
4093         /*
4094          * The cinfo.atomic = TRUE only if we have
4095          * non-zero va_seq's, and it has incremented by exactly one
4096          * during the fop_remove/RMDIR and it didn't change during
4097          * the fop_fsync.
4098          */
4099         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4100             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4101                 resp->cinfo.atomic = TRUE;
4102         else
4103                 resp->cinfo.atomic = FALSE;
4104
4105         *cs->statusp = resp->status = NFS4_OK;
4106
4107 out:
4108         DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4109             REMOVE4res *, resp);
4110 }
4111
4112 /*
4113  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4114  *              oldname and newname.
4115  *      res: status. If success - CURRENT_FH unchanged, return change_info
4116  *              for both from and target directories.
4117  */
4118 /* ARGSUSED */
4119 static void
4120 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4121     struct compound_state *cs)
4122 {
4123         RENAME4args *args = &argop->nfs_argop4_u.oprename;
4124         RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4125         int error;
4126         vnode_t *odvp;
4127         vnode_t *ndvp;
4128         vnode_t *srcvp, *targvp;
4129         struct vattr obdva, oidva, oadva;
4130         struct vattr nbdva, nidva, nadva;
4131         char *onm, *nnm;
4132         uint_t olen, nlen;
4133         rfs4_file_t *fp, *sfp;
4134         int in_crit_src, in_crit_targ;
4135         int fp_rele_grant_hold, sfp_rele_grant_hold;
4136         struct sockaddr *ca;
4137         char *converted_onm = NULL;
4138         char *converted_nnm = NULL;
4139         nfsstat4 status;
4140
4141         DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4142             RENAME4args *, args);
4143
4144         fp = sfp = NULL;
4145         srcvp = targvp = NULL;
4146         in_crit_src = in_crit_targ = 0;
4147         fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4148
4149         /* CURRENT_FH: target directory */
4150         ndvp = cs->vp;
4151         if (ndvp == NULL) {
4152                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4153                 goto out;
4154         }
4155
4156         /* SAVED_FH: from directory */
4157         odvp = cs->saved_vp;
4158         if (odvp == NULL) {
4159                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4160                 goto out;
4161         }
4162
4163         if (cs->access == CS_ACCESS_DENIED) {
4164                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4165                 goto out;
4166         }
4167
4168         /*
4169          * If there is an unshared filesystem mounted on this vnode,
4170          * do not allow to rename objects in this directory.
4171          */
4172         if (vn_ismntpt(odvp)) {
4173                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4174                 goto out;
4175         }
4176
4177         /*
4178          * If there is an unshared filesystem mounted on this vnode,
4179          * do not allow to rename to this directory.
4180          */
4181         if (vn_ismntpt(ndvp)) {
4182                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4183                 goto out;
4184         }
4185
4186         if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4187                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4188                 goto out;
4189         }
4190
4191         if (cs->saved_exi != cs->exi) {
4192                 *cs->statusp = resp->status = NFS4ERR_XDEV;
4193                 goto out;
4194         }
4195
4196         status = utf8_dir_verify(&args->oldname);
4197         if (status != NFS4_OK) {
4198                 *cs->statusp = resp->status = status;
4199                 goto out;
4200         }
4201
4202         status = utf8_dir_verify(&args->newname);
4203         if (status != NFS4_OK) {
4204                 *cs->statusp = resp->status = status;
4205                 goto out;
4206         }
4207
4208         onm = utf8_to_fn(&args->oldname, &olen, NULL);
4209         if (onm == NULL) {
4210                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4211                 goto out;
4212         }
4213         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4214         nlen = MAXPATHLEN + 1;
4215         converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4216             nlen);
4217
4218         if (converted_onm == NULL) {
4219                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4220                 kmem_free(onm, olen);
4221                 goto out;
4222         }
4223
4224         nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4225         if (nnm == NULL) {
4226                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4227                 if (onm != converted_onm)
4228                         kmem_free(converted_onm, MAXPATHLEN + 1);
4229                 kmem_free(onm, olen);
4230                 goto out;
4231         }
4232         converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4233             MAXPATHLEN  + 1);
4234
4235         if (converted_nnm == NULL) {
4236                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4237                 kmem_free(nnm, nlen);
4238                 nnm = NULL;
4239                 if (onm != converted_onm)
4240                         kmem_free(converted_onm, MAXPATHLEN + 1);
4241                 kmem_free(onm, olen);
4242                 goto out;
4243         }
4244
4245
4246         if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4247                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4248                 kmem_free(onm, olen);
4249                 kmem_free(nnm, nlen);
4250                 goto out;
4251         }
4252
4253
4254         if (rdonly4(req, cs)) {
4255                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4256                 if (onm != converted_onm)
4257                         kmem_free(converted_onm, MAXPATHLEN + 1);
4258                 kmem_free(onm, olen);
4259                 if (nnm != converted_nnm)
4260                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4261                 kmem_free(nnm, nlen);
4262                 goto out;
4263         }
4264
4265         /*
4266          * Is the source a file and have a delegation?
4267          * We don't need to acquire va_seq before these lookups, if
4268          * it causes an update, cinfo.before will not match, which will
4269          * trigger a cache flush even if atomic is TRUE.
4270          */
4271         if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4272             &error, cs->cr)) {
4273                 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4274                     NULL)) {
4275                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4276                         goto err_out;
4277                 }
4278         }
4279
4280         if (srcvp == NULL) {
4281                 *cs->statusp = resp->status = puterrno4(error);
4282                 if (onm != converted_onm)
4283                         kmem_free(converted_onm, MAXPATHLEN + 1);
4284                 kmem_free(onm, olen);
4285                 if (nnm != converted_nnm)
4286                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4287                 kmem_free(nnm, nlen);
4288                 goto out;
4289         }
4290
4291         sfp_rele_grant_hold = 1;
4292
4293         /* Does the destination exist and a file and have a delegation? */
4294         if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4295             NULL, cs->cr)) {
4296                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4297                     NULL)) {
4298                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4299                         goto err_out;
4300                 }
4301         }
4302         fp_rele_grant_hold = 1;
4303
4304
4305         /* Check for NBMAND lock on both source and target */
4306         if (nbl_need_check(srcvp)) {
4307                 nbl_start_crit(srcvp, RW_READER);
4308                 in_crit_src = 1;
4309                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4310                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4311                         goto err_out;
4312                 }
4313         }
4314
4315         if (targvp && nbl_need_check(targvp)) {
4316                 nbl_start_crit(targvp, RW_READER);
4317                 in_crit_targ = 1;
4318                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4319                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4320                         goto err_out;
4321                 }
4322         }
4323
4324         /* Get source "before" change value */
4325         obdva.va_mask = VATTR_CTIME|VATTR_SEQ;
4326         error = fop_getattr(odvp, &obdva, 0, cs->cr, NULL);
4327         if (!error) {
4328                 nbdva.va_mask = VATTR_CTIME|VATTR_SEQ;
4329                 error = fop_getattr(ndvp, &nbdva, 0, cs->cr, NULL);
4330         }
4331         if (error) {
4332                 *cs->statusp = resp->status = puterrno4(error);
4333                 goto err_out;
4334         }
4335
4336         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4337         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4338
4339         if ((error = fop_rename(odvp, converted_onm, ndvp, converted_nnm,
4340             cs->cr, NULL, 0)) == 0 && fp != NULL) {
4341                 struct vattr va;
4342                 vnode_t *tvp;
4343
4344                 rfs4_dbe_lock(fp->rf_dbe);
4345                 tvp = fp->rf_vp;
4346                 if (tvp)
4347                         VN_HOLD(tvp);
4348                 rfs4_dbe_unlock(fp->rf_dbe);
4349
4350                 if (tvp) {
4351                         va.va_mask = VATTR_NLINK;
4352                         if (!fop_getattr(tvp, &va, 0, cs->cr, NULL) &&
4353                             va.va_nlink == 0) {
4354                                 /* The file is gone and so should the state */
4355                                 if (in_crit_targ) {
4356                                         nbl_end_crit(targvp);
4357                                         in_crit_targ = 0;
4358                                 }
4359                                 rfs4_close_all_state(fp);
4360                         }
4361                         VN_RELE(tvp);
4362                 }
4363         }
4364         if (error == 0)
4365                 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4366
4367         if (in_crit_src)
4368                 nbl_end_crit(srcvp);
4369         if (srcvp)
4370                 VN_RELE(srcvp);
4371         if (in_crit_targ)
4372                 nbl_end_crit(targvp);
4373         if (targvp)
4374                 VN_RELE(targvp);
4375
4376         if (sfp) {
4377                 rfs4_clear_dont_grant(sfp);
4378                 rfs4_file_rele(sfp);
4379         }
4380         if (fp) {
4381                 rfs4_clear_dont_grant(fp);
4382                 rfs4_file_rele(fp);
4383         }
4384
4385         if (converted_onm != onm)
4386                 kmem_free(converted_onm, MAXPATHLEN + 1);
4387         kmem_free(onm, olen);
4388         if (converted_nnm != nnm)
4389                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4390         kmem_free(nnm, nlen);
4391
4392         /*
4393          * Get the initial "after" sequence number, if it fails, set to zero
4394          */
4395         oidva.va_mask = VATTR_SEQ;
4396         if (fop_getattr(odvp, &oidva, 0, cs->cr, NULL))
4397                 oidva.va_seq = 0;
4398
4399         nidva.va_mask = VATTR_SEQ;
4400         if (fop_getattr(ndvp, &nidva, 0, cs->cr, NULL))
4401                 nidva.va_seq = 0;
4402
4403         /*
4404          * Force modified data and metadata out to stable storage.
4405          */
4406         (void) fop_fsync(odvp, 0, cs->cr, NULL);
4407         (void) fop_fsync(ndvp, 0, cs->cr, NULL);
4408
4409         if (error) {
4410                 *cs->statusp = resp->status = puterrno4(error);
4411                 goto out;
4412         }
4413
4414         /*
4415          * Get "after" change values, if it fails, simply return the
4416          * before value.
4417          */
4418         oadva.va_mask = VATTR_CTIME|VATTR_SEQ;
4419         if (fop_getattr(odvp, &oadva, 0, cs->cr, NULL)) {
4420                 oadva.va_ctime = obdva.va_ctime;
4421                 oadva.va_seq = 0;
4422         }
4423
4424         nadva.va_mask = VATTR_CTIME|VATTR_SEQ;
4425         if (fop_getattr(odvp, &nadva, 0, cs->cr, NULL)) {
4426                 nadva.va_ctime = nbdva.va_ctime;
4427                 nadva.va_seq = 0;
4428         }
4429
4430         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4431         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4432
4433         /*
4434          * The cinfo.atomic = TRUE only if we have
4435          * non-zero va_seq's, and it has incremented by exactly one
4436          * during the fop_rename and it didn't change during the fop_fsync.
4437          */
4438         if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4439             oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4440                 resp->source_cinfo.atomic = TRUE;
4441         else
4442                 resp->source_cinfo.atomic = FALSE;
4443
4444         if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4445             nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4446                 resp->target_cinfo.atomic = TRUE;
4447         else
4448                 resp->target_cinfo.atomic = FALSE;
4449
4450 #ifdef  VOLATILE_FH_TEST
4451         {
4452         extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4453
4454         /*
4455          * Add the renamed file handle to the volatile rename list
4456          */
4457         if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4458                 /* file handles may expire on rename */
4459                 vnode_t *vp;
4460
4461                 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4462                 /*
4463                  * Already know that nnm will be a valid string
4464                  */
4465                 error = fop_lookup(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4466                     NULL, NULL, NULL);
4467                 kmem_free(nnm, nlen);
4468                 if (!error) {
4469                         add_volrnm_fh(cs->exi, vp);
4470                         VN_RELE(vp);
4471                 }
4472         }
4473         }
4474 #endif  /* VOLATILE_FH_TEST */
4475
4476         *cs->statusp = resp->status = NFS4_OK;
4477 out:
4478         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4479             RENAME4res *, resp);
4480         return;
4481
4482 err_out:
4483         if (onm != converted_onm)
4484                 kmem_free(converted_onm, MAXPATHLEN + 1);
4485         if (onm != NULL)
4486                 kmem_free(onm, olen);
4487         if (nnm != converted_nnm)
4488                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4489         if (nnm != NULL)
4490                 kmem_free(nnm, nlen);
4491
4492         if (in_crit_src) nbl_end_crit(srcvp);
4493         if (in_crit_targ) nbl_end_crit(targvp);
4494         if (targvp) VN_RELE(targvp);
4495         if (srcvp) VN_RELE(srcvp);
4496         if (sfp) {
4497                 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4498                 rfs4_file_rele(sfp);
4499         }
4500         if (fp) {
4501                 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4502                 rfs4_file_rele(fp);
4503         }
4504
4505         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4506             RENAME4res *, resp);
4507 }
4508
4509 /* ARGSUSED */
4510 static void
4511 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4512     struct compound_state *cs)
4513 {
4514         RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4515         RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4516         rfs4_client_t *cp;
4517
4518         DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4519             RENEW4args *, args);
4520
4521         if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4522                 *cs->statusp = resp->status =
4523                     rfs4_check_clientid(&args->clientid, 0);
4524                 goto out;
4525         }
4526
4527         if (rfs4_lease_expired(cp)) {
4528                 rfs4_client_rele(cp);
4529                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4530                 goto out;
4531         }
4532
4533         rfs4_update_lease(cp);
4534
4535         mutex_enter(cp->rc_cbinfo.cb_lock);
4536         if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4537                 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4538                 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4539         } else {
4540                 *cs->statusp = resp->status = NFS4_OK;
4541         }
4542         mutex_exit(cp->rc_cbinfo.cb_lock);
4543
4544         rfs4_client_rele(cp);
4545
4546 out:
4547         DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4548             RENEW4res *, resp);
4549 }
4550
4551 /* ARGSUSED */
4552 static void
4553 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4554     struct compound_state *cs)
4555 {
4556         RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4557
4558         DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4559
4560         /* No need to check cs->access - we are not accessing any object */
4561         if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4562                 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4563                 goto out;
4564         }
4565         if (cs->vp != NULL) {
4566                 VN_RELE(cs->vp);
4567         }
4568         cs->vp = cs->saved_vp;
4569         cs->saved_vp = NULL;
4570         cs->exi = cs->saved_exi;
4571         nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4572         *cs->statusp = resp->status = NFS4_OK;
4573         cs->deleg = FALSE;
4574
4575 out:
4576         DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4577             RESTOREFH4res *, resp);
4578 }
4579
4580 /* ARGSUSED */
4581 static void
4582 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4583     struct compound_state *cs)
4584 {
4585         SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4586
4587         DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4588
4589         /* No need to check cs->access - we are not accessing any object */
4590         if (cs->vp == NULL) {
4591                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4592                 goto out;
4593         }
4594         if (cs->saved_vp != NULL) {
4595                 VN_RELE(cs->saved_vp);
4596         }
4597         cs->saved_vp = cs->vp;
4598         VN_HOLD(cs->saved_vp);
4599         cs->saved_exi = cs->exi;
4600         /*
4601          * since SAVEFH is fairly rare, don't alloc space for its fh
4602          * unless necessary.
4603          */
4604         if (cs->saved_fh.nfs_fh4_val == NULL) {
4605                 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4606         }
4607         nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4608         *cs->statusp = resp->status = NFS4_OK;
4609
4610 out:
4611         DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4612             SAVEFH4res *, resp);
4613 }
4614
4615 /*
4616  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4617  * return the bitmap of attrs that were set successfully. It is also
4618  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4619  * always be called only after rfs4_do_set_attrs().
4620  *
4621  * Verify that the attributes are same as the expected ones. sargp->vap
4622  * and sargp->sbp contain the input attributes as translated from fattr4.
4623  *
4624  * This function verifies only the attrs that correspond to a vattr or
4625  * vfsstat struct. That is because of the extra step needed to get the
4626  * corresponding system structs. Other attributes have already been set or
4627  * verified by do_rfs4_set_attrs.
4628  *
4629  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4630  */
4631 static int
4632 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4633     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4634 {
4635         int error, ret_error = 0;
4636         int i, k;
4637         uint_t sva_mask = sargp->vap->va_mask;
4638         uint_t vbit;
4639         union nfs4_attr_u *na;
4640         uint8_t *amap;
4641         bool_t getsb = ntovp->vfsstat;
4642
4643         if (sva_mask != 0) {
4644                 /*
4645                  * Okay to overwrite sargp->vap because we verify based
4646                  * on the incoming values.
4647                  */
4648                 ret_error = fop_getattr(sargp->cs->vp, sargp->vap, 0,
4649                     sargp->cs->cr, NULL);
4650                 if (ret_error) {
4651                         if (resp == NULL)
4652                                 return (ret_error);
4653                         /*
4654                          * Must return bitmap of successful attrs
4655                          */
4656                         sva_mask = 0;   /* to prevent checking vap later */
4657                 } else {
4658                         /*
4659                          * Some file systems clobber va_mask. it is probably
4660                          * wrong of them to do so, nonethless we practice
4661                          * defensive coding.
4662                          * See bug id 4276830.
4663                          */
4664                         sargp->vap->va_mask = sva_mask;
4665                 }
4666         }
4667
4668         if (getsb) {
4669                 /*
4670                  * Now get the superblock and loop on the bitmap, as there is
4671                  * no simple way of translating from superblock to bitmap4.
4672                  */
4673                 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4674                 if (ret_error) {
4675                         if (resp == NULL)
4676                                 goto errout;
4677                         getsb = FALSE;
4678                 }
4679         }
4680
4681         /*
4682          * Now loop and verify each attribute which getattr returned
4683          * whether it's the same as the input.
4684          */
4685         if (resp == NULL && !getsb && (sva_mask == 0))
4686                 goto errout;
4687
4688         na = ntovp->na;
4689         amap = ntovp->amap;
4690         k = 0;
4691         for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4692                 k = *amap;
4693                 ASSERT(nfs4_ntov_map[k].nval == k);
4694                 vbit = nfs4_ntov_map[k].vbit;
4695
4696                 /*
4697                  * If vattr attribute but fop_getattr failed, or it's
4698                  * superblock attribute but VFS_STATVFS failed, skip
4699                  */
4700                 if (vbit) {
4701                         if ((vbit & sva_mask) == 0)
4702                                 continue;
4703                 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4704                         continue;
4705                 }
4706                 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4707                 if (resp != NULL) {
4708                         if (error)
4709                                 ret_error = -1; /* not all match */
4710                         else    /* update response bitmap */
4711                                 *resp |= nfs4_ntov_map[k].fbit;
4712                         continue;
4713                 }
4714                 if (error) {
4715                         ret_error = -1; /* not all match */
4716                         break;
4717                 }
4718         }
4719 errout:
4720         return (ret_error);
4721 }
4722
4723 /*
4724  * Decode the attribute to be set/verified. If the attr requires a sys op
4725  * (fop_getattr, VFS_VFSSTAT), and the request is to verify, then don't
4726  * call the sv_getit function for it, because the sys op hasn't yet been done.
4727  * Return 0 for success, error code if failed.
4728  *
4729  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
4730  */
4731 static int
4732 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
4733     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
4734 {
4735         int error = 0;
4736         bool_t set_later;
4737
4738         sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
4739
4740         if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
4741                 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
4742                 /*
4743                  * don't verify yet if a vattr or sb dependent attr,
4744                  * because we don't have their sys values yet.
4745                  * Will be done later.
4746                  */
4747                 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
4748                         /*
4749                          * ACLs are a special case, since setting the MODE
4750                          * conflicts with setting the ACL.  We delay setting
4751                          * the ACL until all other attributes have been set.
4752                          * The ACL gets set in do_rfs4_op_setattr().
4753                          */
4754                         if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
4755                                 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
4756                                     sargp, nap);
4757                                 if (error) {
4758                                         xdr_free(nfs4_ntov_map[k].xfunc,
4759                                             (caddr_t)nap);
4760                                 }
4761                         }
4762                 }
4763         } else {
4764 #ifdef  DEBUG
4765                 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
4766                     "decoding attribute %d\n", k);
4767 #endif
4768                 error = EINVAL;
4769         }
4770         if (!error && resp_bval && !set_later) {
4771                 *resp_bval |= nfs4_ntov_map[k].fbit;
4772         }
4773
4774         return (error);
4775 }
4776
4777 /*
4778  * Set vattr based on incoming fattr4 attrs - used by setattr.
4779  * Set response mask. Ignore any values that are not writable vattr attrs.
4780  */
4781 static nfsstat4
4782 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
4783     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
4784     nfs4_attr_cmd_t cmd)
4785 {
4786         int error = 0;
4787         int i;
4788         char *attrs = fattrp->attrlist4;
4789         uint32_t attrslen = fattrp->attrlist4_len;
4790         XDR xdr;
4791         nfsstat4 status = NFS4_OK;
4792         vnode_t *vp = cs->vp;
4793         union nfs4_attr_u *na;
4794         uint8_t *amap;
4795
4796         /*
4797          * Make sure that maximum attribute number can be expressed as an
4798          * 8 bit quantity.
4799          */
4800         ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
4801
4802         if (vp == NULL) {
4803                 if (resp)
4804                         *resp = 0;
4805                 return (NFS4ERR_NOFILEHANDLE);
4806         }
4807         if (cs->access == CS_ACCESS_DENIED) {
4808                 if (resp)
4809                         *resp = 0;
4810                 return (NFS4ERR_ACCESS);
4811         }
4812
4813         sargp->op = cmd;
4814         sargp->cs = cs;
4815         sargp->flag = 0;        /* may be set later */
4816         sargp->vap->va_mask = 0;
4817         sargp->rdattr_error = NFS4_OK;
4818         sargp->rdattr_error_req = FALSE;
4819         /* sargp->sbp is set by the caller */
4820
4821         xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
4822
4823         na = ntovp->na;
4824         amap = ntovp->amap;
4825
4826         /*
4827          * The following loop iterates on the nfs4_ntov_map checking
4828          * if the fbit is set in the requested bitmap.
4829          * If set then we process the arguments using the
4830          * rfs4_fattr4 conversion functions to populate the setattr
4831          * vattr and va_mask. Any settable attrs that are not using vattr
4832          * will be set in this loop.
4833          */
4834         for (i = 0; i < nfs4_ntov_map_size; i++) {
4835                 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
4836                         continue;
4837                 }
4838                 /*
4839                  * If setattr, must be a writable attr.
4840                  * If verify/nverify, must be a readable attr.
4841                  */
4842                 if ((error = (*nfs4_ntov_map[i].sv_getit)(
4843                     NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
4844                         /*
4845                          * Client tries to set/verify an
4846                          * unsupported attribute, tries to set
4847                          * a read only attr or verify a write
4848                          * only one - error!
4849                          */
4850                         break;
4851                 }
4852                 /*
4853                  * Decode the attribute to set/verify
4854                  */
4855                 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
4856                     &xdr, resp ? resp : NULL, na);
4857                 if (error)
4858                         break;
4859                 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
4860                 na++;
4861                 (ntovp->attrcnt)++;
4862                 if (nfs4_ntov_map[i].vfsstat)
4863                         ntovp->vfsstat = TRUE;
4864         }
4865
4866         if (error != 0)
4867                 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
4868                     puterrno4(error));
4869         /* xdrmem_destroy(&xdrs); */    /* NO-OP */
4870         return (status);
4871 }
4872
4873 static nfsstat4
4874 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
4875     stateid4 *stateid)
4876 {
4877         int error = 0;
4878         struct nfs4_svgetit_arg sarg;
4879         bool_t trunc;
4880
4881         nfsstat4 status = NFS4_OK;
4882         cred_t *cr = cs->cr;
4883         vnode_t *vp = cs->vp;
4884         struct nfs4_ntov_table ntov;
4885         struct statvfs64 sb;
4886         struct vattr bva;
4887         struct flock64 bf;
4888         int in_crit = 0;
4889         uint_t saved_mask = 0;
4890         caller_context_t ct;
4891
4892         *resp = 0;
4893         sarg.sbp = &sb;
4894         sarg.is_referral = B_FALSE;
4895         nfs4_ntov_table_init(&ntov);
4896         status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
4897             NFS4ATTR_SETIT);
4898         if (status != NFS4_OK) {
4899                 /*
4900                  * failed set attrs
4901                  */
4902                 goto done;
4903         }
4904         if ((sarg.vap->va_mask == 0) &&
4905             (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
4906                 /*
4907                  * no further work to be done
4908                  */
4909                 goto done;
4910         }
4911
4912         /*
4913          * If we got a request to set the ACL and the MODE, only
4914          * allow changing VSUID, VSGID, and VSVTX.  Attempting
4915          * to change any other bits, along with setting an ACL,
4916          * gives NFS4ERR_INVAL.
4917          */
4918         if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
4919             (fattrp->attrmask & FATTR4_MODE_MASK)) {
4920                 vattr_t va;
4921
4922                 va.va_mask = VATTR_MODE;
4923                 error = fop_getattr(vp, &va, 0, cs->cr, NULL);
4924                 if (error) {
4925                         status = puterrno4(error);
4926                         goto done;
4927                 }
4928                 if ((sarg.vap->va_mode ^ va.va_mode) &
4929                     ~(VSUID | VSGID | VSVTX)) {
4930                         status = NFS4ERR_INVAL;
4931                         goto done;
4932                 }
4933         }
4934
4935         /* Check stateid only if size has been set */
4936         if (sarg.vap->va_mask & VATTR_SIZE) {
4937                 trunc = (sarg.vap->va_size == 0);
4938                 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
4939                     trunc, &cs->deleg, sarg.vap->va_mask & VATTR_SIZE, &ct);
4940                 if (status != NFS4_OK)
4941                         goto done;
4942         } else {
4943                 ct.cc_sysid = 0;
4944                 ct.cc_pid = 0;
4945                 ct.cc_caller_id = nfs4_srv_caller_id;
4946                 ct.cc_flags = CC_DONTBLOCK;
4947         }
4948
4949         /* XXX start of possible race with delegations */
4950
4951         /*
4952          * We need to specially handle size changes because it is
4953          * possible for the client to create a file with read-only
4954          * modes, but with the file opened for writing. If the client
4955          * then tries to set the file size, e.g. ftruncate(3C),
4956          * fcntl(F_FREESP), the normal access checking done in
4957          * fop_setattr would prevent the client from doing it even though
4958          * it should be allowed to do so.  To get around this, we do the
4959          * access checking for ourselves and use fop_space which doesn't
4960          * do the access checking.
4961          * Also the client should not be allowed to change the file
4962          * size if there is a conflicting non-blocking mandatory lock in
4963          * the region of the change.
4964          */
4965         if (vp->v_type == VREG && (sarg.vap->va_mask & VATTR_SIZE)) {
4966                 uoff_t offset;
4967                 ssize_t length;
4968
4969                 /*
4970                  * ufs_setattr clears VATTR_SIZE from vap->va_mask, but
4971                  * before returning, sarg.vap->va_mask is used to
4972                  * generate the setattr reply bitmap.  We also clear
4973                  * VATTR_SIZE below before calling fop_space.  For both
4974                  * of these cases, the va_mask needs to be saved here
4975                  * and restored after calling fop_setattr.
4976                  */
4977                 saved_mask = sarg.vap->va_mask;
4978
4979                 /*
4980                  * Check any possible conflict due to NBMAND locks.
4981                  * Get into critical region before fop_getattr, so the
4982                  * size attribute is valid when checking conflicts.
4983                  */
4984                 if (nbl_need_check(vp)) {
4985                         nbl_start_crit(vp, RW_READER);
4986                         in_crit = 1;
4987                 }
4988
4989                 bva.va_mask = VATTR_UID|VATTR_SIZE;
4990                 if (error = fop_getattr(vp, &bva, 0, cr, &ct)) {
4991                         status = puterrno4(error);
4992                         goto done;
4993                 }
4994
4995                 if (in_crit) {
4996                         if (sarg.vap->va_size < bva.va_size) {
4997                                 offset = sarg.vap->va_size;
4998                                 length = bva.va_size - sarg.vap->va_size;
4999                         } else {
5000                                 offset = bva.va_size;
5001                                 length = sarg.vap->va_size - bva.va_size;
5002                         }
5003                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5004                             &ct)) {
5005                                 status = NFS4ERR_LOCKED;
5006                                 goto done;
5007                         }
5008                 }
5009
5010                 if (crgetuid(cr) == bva.va_uid) {
5011                         sarg.vap->va_mask &= ~VATTR_SIZE;
5012                         bf.l_type = F_WRLCK;
5013                         bf.l_whence = 0;
5014                         bf.l_start = (off64_t)sarg.vap->va_size;
5015                         bf.l_len = 0;
5016                         bf.l_sysid = 0;
5017                         bf.l_pid = 0;
5018                         error = fop_space(vp, F_FREESP, &bf, FWRITE,
5019                             (offset_t)sarg.vap->va_size, cr, &ct);
5020                 }
5021         }
5022
5023         if (!error && sarg.vap->va_mask != 0)
5024                 error = fop_setattr(vp, sarg.vap, sarg.flag, cr, &ct);
5025
5026         /* restore va_mask -- ufs_setattr clears VATTR_SIZE */
5027         if (saved_mask & VATTR_SIZE)
5028                 sarg.vap->va_mask |= VATTR_SIZE;
5029
5030         /*
5031          * If an ACL was being set, it has been delayed until now,
5032          * in order to set the mode (via the fop_setattr() above) first.
5033          */
5034         if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5035                 int i;
5036
5037                 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5038                         if (ntov.amap[i] == FATTR4_ACL)
5039                                 break;
5040                 if (i < NFS4_MAXNUM_ATTRS) {
5041                         error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5042                             NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5043                         if (error == 0) {
5044                                 *resp |= FATTR4_ACL_MASK;
5045                         } else if (error == ENOTSUP) {
5046                                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5047                                 status = NFS4ERR_ATTRNOTSUPP;
5048                                 goto done;
5049                         }
5050                 } else {
5051                         NFS4_DEBUG(rfs4_debug,
5052                             (CE_NOTE, "do_rfs4_op_setattr: "
5053                             "unable to find ACL in fattr4"));
5054                         error = EINVAL;
5055                 }
5056         }
5057
5058         if (error) {
5059                 /* check if a monitor detected a delegation conflict */
5060                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5061                         status = NFS4ERR_DELAY;
5062                 else
5063                         status = puterrno4(error);
5064
5065                 /*
5066                  * Set the response bitmap when setattr failed.
5067                  * If fop_setattr partially succeeded, test by doing a
5068                  * fop_getattr on the object and comparing the data
5069                  * to the setattr arguments.
5070                  */
5071                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5072         } else {
5073                 /*
5074                  * Force modified metadata out to stable storage.
5075                  */
5076                 (void) fop_fsync(vp, FNODSYNC, cr, &ct);
5077                 /*
5078                  * Set response bitmap
5079                  */
5080                 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5081         }
5082
5083 /* Return early and already have a NFSv4 error */
5084 done:
5085         /*
5086          * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5087          * conversion sets both readable and writeable NFS4 attrs
5088          * for VATTR_MTIME and VATTR_ATIME.  The line below masks out
5089          * unrequested attrs from the setattr result bitmap.  This
5090          * is placed after the done: label to catch the ATTRNOTSUP
5091          * case.
5092          */
5093         *resp &= fattrp->attrmask;
5094
5095         if (in_crit)
5096                 nbl_end_crit(vp);
5097
5098         nfs4_ntov_table_free(&ntov, &sarg);
5099
5100         return (status);
5101 }
5102
5103 /* ARGSUSED */
5104 static void
5105 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5106     struct compound_state *cs)
5107 {
5108         SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5109         SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5110
5111         DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5112             SETATTR4args *, args);
5113
5114         if (cs->vp == NULL) {
5115                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5116                 goto out;
5117         }
5118
5119         /*
5120          * If there is an unshared filesystem mounted on this vnode,
5121          * do not allow to setattr on this vnode.
5122          */
5123         if (vn_ismntpt(cs->vp)) {
5124                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5125                 goto out;
5126         }
5127
5128         resp->attrsset = 0;
5129
5130         if (rdonly4(req, cs)) {
5131                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5132                 goto out;
5133         }
5134
5135         *cs->statusp = resp->status =
5136             do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5137             &args->stateid);
5138
5139 out:
5140         DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5141             SETATTR4res *, resp);
5142 }
5143
5144 /* ARGSUSED */
5145 static void
5146 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5147     struct compound_state *cs)
5148 {
5149         /*
5150          * verify and nverify are exactly the same, except that nverify
5151          * succeeds when some argument changed, and verify succeeds when
5152          * when none changed.
5153          */
5154
5155         VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5156         VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5157
5158         int error;
5159         struct nfs4_svgetit_arg sarg;
5160         struct statvfs64 sb;
5161         struct nfs4_ntov_table ntov;
5162
5163         DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5164             VERIFY4args *, args);
5165
5166         if (cs->vp == NULL) {
5167                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5168                 goto out;
5169         }
5170
5171         sarg.sbp = &sb;
5172         sarg.is_referral = B_FALSE;
5173         nfs4_ntov_table_init(&ntov);
5174         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5175             &sarg, &ntov, NFS4ATTR_VERIT);
5176         if (resp->status != NFS4_OK) {
5177                 /*
5178                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5179                  * so could return -1 for "no match".
5180                  */
5181                 if (resp->status == -1)
5182                         resp->status = NFS4ERR_NOT_SAME;
5183                 goto done;
5184         }
5185         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5186         switch (error) {
5187         case 0:
5188                 resp->status = NFS4_OK;
5189                 break;
5190         case -1:
5191                 resp->status = NFS4ERR_NOT_SAME;
5192                 break;
5193         default:
5194                 resp->status = puterrno4(error);
5195                 break;
5196         }
5197 done:
5198         *cs->statusp = resp->status;
5199         nfs4_ntov_table_free(&ntov, &sarg);
5200 out:
5201         DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5202             VERIFY4res *, resp);
5203 }
5204
5205 /* ARGSUSED */
5206 static void
5207 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5208     struct compound_state *cs)
5209 {
5210         /*
5211          * verify and nverify are exactly the same, except that nverify
5212          * succeeds when some argument changed, and verify succeeds when
5213          * when none changed.
5214          */
5215
5216         NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5217         NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5218
5219         int error;
5220         struct nfs4_svgetit_arg sarg;
5221         struct statvfs64 sb;
5222         struct nfs4_ntov_table ntov;
5223
5224         DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5225             NVERIFY4args *, args);
5226
5227         if (cs->vp == NULL) {
5228                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5229                 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5230                     NVERIFY4res *, resp);
5231                 return;
5232         }
5233         sarg.sbp = &sb;
5234         sarg.is_referral = B_FALSE;
5235         nfs4_ntov_table_init(&ntov);
5236         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5237             &sarg, &ntov, NFS4ATTR_VERIT);
5238         if (resp->status != NFS4_OK) {
5239                 /*
5240                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5241                  * so could return -1 for "no match".
5242                  */
5243                 if (resp->status == -1)
5244                         resp->status = NFS4_OK;
5245                 goto done;
5246         }
5247         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5248         switch (error) {
5249         case 0:
5250                 resp->status = NFS4ERR_SAME;
5251                 break;
5252         case -1:
5253                 resp->status = NFS4_OK;
5254                 break;
5255         default:
5256                 resp->status = puterrno4(error);
5257                 break;
5258         }
5259 done:
5260         *cs->statusp = resp->status;
5261         nfs4_ntov_table_free(&ntov, &sarg);
5262
5263         DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5264             NVERIFY4res *, resp);
5265 }
5266
/*
 * Size of the on-stack iovec array used by rfs4_op_write().  A write
 * whose mblk chain needs more entries than this uses a kmem_alloc()ed
 * array instead.
 *
 * XXX - This should live in an NFS header file.
 */
#define MAX_IOVECS      12
5271
5272 /* ARGSUSED */
5273 static void
5274 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5275     struct compound_state *cs)
5276 {
5277         WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5278         WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5279         int error;
5280         vnode_t *vp;
5281         struct vattr bva;
5282         uoff_t rlimit;
5283         struct uio uio;
5284         struct iovec iov[MAX_IOVECS];
5285         struct iovec *iovp;
5286         int iovcnt;
5287         int ioflag;
5288         cred_t *savecred, *cr;
5289         bool_t *deleg = &cs->deleg;
5290         nfsstat4 stat;
5291         int in_crit = 0;
5292         caller_context_t ct;
5293
5294         DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5295             WRITE4args *, args);
5296
5297         vp = cs->vp;
5298         if (vp == NULL) {
5299                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5300                 goto out;
5301         }
5302         if (cs->access == CS_ACCESS_DENIED) {
5303                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5304                 goto out;
5305         }
5306
5307         cr = cs->cr;
5308
5309         if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5310             deleg, TRUE, &ct)) != NFS4_OK) {
5311                 *cs->statusp = resp->status = stat;
5312                 goto out;
5313         }
5314
5315         /*
5316          * We have to enter the critical region before calling fop_rwlock
5317          * to avoid a deadlock with ufs.
5318          */
5319         if (nbl_need_check(vp)) {
5320                 nbl_start_crit(vp, RW_READER);
5321                 in_crit = 1;
5322                 if (nbl_conflict(vp, NBL_WRITE,
5323                     args->offset, args->data_len, 0, &ct)) {
5324                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
5325                         goto out;
5326                 }
5327         }
5328
5329         bva.va_mask = VATTR_MODE | VATTR_UID;
5330         error = fop_getattr(vp, &bva, 0, cr, &ct);
5331
5332         /*
5333          * If we can't get the attributes, then we can't do the
5334          * right access checking.  So, we'll fail the request.
5335          */
5336         if (error) {
5337                 *cs->statusp = resp->status = puterrno4(error);
5338                 goto out;
5339         }
5340
5341         if (rdonly4(req, cs)) {
5342                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5343                 goto out;
5344         }
5345
5346         if (vp->v_type != VREG) {
5347                 *cs->statusp = resp->status =
5348                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5349                 goto out;
5350         }
5351
5352         if (crgetuid(cr) != bva.va_uid &&
5353             (error = fop_access(vp, VWRITE, 0, cr, &ct))) {
5354                 *cs->statusp = resp->status = puterrno4(error);
5355                 goto out;
5356         }
5357
5358         if (MANDLOCK(vp, bva.va_mode)) {
5359                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5360                 goto out;
5361         }
5362
5363         if (args->data_len == 0) {
5364                 *cs->statusp = resp->status = NFS4_OK;
5365                 resp->count = 0;
5366                 resp->committed = args->stable;
5367                 resp->writeverf = Write4verf;
5368                 goto out;
5369         }
5370
5371         if (args->mblk != NULL) {
5372                 mblk_t *m;
5373                 uint_t bytes, round_len;
5374
5375                 iovcnt = 0;
5376                 bytes = 0;
5377                 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5378                 for (m = args->mblk;
5379                     m != NULL && bytes < round_len;
5380                     m = m->b_cont) {
5381                         iovcnt++;
5382                         bytes += MBLKL(m);
5383                 }
5384 #ifdef DEBUG
5385                 /* should have ended on an mblk boundary */
5386                 if (bytes != round_len) {
5387                         printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5388                             bytes, round_len, args->data_len);
5389                         printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5390                             (void *)args->mblk, (void *)m);
5391                         ASSERT(bytes == round_len);
5392                 }
5393 #endif
5394                 if (iovcnt <= MAX_IOVECS) {
5395                         iovp = iov;
5396                 } else {
5397                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5398                 }
5399                 mblk_to_iov(args->mblk, iovcnt, iovp);
5400         } else if (args->rlist != NULL) {
5401                 iovcnt = 1;
5402                 iovp = iov;
5403                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5404                 iovp->iov_len = args->data_len;
5405         } else {
5406                 iovcnt = 1;
5407                 iovp = iov;
5408                 iovp->iov_base = args->data_val;
5409                 iovp->iov_len = args->data_len;
5410         }
5411
5412         uio.uio_iov = iovp;
5413         uio.uio_iovcnt = iovcnt;
5414
5415         uio.uio_segflg = UIO_SYSSPACE;
5416         uio.uio_extflg = UIO_COPY_DEFAULT;
5417         uio.uio_loffset = args->offset;
5418         uio.uio_resid = args->data_len;
5419         uio.uio_llimit = curproc->p_fsz_ctl;
5420         rlimit = uio.uio_llimit - args->offset;
5421         if (rlimit < (uoff_t)uio.uio_resid)
5422                 uio.uio_resid = (int)rlimit;
5423
5424         if (args->stable == UNSTABLE4)
5425                 ioflag = 0;
5426         else if (args->stable == FILE_SYNC4)
5427                 ioflag = FSYNC;
5428         else if (args->stable == DATA_SYNC4)
5429                 ioflag = FDSYNC;
5430         else {
5431                 if (iovp != iov)
5432                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
5433                 *cs->statusp = resp->status = NFS4ERR_INVAL;
5434                 goto out;
5435         }
5436
5437         /*
5438          * We're changing creds because VM may fault and we need
5439          * the cred of the current thread to be used if quota
5440          * checking is enabled.
5441          */
5442         savecred = curthread->t_cred;
5443         curthread->t_cred = cr;
5444         error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5445         curthread->t_cred = savecred;
5446
5447         if (iovp != iov)
5448                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5449
5450         if (error) {
5451                 *cs->statusp = resp->status = puterrno4(error);
5452                 goto out;
5453         }
5454
5455         *cs->statusp = resp->status = NFS4_OK;
5456         resp->count = args->data_len - uio.uio_resid;
5457
5458         if (ioflag == 0)
5459                 resp->committed = UNSTABLE4;
5460         else
5461                 resp->committed = FILE_SYNC4;
5462
5463         resp->writeverf = Write4verf;
5464
5465 out:
5466         if (in_crit)
5467                 nbl_end_crit(vp);
5468
5469         DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5470             WRITE4res *, resp);
5471 }
5472
5473
/*
 * Prototype for sec_svc_getcred(), which rfs4_compound() calls to fill
 * in the request's credential, principal and security flavor.  A return
 * value of 0 is treated there as a bad credential.
 *
 * XXX put in a header file
 */
extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5476
5477 void
5478 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5479     struct svc_req *req, cred_t *cr, int *rv)
5480 {
5481         uint_t i;
5482         struct compound_state cs;
5483
5484         if (rv != NULL)
5485                 *rv = 0;
5486         rfs4_init_compound_state(&cs);
5487         /*
5488          * Form a reply tag by copying over the reqeuest tag.
5489          */
5490         resp->tag.utf8string_val =
5491             kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5492         resp->tag.utf8string_len = args->tag.utf8string_len;
5493         bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5494             resp->tag.utf8string_len);
5495
5496         cs.statusp = &resp->status;
5497         cs.req = req;
5498         resp->array = NULL;
5499         resp->array_len = 0;
5500
5501         /*
5502          * XXX for now, minorversion should be zero
5503          */
5504         if (args->minorversion != NFS4_MINORVERSION) {
5505                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5506                     &cs, COMPOUND4args *, args);
5507                 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5508                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5509                     &cs, COMPOUND4res *, resp);
5510                 return;
5511         }
5512
5513         if (args->array_len == 0) {
5514                 resp->status = NFS4_OK;
5515                 return;
5516         }
5517
5518         ASSERT(exi == NULL);
5519         ASSERT(cr == NULL);
5520
5521         cr = crget();
5522         ASSERT(cr != NULL);
5523
5524         if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5525                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5526                     &cs, COMPOUND4args *, args);
5527                 crfree(cr);
5528                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5529                     &cs, COMPOUND4res *, resp);
5530                 svcerr_badcred(req->rq_xprt);
5531                 if (rv != NULL)
5532                         *rv = 1;
5533                 return;
5534         }
5535         resp->array_len = args->array_len;
5536         resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5537             KM_SLEEP);
5538
5539         cs.basecr = cr;
5540
5541         DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5542             COMPOUND4args *, args);
5543
5544         /*
5545          * For now, NFS4 compound processing must be protected by
5546          * exported_lock because it can access more than one exportinfo
5547          * per compound and share/unshare can now change multiple
5548          * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5549          * per proc (excluding public exinfo), and exi_count design
5550          * is sufficient to protect concurrent execution of NFS2/3
5551          * ops along with unexport.  This lock will be removed as
5552          * part of the NFSv4 phase 2 namespace redesign work.
5553          */
5554         rw_enter(&exported_lock, RW_READER);
5555
5556         /*
5557          * If this is the first compound we've seen, we need to start all
5558          * new instances' grace periods.
5559          */
5560         if (rfs4_seen_first_compound == 0) {
5561                 rfs4_grace_start_new();
5562                 /*
5563                  * This must be set after rfs4_grace_start_new(), otherwise
5564                  * another thread could proceed past here before the former
5565                  * is finished.
5566                  */
5567                 rfs4_seen_first_compound = 1;
5568         }
5569
5570         for (i = 0; i < args->array_len && cs.cont; i++) {
5571                 nfs_argop4 *argop;
5572                 nfs_resop4 *resop;
5573                 uint_t op;
5574
5575                 argop = &args->array[i];
5576                 resop = &resp->array[i];
5577                 resop->resop = argop->argop;
5578                 op = (uint_t)resop->resop;
5579
5580                 if (op < rfsv4disp_cnt) {
5581                         /*
5582                          * Count the individual ops here; NULL and COMPOUND
5583                          * are counted in common_dispatch()
5584                          */
5585                         rfsproccnt_v4_ptr[op].value.ui64++;
5586
5587                         NFS4_DEBUG(rfs4_debug > 1,
5588                             (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5589                         (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5590                         NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5591                             rfs4_op_string[op], *cs.statusp));
5592                         if (*cs.statusp != NFS4_OK)
5593                                 cs.cont = FALSE;
5594                 } else {
5595                         /*
5596                          * This is effectively dead code since XDR code
5597                          * will have already returned BADXDR if op doesn't
5598                          * decode to legal value.  This only done for a
5599                          * day when XDR code doesn't verify v4 opcodes.
5600                          */
5601                         op = OP_ILLEGAL;
5602                         rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5603
5604                         rfs4_op_illegal(argop, resop, req, &cs);
5605                         cs.cont = FALSE;
5606                 }
5607
5608                 /*
5609                  * If not at last op, and if we are to stop, then
5610                  * compact the results array.
5611                  */
5612                 if ((i + 1) < args->array_len && !cs.cont) {
5613                         nfs_resop4 *new_res = kmem_alloc(
5614                             (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5615                         bcopy(resp->array,
5616                             new_res, (i+1) * sizeof (nfs_resop4));
5617                         kmem_free(resp->array,
5618                             args->array_len * sizeof (nfs_resop4));
5619
5620                         resp->array_len =  i + 1;
5621                         resp->array = new_res;
5622                 }
5623         }
5624
5625         rw_exit(&exported_lock);
5626
5627         DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5628             COMPOUND4res *, resp);
5629
5630         if (cs.vp)
5631                 VN_RELE(cs.vp);
5632         if (cs.saved_vp)
5633                 VN_RELE(cs.saved_vp);
5634         if (cs.saved_fh.nfs_fh4_val)
5635                 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5636
5637         if (cs.basecr)
5638                 crfree(cs.basecr);
5639         if (cs.cr)
5640                 crfree(cs.cr);
5641 }
5642
5643 /*
5644  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5645  * XXX zero out the tag and array values. Need to investigate why the
5646  * XXX calls occur, but at least prevent the panic for now.
5647  */
5648 void
5649 rfs4_compound_free(COMPOUND4res *resp)
5650 {
5651         uint_t i;
5652
5653         if (resp->tag.utf8string_val) {
5654                 UTF8STRING_FREE(resp->tag)
5655         }
5656
5657         for (i = 0; i < resp->array_len; i++) {
5658                 nfs_resop4 *resop;
5659                 uint_t op;
5660
5661                 resop = &resp->array[i];
5662                 op = (uint_t)resop->resop;
5663                 if (op < rfsv4disp_cnt) {
5664                         (*rfsv4disptab[op].dis_resfree)(resop);
5665                 }
5666         }
5667         if (resp->array != NULL) {
5668                 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5669         }
5670 }
5671
5672 /*
5673  * Process the value of the compound request rpc flags, as a bit-AND
5674  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5675  */
5676 void
5677 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5678 {
5679         int i;
5680         int flag = RPC_ALL;
5681
5682         for (i = 0; flag && i < args->array_len; i++) {
5683                 uint_t op;
5684
5685                 op = (uint_t)args->array[i].argop;
5686
5687                 if (op < rfsv4disp_cnt)
5688                         flag &= rfsv4disptab[op].dis_flags;
5689                 else
5690                         flag = 0;
5691         }
5692         *flagp = flag;
5693 }
5694
5695 nfsstat4
5696 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
5697 {
5698         nfsstat4 e;
5699
5700         rfs4_dbe_lock(cp->rc_dbe);
5701
5702         if (cp->rc_sysidt != LM_NOSYSID) {
5703                 *sp = cp->rc_sysidt;
5704                 e = NFS4_OK;
5705
5706         } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
5707                 *sp = cp->rc_sysidt;
5708                 e = NFS4_OK;
5709
5710                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
5711                     "rfs4_client_sysid: allocated 0x%x\n", *sp));
5712         } else
5713                 e = NFS4ERR_DELAY;
5714
5715         rfs4_dbe_unlock(cp->rc_dbe);
5716         return (e);
5717 }
5718
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug helper: log a one-line description of a record-lock request.
 *
 * str       - prefix for the message (typically the caller's name)
 * operation - F_GETLK, F_SETLK or F_SETLK_NBMAND; anything else is
 *             printed as "F_UNKNOWN"
 * flk       - the lock being described; l_whence is asserted to be 0,
 *             and a zero l_len is printed as all ones
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
        char *op, *type;

        switch (operation) {
        case F_GETLK:
                op = "F_GETLK";
                break;
        case F_SETLK:
                op = "F_SETLK";
                break;
        case F_SETLK_NBMAND:
                op = "F_SETLK_NBMAND";
                break;
        default:
                op = "F_UNKNOWN";
                break;
        }
        switch (flk->l_type) {
        case F_UNLCK:
                type = "F_UNLCK";
                break;
        case F_RDLCK:
                type = "F_RDLCK";
                break;
        case F_WRLCK:
                type = "F_WRLCK";
                break;
        default:
                type = "F_UNKNOWN";
                break;
        }

        ASSERT(flk->l_whence == 0);
        cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
            str, op, type, (longlong_t)flk->l_start,
            flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/*
 * Print the lock only when the debug switch d is non-zero.  Wrapped in
 * do { } while (0) so the macro is a single statement and cannot capture
 * an "else" that follows it at the call site.
 */
#define LOCK_PRINT(d, s, t, f) \
        do { \
                if (d) \
                        lock_print(s, t, f); \
        } while (0)
#else
#define LOCK_PRINT(d, s, t, f)
#endif
5755
5756 /*ARGSUSED*/
5757 static bool_t
5758 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
5759 {
5760         return (TRUE);
5761 }
5762
5763 /*
5764  * Look up the pathname using the vp in cs as the directory vnode.
5765  * cs->vp will be the vnode for the file on success
5766  */
5767
5768 static nfsstat4
5769 rfs4_lookup(component4 *component, struct svc_req *req,
5770     struct compound_state *cs)
5771 {
5772         char *nm;
5773         uint32_t len;
5774         nfsstat4 status;
5775         struct sockaddr *ca;
5776         char *name;
5777
5778         if (cs->vp == NULL) {
5779                 return (NFS4ERR_NOFILEHANDLE);
5780         }
5781         if (cs->vp->v_type != VDIR) {
5782                 return (NFS4ERR_NOTDIR);
5783         }
5784
5785         status = utf8_dir_verify(component);
5786         if (status != NFS4_OK)
5787                 return (status);
5788
5789         nm = utf8_to_fn(component, &len, NULL);
5790         if (nm == NULL) {
5791                 return (NFS4ERR_INVAL);
5792         }
5793
5794         if (len > MAXNAMELEN) {
5795                 kmem_free(nm, len);
5796                 return (NFS4ERR_NAMETOOLONG);
5797         }
5798
5799         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
5800         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
5801             MAXPATHLEN + 1);
5802
5803         if (name == NULL) {
5804                 kmem_free(nm, len);
5805                 return (NFS4ERR_INVAL);
5806         }
5807
5808         status = do_rfs4_op_lookup(name, req, cs);
5809
5810         if (name != nm)
5811                 kmem_free(name, MAXPATHLEN + 1);
5812
5813         kmem_free(nm, len);
5814
5815         return (status);
5816 }
5817
5818 static nfsstat4
5819 rfs4_lookupfile(component4 *component, struct svc_req *req,
5820     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
5821 {
5822         nfsstat4 status;
5823         vnode_t *dvp = cs->vp;
5824         vattr_t bva, ava, fva;
5825         int error;
5826
5827         /* Get "before" change value */
5828         bva.va_mask = VATTR_CTIME|VATTR_SEQ;
5829         error = fop_getattr(dvp, &bva, 0, cs->cr, NULL);
5830         if (error)
5831                 return (puterrno4(error));
5832
5833         /* rfs4_lookup may VN_RELE directory */
5834         VN_HOLD(dvp);
5835
5836         status = rfs4_lookup(component, req, cs);
5837         if (status != NFS4_OK) {
5838                 VN_RELE(dvp);
5839                 return (status);
5840         }
5841
5842         /*
5843          * Get "after" change value, if it fails, simply return the
5844          * before value.
5845          */
5846         ava.va_mask = VATTR_CTIME|VATTR_SEQ;
5847         if (fop_getattr(dvp, &ava, 0, cs->cr, NULL)) {
5848                 ava.va_ctime = bva.va_ctime;
5849                 ava.va_seq = 0;
5850         }
5851         VN_RELE(dvp);
5852
5853         /*
5854          * Validate the file is a file
5855          */
5856         fva.va_mask = VATTR_TYPE|VATTR_MODE;
5857         error = fop_getattr(cs->vp, &fva, 0, cs->cr, NULL);
5858         if (error)
5859                 return (puterrno4(error));
5860
5861         if (fva.va_type != VREG) {
5862                 if (fva.va_type == VDIR)
5863                         return (NFS4ERR_ISDIR);
5864                 if (fva.va_type == VLNK)
5865                         return (NFS4ERR_SYMLINK);
5866                 return (NFS4ERR_INVAL);
5867         }
5868
5869         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
5870         NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
5871
5872         /*
5873          * It is undefined if fop_lookup will change va_seq, so
5874          * cinfo.atomic = TRUE only if we have
5875          * non-zero va_seq's, and they have not changed.
5876          */
5877         if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
5878                 cinfo->atomic = TRUE;
5879         else
5880                 cinfo->atomic = FALSE;
5881
5882         /* Check for mandatory locking */
5883         cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
5884         return (check_open_access(access, cs, req));
5885 }
5886
5887 static nfsstat4
5888 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
5889     cred_t *cr, vnode_t **vpp, bool_t *created)
5890 {
5891         int error;
5892         nfsstat4 status = NFS4_OK;
5893         vattr_t va;
5894
5895 tryagain:
5896
5897         /*
5898          * The file open mode used is VWRITE.  If the client needs
5899          * some other semantic, then it should do the access checking
5900          * itself.  It would have been nice to have the file open mode
5901          * passed as part of the arguments.
5902          */
5903
5904         *created = TRUE;
5905         error = fop_create(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
5906
5907         if (error) {
5908                 *created = FALSE;
5909
5910                 /*
5911                  * If we got something other than file already exists
5912                  * then just return this error.  Otherwise, we got
5913                  * EEXIST.  If we were doing a GUARDED create, then
5914                  * just return this error.  Otherwise, we need to
5915                  * make sure that this wasn't a duplicate of an
5916                  * exclusive create request.
5917                  *
5918                  * The assumption is made that a non-exclusive create
5919                  * request will never return EEXIST.
5920                  */
5921
5922                 if (error != EEXIST || mode == GUARDED4) {
5923                         status = puterrno4(error);
5924                         return (status);
5925                 }
5926                 error = fop_lookup(dvp, nm, vpp, NULL, 0, NULL, cr,
5927                     NULL, NULL, NULL);
5928
5929                 if (error) {
5930                         /*
5931                          * We couldn't find the file that we thought that
5932                          * we just created.  So, we'll just try creating
5933                          * it again.
5934                          */
5935                         if (error == ENOENT)
5936                                 goto tryagain;
5937
5938                         status = puterrno4(error);
5939                         return (status);
5940                 }
5941
5942                 if (mode == UNCHECKED4) {
5943                         /* existing object must be regular file */
5944                         if ((*vpp)->v_type != VREG) {
5945                                 if ((*vpp)->v_type == VDIR)
5946                                         status = NFS4ERR_ISDIR;
5947                                 else if ((*vpp)->v_type == VLNK)
5948                                         status = NFS4ERR_SYMLINK;
5949                                 else
5950                                         status = NFS4ERR_INVAL;
5951                                 VN_RELE(*vpp);
5952                                 return (status);
5953                         }
5954
5955                         return (NFS4_OK);
5956                 }
5957
5958                 /* Check for duplicate request */
5959                 va.va_mask = VATTR_MTIME;
5960                 error = fop_getattr(*vpp, &va, 0, cr, NULL);
5961                 if (!error) {
5962                         /* We found the file */
5963                         const timestruc_t *mtime = &vap->va_mtime;
5964
5965                         if (va.va_mtime.tv_sec != mtime->tv_sec ||
5966                             va.va_mtime.tv_nsec != mtime->tv_nsec) {
5967                                 /* but its not our creation */
5968                                 VN_RELE(*vpp);
5969                                 return (NFS4ERR_EXIST);
5970                         }
5971                         *created = TRUE; /* retrans of create == created */
5972                         return (NFS4_OK);
5973                 }
5974                 VN_RELE(*vpp);
5975                 return (NFS4ERR_EXIST);
5976         }
5977
5978         return (NFS4_OK);
5979 }
5980
5981 static nfsstat4
5982 check_open_access(uint32_t access, struct compound_state *cs,
5983     struct svc_req *req)
5984 {
5985         int error;
5986         vnode_t *vp;
5987         bool_t readonly;
5988         cred_t *cr = cs->cr;
5989
5990         /* For now we don't allow mandatory locking as per V2/V3 */
5991         if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
5992                 return (NFS4ERR_ACCESS);
5993         }
5994
5995         vp = cs->vp;
5996         ASSERT(cr != NULL && vp->v_type == VREG);
5997
5998         /*
5999          * If the file system is exported read only and we are trying
6000          * to open for write, then return NFS4ERR_ROFS
6001          */
6002
6003         readonly = rdonly4(req, cs);
6004
6005         if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6006                 return (NFS4ERR_ROFS);
6007
6008         if (access & OPEN4_SHARE_ACCESS_READ) {
6009                 if ((fop_access(vp, VREAD, 0, cr, NULL) != 0) &&
6010                     (fop_access(vp, VEXEC, 0, cr, NULL) != 0)) {
6011                         return (NFS4ERR_ACCESS);
6012                 }
6013         }
6014
6015         if (access & OPEN4_SHARE_ACCESS_WRITE) {
6016                 error = fop_access(vp, VWRITE, 0, cr, NULL);
6017                 if (error)
6018                         return (NFS4ERR_ACCESS);
6019         }
6020
6021         return (NFS4_OK);
6022 }
6023
6024 static nfsstat4
6025 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6026     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6027 {
6028         struct nfs4_svgetit_arg sarg;
6029         struct nfs4_ntov_table ntov;
6030
6031         bool_t ntov_table_init = FALSE;
6032         struct statvfs64 sb;
6033         nfsstat4 status;
6034         vnode_t *vp;
6035         vattr_t bva, ava, iva, cva, *vap;
6036         vnode_t *dvp;
6037         timespec32_t *mtime;
6038         char *nm = NULL;
6039         uint_t buflen;
6040         bool_t created;
6041         bool_t setsize = FALSE;
6042         len_t reqsize;
6043         int error;
6044         bool_t trunc;
6045         caller_context_t ct;
6046         component4 *component;
6047         struct sockaddr *ca;
6048         char *name = NULL;
6049
6050         sarg.sbp = &sb;
6051         sarg.is_referral = B_FALSE;
6052
6053         dvp = cs->vp;
6054
6055         /* Check if the file system is read only */
6056         if (rdonly4(req, cs))
6057                 return (NFS4ERR_ROFS);
6058
6059         /*
6060          * Get the last component of path name in nm. cs will reference
6061          * the including directory on success.
6062          */
6063         component = &args->open_claim4_u.file;
6064         status = utf8_dir_verify(component);
6065         if (status != NFS4_OK)
6066                 return (status);
6067
6068         nm = utf8_to_fn(component, &buflen, NULL);
6069
6070         if (nm == NULL)
6071                 return (NFS4ERR_RESOURCE);
6072
6073         if (buflen > MAXNAMELEN) {
6074                 kmem_free(nm, buflen);
6075                 return (NFS4ERR_NAMETOOLONG);
6076         }
6077
6078         bva.va_mask = VATTR_TYPE|VATTR_CTIME|VATTR_SEQ;
6079         error = fop_getattr(dvp, &bva, 0, cs->cr, NULL);
6080         if (error) {
6081                 kmem_free(nm, buflen);
6082                 return (puterrno4(error));
6083         }
6084
6085         if (bva.va_type != VDIR) {
6086                 kmem_free(nm, buflen);
6087                 return (NFS4ERR_NOTDIR);
6088         }
6089
6090         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6091
6092         switch (args->mode) {
6093         case GUARDED4:
6094                 /*FALLTHROUGH*/
6095         case UNCHECKED4:
6096                 nfs4_ntov_table_init(&ntov);
6097                 ntov_table_init = TRUE;
6098
6099                 *attrset = 0;
6100                 status = do_rfs4_set_attrs(attrset,
6101                     &args->createhow4_u.createattrs,
6102                     cs, &sarg, &ntov, NFS4ATTR_SETIT);
6103
6104                 if (status == NFS4_OK && (sarg.vap->va_mask & VATTR_TYPE) &&
6105                     sarg.vap->va_type != VREG) {
6106                         if (sarg.vap->va_type == VDIR)
6107                                 status = NFS4ERR_ISDIR;
6108                         else if (sarg.vap->va_type == VLNK)
6109                                 status = NFS4ERR_SYMLINK;
6110                         else
6111                                 status = NFS4ERR_INVAL;
6112                 }
6113
6114                 if (status != NFS4_OK) {
6115                         kmem_free(nm, buflen);
6116                         nfs4_ntov_table_free(&ntov, &sarg);
6117                         *attrset = 0;
6118                         return (status);
6119                 }
6120
6121                 vap = sarg.vap;
6122                 vap->va_type = VREG;
6123                 vap->va_mask |= VATTR_TYPE;
6124
6125                 if ((vap->va_mask & VATTR_MODE) == 0) {
6126                         vap->va_mask |= VATTR_MODE;
6127                         vap->va_mode = (mode_t)0600;
6128                 }
6129
6130                 if (vap->va_mask & VATTR_SIZE) {
6131
6132                         /* Disallow create with a non-zero size */
6133
6134                         if ((reqsize = sarg.vap->va_size) != 0) {
6135                                 kmem_free(nm, buflen);
6136                                 nfs4_ntov_table_free(&ntov, &sarg);
6137                                 *attrset = 0;
6138                                 return (NFS4ERR_INVAL);
6139                         }
6140                         setsize = TRUE;
6141                 }
6142                 break;
6143
6144         case EXCLUSIVE4:
6145                 /* prohibit EXCL create of named attributes */
6146                 if (dvp->v_flag & V_XATTRDIR) {
6147                         kmem_free(nm, buflen);
6148                         *attrset = 0;
6149                         return (NFS4ERR_INVAL);
6150                 }
6151
6152                 cva.va_mask = VATTR_TYPE | VATTR_MTIME | VATTR_MODE;
6153                 cva.va_type = VREG;
6154                 /*
6155                  * Ensure no time overflows. Assumes underlying
6156                  * filesystem supports at least 32 bits.
6157                  * Truncate nsec to usec resolution to allow valid
6158                  * compares even if the underlying filesystem truncates.
6159                  */
6160                 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6161                 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6162                 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6163                 cva.va_mode = (mode_t)0;
6164                 vap = &cva;
6165
6166                 /*
6167                  * For EXCL create, attrset is set to the server attr
6168                  * used to cache the client's verifier.
6169                  */
6170                 *attrset = FATTR4_TIME_MODIFY_MASK;
6171                 break;
6172         }
6173
6174         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6175         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6176             MAXPATHLEN  + 1);
6177
6178         if (name == NULL) {
6179                 kmem_free(nm, buflen);
6180                 return (NFS4ERR_SERVERFAULT);
6181         }
6182
6183         status = create_vnode(dvp, name, vap, args->mode,
6184             cs->cr, &vp, &created);
6185         if (nm != name)
6186                 kmem_free(name, MAXPATHLEN + 1);
6187         kmem_free(nm, buflen);
6188
6189         if (status != NFS4_OK) {
6190                 if (ntov_table_init)
6191                         nfs4_ntov_table_free(&ntov, &sarg);
6192                 *attrset = 0;
6193                 return (status);
6194         }
6195
6196         trunc = (setsize && !created);
6197
6198         if (args->mode != EXCLUSIVE4) {
6199                 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6200
6201                 /*
6202                  * True verification that object was created with correct
6203                  * attrs is impossible.  The attrs could have been changed
6204                  * immediately after object creation.  If attributes did
6205                  * not verify, the only recourse for the server is to
6206                  * destroy the object.  Maybe if some attrs (like gid)
6207                  * are set incorrectly, the object should be destroyed;
6208                  * however, seems bad as a default policy.  Do we really
6209                  * want to destroy an object over one of the times not
6210                  * verifying correctly?  For these reasons, the server
6211                  * currently sets bits in attrset for createattrs
6212                  * that were set; however, no verification is done.
6213                  *
6214                  * vmask_to_nmask accounts for vattr bits set on create
6215                  *      [do_rfs4_set_attrs() only sets resp bits for
6216                  *       non-vattr/vfs bits.]
6217                  * Mask off any bits we set by default so as not to return
6218                  * more attrset bits than were requested in createattrs
6219                  */
6220                 if (created) {
6221                         nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6222                         *attrset &= createmask;
6223                 } else {
6224                         /*
6225                          * We did not create the vnode (we tried but it
6226                          * already existed).  In this case, the only createattr
6227                          * that the spec allows the server to set is size,
6228                          * and even then, it can only be set if it is 0.
6229                          */
6230                         *attrset = 0;
6231                         if (trunc)
6232                                 *attrset = FATTR4_SIZE_MASK;
6233                 }
6234         }
6235         if (ntov_table_init)
6236                 nfs4_ntov_table_free(&ntov, &sarg);
6237
6238         /*
6239          * Get the initial "after" sequence number, if it fails,
6240          * set to zero, time to before.
6241          */
6242         iva.va_mask = VATTR_CTIME|VATTR_SEQ;
6243         if (fop_getattr(dvp, &iva, 0, cs->cr, NULL)) {
6244                 iva.va_seq = 0;
6245                 iva.va_ctime = bva.va_ctime;
6246         }
6247
6248         /*
6249          * create_vnode attempts to create the file exclusive,
6250          * if it already exists the fop_create will fail and
6251          * may not increase va_seq. It is atomic if
6252          * we haven't changed the directory, but if it has changed
6253          * we don't know what changed it.
6254          */
6255         if (!created) {
6256                 if (bva.va_seq && iva.va_seq &&
6257                     bva.va_seq == iva.va_seq)
6258                         cinfo->atomic = TRUE;
6259                 else
6260                         cinfo->atomic = FALSE;
6261                 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6262         } else {
6263                 /*
6264                  * The entry was created, we need to sync the
6265                  * directory metadata.
6266                  */
6267                 (void) fop_fsync(dvp, 0, cs->cr, NULL);
6268
6269                 /*
6270                  * Get "after" change value, if it fails, simply return the
6271                  * before value.
6272                  */
6273                 ava.va_mask = VATTR_CTIME|VATTR_SEQ;
6274                 if (fop_getattr(dvp, &ava, 0, cs->cr, NULL)) {
6275                         ava.va_ctime = bva.va_ctime;
6276                         ava.va_seq = 0;
6277                 }
6278
6279                 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6280
6281                 /*
6282                  * The cinfo->atomic = TRUE only if we have
6283                  * non-zero va_seq's, and it has incremented by exactly one
6284                  * during the create_vnode and it didn't
6285                  * change during the fop_fsync.
6286                  */
6287                 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6288                     iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6289                         cinfo->atomic = TRUE;
6290                 else
6291                         cinfo->atomic = FALSE;
6292         }
6293
6294         /* Check for mandatory locking and that the size gets set. */
6295         cva.va_mask = VATTR_MODE;
6296         if (setsize)
6297                 cva.va_mask |= VATTR_SIZE;
6298
6299         /* Assume the worst */
6300         cs->mandlock = TRUE;
6301
6302         if (fop_getattr(vp, &cva, 0, cs->cr, NULL) == 0) {
6303                 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6304
6305                 /*
6306                  * Truncate the file if necessary; this would be
6307                  * the case for create over an existing file.
6308                  */
6309
6310                 if (trunc) {
6311                         int in_crit = 0;
6312                         rfs4_file_t *fp;
6313                         bool_t create = FALSE;
6314
6315                         /*
6316                          * We are writing over an existing file.
6317                          * Check to see if we need to recall a delegation.
6318                          */
6319                         rfs4_hold_deleg_policy();
6320                         if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6321                                 if (rfs4_check_delegated_byfp(FWRITE, fp,
6322                                     (reqsize == 0), FALSE, FALSE, &clientid)) {
6323                                         rfs4_file_rele(fp);
6324                                         rfs4_rele_deleg_policy();
6325                                         VN_RELE(vp);
6326                                         *attrset = 0;
6327                                         return (NFS4ERR_DELAY);
6328                                 }
6329                                 rfs4_file_rele(fp);
6330                         }
6331                         rfs4_rele_deleg_policy();
6332
6333                         if (nbl_need_check(vp)) {
6334                                 in_crit = 1;
6335
6336                                 ASSERT(reqsize == 0);
6337
6338                                 nbl_start_crit(vp, RW_READER);
6339                                 if (nbl_conflict(vp, NBL_WRITE, 0,
6340                                     cva.va_size, 0, NULL)) {
6341                                         in_crit = 0;
6342                                         nbl_end_crit(vp);
6343                                         VN_RELE(vp);
6344                                         *attrset = 0;
6345                                         return (NFS4ERR_ACCESS);
6346                                 }
6347                         }
6348                         ct.cc_sysid = 0;
6349                         ct.cc_pid = 0;
6350                         ct.cc_caller_id = nfs4_srv_caller_id;
6351                         ct.cc_flags = CC_DONTBLOCK;
6352
6353                         cva.va_mask = VATTR_SIZE;
6354                         cva.va_size = reqsize;
6355                         (void) fop_setattr(vp, &cva, 0, cs->cr, &ct);
6356                         if (in_crit)
6357                                 nbl_end_crit(vp);
6358                 }
6359         }
6360
6361         error = makefh4(&cs->fh, vp, cs->exi);
6362
6363         /*
6364          * Force modified data and metadata out to stable storage.
6365          */
6366         (void) fop_fsync(vp, FNODSYNC, cs->cr, NULL);
6367
6368         if (error) {
6369                 VN_RELE(vp);
6370                 *attrset = 0;
6371                 return (puterrno4(error));
6372         }
6373
6374         /* if parent dir is attrdir, set namedattr fh flag */
6375         if (dvp->v_flag & V_XATTRDIR)
6376                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6377
6378         if (cs->vp)
6379                 VN_RELE(cs->vp);
6380
6381         cs->vp = vp;
6382
6383         /*
6384          * if we did not create the file, we will need to check
6385          * the access bits on the file
6386          */
6387
6388         if (!created) {
6389                 if (setsize)
6390                         args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6391                 status = check_open_access(args->share_access, cs, req);
6392                 if (status != NFS4_OK)
6393                         *attrset = 0;
6394         }
6395         return (status);
6396 }
6397
6398 /*ARGSUSED*/
6399 static void
6400 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6401     rfs4_openowner_t *oo, delegreq_t deleg,
6402     uint32_t access, uint32_t deny,
6403     OPEN4res *resp, int deleg_cur)
6404 {
6405         /* XXX Currently not using req  */
6406         rfs4_state_t *sp;
6407         rfs4_file_t *fp;
6408         bool_t screate = TRUE;
6409         bool_t fcreate = TRUE;
6410         uint32_t open_a, share_a;
6411         uint32_t open_d, share_d;
6412         rfs4_deleg_state_t *dsp;
6413         sysid_t sysid;
6414         nfsstat4 status;
6415         caller_context_t ct;
6416         int fflags = 0;
6417         int recall = 0;
6418         int err;
6419         int first_open;
6420
6421         /* get the file struct and hold a lock on it during initial open */
6422         fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6423         if (fp == NULL) {
6424                 resp->status = NFS4ERR_RESOURCE;
6425                 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6426                 return;
6427         }
6428
6429         sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6430         if (sp == NULL) {
6431                 resp->status = NFS4ERR_RESOURCE;
6432                 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6433                 /* No need to keep any reference */
6434                 rw_exit(&fp->rf_file_rwlock);
6435                 rfs4_file_rele(fp);
6436                 return;
6437         }
6438
6439         /* try to get the sysid before continuing */
6440         if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6441                 resp->status = status;
6442                 rfs4_file_rele(fp);
6443                 /* Not a fully formed open; "close" it */
6444                 if (screate == TRUE)
6445                         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6446                 rfs4_state_rele(sp);
6447                 return;
6448         }
6449
6450         /* Calculate the fflags for this OPEN. */
6451         if (access & OPEN4_SHARE_ACCESS_READ)
6452                 fflags |= FREAD;
6453         if (access & OPEN4_SHARE_ACCESS_WRITE)
6454                 fflags |= FWRITE;
6455
6456         rfs4_dbe_lock(sp->rs_dbe);
6457
6458         /*
6459          * Calculate the new deny and access mode that this open is adding to
6460          * the file for this open owner;
6461          */
6462         open_d = (deny & ~sp->rs_open_deny);
6463         open_a = (access & ~sp->rs_open_access);
6464
6465         /*
6466          * Calculate the new share access and share deny modes that this open
6467          * is adding to the file for this open owner;
6468          */
6469         share_a = (access & ~sp->rs_share_access);
6470         share_d = (deny & ~sp->rs_share_deny);
6471
6472         first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6473
6474         /*
6475          * Check to see the client has already sent an open for this
6476          * open owner on this file with the same share/deny modes.
6477          * If so, we don't need to check for a conflict and we don't
6478          * need to add another shrlock.  If not, then we need to
6479          * check for conflicts in deny and access before checking for
6480          * conflicts in delegation.  We don't want to recall a
6481          * delegation based on an open that will eventually fail based
6482          * on shares modes.
6483          */
6484
6485         if (share_a || share_d) {
6486                 if ((err = rfs4_share(sp, access, deny)) != 0) {
6487                         rfs4_dbe_unlock(sp->rs_dbe);
6488                         resp->status = err;
6489
6490                         rfs4_file_rele(fp);
6491                         /* Not a fully formed open; "close" it */
6492                         if (screate == TRUE)
6493                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6494                         rfs4_state_rele(sp);
6495                         return;
6496                 }
6497         }
6498
6499         rfs4_dbe_lock(fp->rf_dbe);
6500
6501         /*
6502          * Check to see if this file is delegated and if so, if a
6503          * recall needs to be done.
6504          */
6505         if (rfs4_check_recall(sp, access)) {
6506                 rfs4_dbe_unlock(fp->rf_dbe);
6507                 rfs4_dbe_unlock(sp->rs_dbe);
6508                 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6509                 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6510                 rfs4_dbe_lock(sp->rs_dbe);
6511
6512                 /* if state closed while lock was dropped */
6513                 if (sp->rs_closed) {
6514                         if (share_a || share_d)
6515                                 (void) rfs4_unshare(sp);
6516                         rfs4_dbe_unlock(sp->rs_dbe);
6517                         rfs4_file_rele(fp);
6518                         /* Not a fully formed open; "close" it */
6519                         if (screate == TRUE)
6520                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6521                         rfs4_state_rele(sp);
6522                         resp->status = NFS4ERR_OLD_STATEID;
6523                         return;
6524                 }
6525
6526                 rfs4_dbe_lock(fp->rf_dbe);
6527                 /* Let's see if the delegation was returned */
6528                 if (rfs4_check_recall(sp, access)) {
6529                         rfs4_dbe_unlock(fp->rf_dbe);
6530                         if (share_a || share_d)
6531                                 (void) rfs4_unshare(sp);
6532                         rfs4_dbe_unlock(sp->rs_dbe);
6533                         rfs4_file_rele(fp);
6534                         rfs4_update_lease(sp->rs_owner->ro_client);
6535
6536                         /* Not a fully formed open; "close" it */
6537                         if (screate == TRUE)
6538                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6539                         rfs4_state_rele(sp);
6540                         resp->status = NFS4ERR_DELAY;
6541                         return;
6542                 }
6543         }
6544         /*
6545          * the share check passed and any delegation conflict has been
6546          * taken care of, now call vop_open.
6547          * if this is the first open then call vop_open with fflags.
6548          * if not, call vn_open_upgrade with just the upgrade flags.
6549          *
6550          * if the file has been opened already, it will have the current
6551          * access mode in the state struct.  if it has no share access, then
6552          * this is a new open.
6553          *
6554          * However, if this is open with CLAIM_DLEGATE_CUR, then don't
6555          * call fop_open(), just do the open upgrade.
6556          */
6557         if (first_open && !deleg_cur) {
6558                 ct.cc_sysid = sysid;
6559                 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6560                 ct.cc_caller_id = nfs4_srv_caller_id;
6561                 ct.cc_flags = CC_DONTBLOCK;
6562                 err = fop_open(&cs->vp, fflags, cs->cr, &ct);
6563                 if (err) {
6564                         rfs4_dbe_unlock(fp->rf_dbe);
6565                         if (share_a || share_d)
6566                                 (void) rfs4_unshare(sp);
6567                         rfs4_dbe_unlock(sp->rs_dbe);
6568                         rfs4_file_rele(fp);
6569
6570                         /* Not a fully formed open; "close" it */
6571                         if (screate == TRUE)
6572                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6573                         rfs4_state_rele(sp);
6574                         /* check if a monitor detected a delegation conflict */
6575                         if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6576                                 resp->status = NFS4ERR_DELAY;
6577                         else
6578                                 resp->status = NFS4ERR_SERVERFAULT;
6579                         return;
6580                 }
6581         } else { /* open upgrade */
6582                 /*
6583                  * calculate the fflags for the new mode that is being added
6584                  * by this upgrade.
6585                  */
6586                 fflags = 0;
6587                 if (open_a & OPEN4_SHARE_ACCESS_READ)
6588                         fflags |= FREAD;
6589                 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6590                         fflags |= FWRITE;
6591                 vn_open_upgrade(cs->vp, fflags);
6592         }
6593         sp->rs_open_access |= access;
6594         sp->rs_open_deny |= deny;
6595
6596         if (open_d & OPEN4_SHARE_DENY_READ)
6597                 fp->rf_deny_read++;
6598         if (open_d & OPEN4_SHARE_DENY_WRITE)
6599                 fp->rf_deny_write++;
6600         fp->rf_share_deny |= deny;
6601
6602         if (open_a & OPEN4_SHARE_ACCESS_READ)
6603                 fp->rf_access_read++;
6604         if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6605                 fp->rf_access_write++;
6606         fp->rf_share_access |= access;
6607
6608         /*
6609          * Check for delegation here. if the deleg argument is not
6610          * DELEG_ANY, then this is a reclaim from a client and
6611          * we must honor the delegation requested. If necessary we can
6612          * set the recall flag.
6613          */
6614
6615         dsp = rfs4_grant_delegation(deleg, sp, &recall);
6616
6617         cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6618
6619         next_stateid(&sp->rs_stateid);
6620
6621         resp->stateid = sp->rs_stateid.stateid;
6622
6623         rfs4_dbe_unlock(fp->rf_dbe);
6624         rfs4_dbe_unlock(sp->rs_dbe);
6625
6626         if (dsp) {
6627                 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6628                 rfs4_deleg_state_rele(dsp);
6629         }
6630
6631         rfs4_file_rele(fp);
6632         rfs4_state_rele(sp);
6633
6634         resp->status = NFS4_OK;
6635 }
6636
6637 /*ARGSUSED*/
6638 static void
6639 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6640     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6641 {
6642         change_info4 *cinfo = &resp->cinfo;
6643         bitmap4 *attrset = &resp->attrset;
6644
6645         if (args->opentype == OPEN4_NOCREATE)
6646                 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6647                     req, cs, args->share_access, cinfo);
6648         else {
6649                 /* inhibit delegation grants during exclusive create */
6650
6651                 if (args->mode == EXCLUSIVE4)
6652                         rfs4_disable_delegation();
6653
6654                 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6655                     oo->ro_client->rc_clientid);
6656         }
6657
6658         if (resp->status == NFS4_OK) {
6659
6660                 /* cs->vp cs->fh now reference the desired file */
6661
6662                 rfs4_do_open(cs, req, oo,
6663                     oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6664                     args->share_access, args->share_deny, resp, 0);
6665
6666                 /*
6667                  * If rfs4_createfile set attrset, we must
6668                  * clear this attrset before the response is copied.
6669                  */
6670                 if (resp->status != NFS4_OK && resp->attrset) {
6671                         resp->attrset = 0;
6672                 }
6673         }
6674         else
6675                 *cs->statusp = resp->status;
6676
6677         if (args->mode == EXCLUSIVE4)
6678                 rfs4_enable_delegation();
6679 }
6680
6681 /*ARGSUSED*/
6682 static void
6683 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
6684     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6685 {
6686         change_info4 *cinfo = &resp->cinfo;
6687         vattr_t va;
6688         vtype_t v_type = cs->vp->v_type;
6689         int error = 0;
6690
6691         /* Verify that we have a regular file */
6692         if (v_type != VREG) {
6693                 if (v_type == VDIR)
6694                         resp->status = NFS4ERR_ISDIR;
6695                 else if (v_type == VLNK)
6696                         resp->status = NFS4ERR_SYMLINK;
6697                 else
6698                         resp->status = NFS4ERR_INVAL;
6699                 return;
6700         }
6701
6702         va.va_mask = VATTR_MODE|VATTR_UID;
6703         error = fop_getattr(cs->vp, &va, 0, cs->cr, NULL);
6704         if (error) {
6705                 resp->status = puterrno4(error);
6706                 return;
6707         }
6708
6709         cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
6710
6711         /*
6712          * Check if we have access to the file, Note the the file
6713          * could have originally been open UNCHECKED or GUARDED
6714          * with mode bits that will now fail, but there is nothing
6715          * we can really do about that except in the case that the
6716          * owner of the file is the one requesting the open.
6717          */
6718         if (crgetuid(cs->cr) != va.va_uid) {
6719                 resp->status = check_open_access(args->share_access, cs, req);
6720                 if (resp->status != NFS4_OK) {
6721                         return;
6722                 }
6723         }
6724
6725         /*
6726          * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
6727          */
6728         cinfo->before = 0;
6729         cinfo->after = 0;
6730         cinfo->atomic = FALSE;
6731
6732         rfs4_do_open(cs, req, oo,
6733             NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
6734             args->share_access, args->share_deny, resp, 0);
6735 }
6736
6737 static void
6738 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
6739     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6740 {
6741         int error;
6742         nfsstat4 status;
6743         stateid4 stateid =
6744             args->open_claim4_u.delegate_cur_info.delegate_stateid;
6745         rfs4_deleg_state_t *dsp;
6746
6747         /*
6748          * Find the state info from the stateid and confirm that the
6749          * file is delegated.  If the state openowner is the same as
6750          * the supplied openowner we're done. If not, get the file
6751          * info from the found state info. Use that file info to
6752          * create the state for this lock owner. Note solaris doen't
6753          * really need the pathname to find the file. We may want to
6754          * lookup the pathname and make sure that the vp exist and
6755          * matches the vp in the file structure. However it is
6756          * possible that the pathname nolonger exists (local process
6757          * unlinks the file), so this may not be that useful.
6758          */
6759
6760         status = rfs4_get_deleg_state(&stateid, &dsp);
6761         if (status != NFS4_OK) {
6762                 resp->status = status;
6763                 return;
6764         }
6765
6766         ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
6767
6768         /*
6769          * New lock owner, create state. Since this was probably called
6770          * in response to a CB_RECALL we set deleg to DELEG_NONE
6771          */
6772
6773         ASSERT(cs->vp != NULL);
6774         VN_RELE(cs->vp);
6775         VN_HOLD(dsp->rds_finfo->rf_vp);
6776         cs->vp = dsp->rds_finfo->rf_vp;
6777
6778         if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
6779                 rfs4_deleg_state_rele(dsp);
6780                 *cs->statusp = resp->status = puterrno4(error);
6781                 return;
6782         }
6783
6784         /* Mark progress for delegation returns */
6785         dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
6786         rfs4_deleg_state_rele(dsp);
6787         rfs4_do_open(cs, req, oo, DELEG_NONE,
6788             args->share_access, args->share_deny, resp, 1);
6789 }
6790
6791 /*ARGSUSED*/
6792 static void
6793 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
6794     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6795 {
6796         /*
6797          * Lookup the pathname, it must already exist since this file
6798          * was delegated.
6799          *
6800          * Find the file and state info for this vp and open owner pair.
6801          *      check that they are in fact delegated.
6802          *      check that the state access and deny modes are the same.
6803          *
6804          * Return the delgation possibly seting the recall flag.
6805          */
6806         rfs4_file_t *fp;
6807         rfs4_state_t *sp;
6808         bool_t create = FALSE;
6809         bool_t dcreate = FALSE;
6810         rfs4_deleg_state_t *dsp;
6811         nfsace4 *ace;
6812
6813         /* Note we ignore oflags */
6814         resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
6815             req, cs, args->share_access, &resp->cinfo);
6816
6817         if (resp->status != NFS4_OK) {
6818                 return;
6819         }
6820
6821         /* get the file struct and hold a lock on it during initial open */
6822         fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
6823         if (fp == NULL) {
6824                 resp->status = NFS4ERR_RESOURCE;
6825                 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
6826                 return;
6827         }
6828
6829         sp = rfs4_findstate_by_owner_file(oo, fp, &create);
6830         if (sp == NULL) {
6831                 resp->status = NFS4ERR_SERVERFAULT;
6832                 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
6833                 rw_exit(&fp->rf_file_rwlock);
6834                 rfs4_file_rele(fp);
6835                 return;
6836         }
6837
6838         rfs4_dbe_lock(sp->rs_dbe);
6839         rfs4_dbe_lock(fp->rf_dbe);
6840         if (args->share_access != sp->rs_share_access ||
6841             args->share_deny != sp->rs_share_deny ||
6842             sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
6843                 NFS4_DEBUG(rfs4_debug,
6844                     (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
6845                 rfs4_dbe_unlock(fp->rf_dbe);
6846                 rfs4_dbe_unlock(sp->rs_dbe);
6847                 rfs4_file_rele(fp);
6848                 rfs4_state_rele(sp);
6849                 resp->status = NFS4ERR_SERVERFAULT;
6850                 return;
6851         }
6852         rfs4_dbe_unlock(fp->rf_dbe);
6853         rfs4_dbe_unlock(sp->rs_dbe);
6854
6855         dsp = rfs4_finddeleg(sp, &dcreate);
6856         if (dsp == NULL) {
6857                 rfs4_state_rele(sp);
6858                 rfs4_file_rele(fp);
6859                 resp->status = NFS4ERR_SERVERFAULT;
6860                 return;
6861         }
6862
6863         next_stateid(&sp->rs_stateid);
6864
6865         resp->stateid = sp->rs_stateid.stateid;
6866
6867         resp->delegation.delegation_type = dsp->rds_dtype;
6868
6869         if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
6870                 open_read_delegation4 *rv =
6871                     &resp->delegation.open_delegation4_u.read;
6872
6873                 rv->stateid = dsp->rds_delegid.stateid;
6874                 rv->recall = FALSE; /* no policy in place to set to TRUE */
6875                 ace = &rv->permissions;
6876         } else {
6877                 open_write_delegation4 *rv =
6878                     &resp->delegation.open_delegation4_u.write;
6879
6880                 rv->stateid = dsp->rds_delegid.stateid;
6881                 rv->recall = FALSE;  /* no policy in place to set to TRUE */
6882                 ace = &rv->permissions;
6883                 rv->space_limit.limitby = NFS_LIMIT_SIZE;
6884                 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
6885         }
6886
6887         /* XXX For now */
6888         ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
6889         ace->flag = 0;
6890         ace->access_mask = 0;
6891         ace->who.utf8string_len = 0;
6892         ace->who.utf8string_val = 0;
6893
6894         rfs4_deleg_state_rele(dsp);
6895         rfs4_state_rele(sp);
6896         rfs4_file_rele(fp);
6897 }
6898
/*
 * Outcome of a sequence id check (see rfs4_check_seqid()).
 */
typedef enum {
        NFS4_CHKSEQ_OKAY = 0,   /* request carries the next expected seqid */
        NFS4_CHKSEQ_REPLAY,     /* same seqid and same op as the last reply */
        NFS4_CHKSEQ_BAD         /* any other seqid */
} rfs4_chkseq_t;
6904
6905 /*
6906  * Generic function for sequence number checks.
6907  */
6908 static rfs4_chkseq_t
6909 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
6910     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
6911 {
6912         /* Same sequence ids and matching operations? */
6913         if (seqid == rqst_seq && resop->resop == lastop->resop) {
6914                 if (copyres == TRUE) {
6915                         rfs4_free_reply(resop);
6916                         rfs4_copy_reply(resop, lastop);
6917                 }
6918                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6919                     "Replayed SEQID %d\n", seqid));
6920                 return (NFS4_CHKSEQ_REPLAY);
6921         }
6922
6923         /* If the incoming sequence is not the next expected then it is bad */
6924         if (rqst_seq != seqid + 1) {
6925                 if (rqst_seq == seqid) {
6926                         NFS4_DEBUG(rfs4_debug,
6927                             (CE_NOTE, "BAD SEQID: Replayed sequence id "
6928                             "but last op was %d current op is %d\n",
6929                             lastop->resop, resop->resop));
6930                         return (NFS4_CHKSEQ_BAD);
6931                 }
6932                 NFS4_DEBUG(rfs4_debug,
6933                     (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
6934                     rqst_seq, seqid));
6935                 return (NFS4_CHKSEQ_BAD);
6936         }
6937
6938         /* Everything okay -- next expected */
6939         return (NFS4_CHKSEQ_OKAY);
6940 }
6941
6942
6943 static rfs4_chkseq_t
6944 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
6945 {
6946         rfs4_chkseq_t rc;
6947
6948         rfs4_dbe_lock(op->ro_dbe);
6949         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
6950             TRUE);
6951         rfs4_dbe_unlock(op->ro_dbe);
6952
6953         if (rc == NFS4_CHKSEQ_OKAY)
6954                 rfs4_update_lease(op->ro_client);
6955
6956         return (rc);
6957 }
6958
6959 static rfs4_chkseq_t
6960 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
6961 {
6962         rfs4_chkseq_t rc;
6963
6964         rfs4_dbe_lock(op->ro_dbe);
6965         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
6966             olo_seqid, resop, FALSE);
6967         rfs4_dbe_unlock(op->ro_dbe);
6968
6969         return (rc);
6970 }
6971
6972 static rfs4_chkseq_t
6973 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
6974 {
6975         rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
6976
6977         rfs4_dbe_lock(lsp->rls_dbe);
6978         if (!lsp->rls_skip_seqid_check)
6979                 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
6980                     resop, TRUE);
6981         rfs4_dbe_unlock(lsp->rls_dbe);
6982
6983         return (rc);
6984 }
6985
/*
 * rfs4_op_open: handler for the OPEN operation of a compound request.
 *
 *      argop - the operation arguments (OPEN4args)
 *      resop - the operation result (OPEN4res); on a replay it is
 *              overwritten with the reply saved for the open owner
 *      req   - the RPC request
 *      cs    - compound state; cs->vp/cs->fh are the current filehandle
 *              and *cs->statusp receives the status of the operation
 *
 * Outline:
 *   1. Require a current filehandle, find the client by clientid and
 *      reject an expired lease.
 *   2. Find the open owner (it may be created by the lookup) and block
 *      other users of its sequence space via ro_sw.
 *   3. For an open owner that already existed, check the sequence id;
 *      an unconfirmed owner has its opens freed and the lookup is
 *      retried.
 *   4. Apply the grace period rules, validate the share access/deny
 *      bits and dispatch on the claim type.
 *   5. Depending on the resulting status, update the lease, save the
 *      reply for replay detection and advance the sequence id.
 *
 * Exit labels: "end" is used before an open owner is held, "out" once
 * the client and open owner are held, and "finish" after the client
 * reference has been dropped.
 */
static void
rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
        OPEN4args *args = &argop->nfs_argop4_u.opopen;
        OPEN4res *resp = &resop->nfs_resop4_u.opopen;
        open_owner4 *owner = &args->owner;
        open_claim_type4 claim = args->claim;
        rfs4_client_t *cp;
        rfs4_openowner_t *oo;
        bool_t create;
        bool_t replay = FALSE;
        int can_reclaim;

        DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
            OPEN4args *, args);

        if (cs->vp == NULL) {
                *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
                goto end;
        }

        /*
         * Need to check clientid and lease expiration first based on
         * error ordering and incrementing sequence id.
         */
        cp = rfs4_findclient_by_id(owner->clientid, FALSE);
        if (cp == NULL) {
                *cs->statusp = resp->status =
                    rfs4_check_clientid(&owner->clientid, 0);
                goto end;
        }

        if (rfs4_lease_expired(cp)) {
                rfs4_client_close(cp);
                *cs->statusp = resp->status = NFS4ERR_EXPIRED;
                goto end;
        }
        can_reclaim = cp->rc_can_reclaim;

        /*
         * Find the open_owner for use from this point forward.  Take
         * care in updating the sequence id based on the type of error
         * being returned.
         */
retry:
        /*
         * create is passed in as TRUE; afterwards "!create" is treated
         * as "the open_owner existed before" (see the checks below).
         */
        create = TRUE;
        oo = rfs4_findopenowner(owner, &create, args->seqid);
        if (oo == NULL) {
                *cs->statusp = resp->status = NFS4ERR_RESOURCE;
                rfs4_client_rele(cp);
                goto end;
        }

        /* Hold off access to the sequence space while the open is done */
        rfs4_sw_enter(&oo->ro_sw);

        /*
         * If the open_owner existed before at the server, then check
         * the sequence id.
         */
        if (!create && !oo->ro_postpone_confirm) {
                switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
                case NFS4_CHKSEQ_BAD:
                        /*
                         * A larger seqid on a still unconfirmed owner:
                         * free its opens and redo the owner lookup.
                         */
                        if ((args->seqid > oo->ro_open_seqid) &&
                            oo->ro_need_confirm) {
                                rfs4_free_opens(oo, TRUE, FALSE);
                                rfs4_sw_exit(&oo->ro_sw);
                                rfs4_openowner_rele(oo);
                                goto retry;
                        }
                        resp->status = NFS4ERR_BAD_SEQID;
                        goto out;
                case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
                        replay = TRUE;
                        goto out;
                default:
                        break;
                }

                /*
                 * Sequence was ok and open owner exists
                 * check to see if we have yet to see an
                 * open_confirm.
                 */
                if (oo->ro_need_confirm) {
                        rfs4_free_opens(oo, TRUE, FALSE);
                        rfs4_sw_exit(&oo->ro_sw);
                        rfs4_openowner_rele(oo);
                        goto retry;
                }
        }
        /* Grace only applies to regular-type OPENs */
        if (rfs4_clnt_in_grace(cp) &&
            (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
                *cs->statusp = resp->status = NFS4ERR_GRACE;
                goto out;
        }

        /*
         * If previous state at the server existed then can_reclaim
         * will be set. If not reply NFS4ERR_NO_GRACE to the
         * client.
         */
        if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
                *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
                goto out;
        }


        /*
         * Reject the open if the client has missed the grace period
         */
        if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
                *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
                goto out;
        }

        /* Couple of up-front bookkeeping items */
        if (oo->ro_need_confirm) {
                /*
                 * If this is a reclaim OPEN then we should not ask
                 * for a confirmation of the open_owner per the
                 * protocol specification.
                 */
                if (claim == CLAIM_PREVIOUS)
                        oo->ro_need_confirm = FALSE;
                else
                        resp->rflags |= OPEN4_RESULT_CONFIRM;
        }
        resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;

        /*
         * If there is an unshared filesystem mounted on this vnode,
         * do not allow to open/create in this directory.
         */
        if (vn_ismntpt(cs->vp)) {
                *cs->statusp = resp->status = NFS4ERR_ACCESS;
                goto out;
        }

        /*
         * access must READ, WRITE, or BOTH.  No access is invalid.
         * deny can be READ, WRITE, BOTH, or NONE.
         * bits not defined for access/deny are invalid.
         */
        if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
            (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
            (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
                *cs->statusp = resp->status = NFS4ERR_INVAL;
                goto out;
        }


        /*
         * make sure attrset is zero before response is built.
         */
        resp->attrset = 0;

        /* Each claim type has its own helper; all report via resp->status */
        switch (claim) {
        case CLAIM_NULL:
                rfs4_do_opennull(cs, req, args, oo, resp);
                break;
        case CLAIM_PREVIOUS:
                rfs4_do_openprev(cs, req, args, oo, resp);
                break;
        case CLAIM_DELEGATE_CUR:
                rfs4_do_opendelcur(cs, req, args, oo, resp);
                break;
        case CLAIM_DELEGATE_PREV:
                rfs4_do_opendelprev(cs, req, args, oo, resp);
                break;
        default:
                resp->status = NFS4ERR_INVAL;
                break;
        }

out:
        rfs4_client_rele(cp);

        /* Catch sequence id handling here to make it a little easier */
        switch (resp->status) {
        case NFS4ERR_BADXDR:
        case NFS4ERR_BAD_SEQID:
        case NFS4ERR_BAD_STATEID:
        case NFS4ERR_NOFILEHANDLE:
        case NFS4ERR_RESOURCE:
        case NFS4ERR_STALE_CLIENTID:
        case NFS4ERR_STALE_STATEID:
                /*
                 * The protocol states that if any of these errors are
                 * being returned, the sequence id should not be
                 * incremented.  Any other return requires an
                 * increment.
                 */
                break;
        default:
                /* Always update the lease in this case */
                rfs4_update_lease(oo->ro_client);

                /* Regular response - copy the result */
                if (!replay)
                        rfs4_update_open_resp(oo, resop, &cs->fh);

                /*
                 * REPLAY case: Only if the previous response was OK
                 * do we copy the filehandle.  If not OK, no
                 * filehandle to copy.
                 */
                if (replay == TRUE &&
                    resp->status == NFS4_OK &&
                    oo->ro_reply_fh.nfs_fh4_val) {
                        /*
                         * If this is a replay, we must restore the
                         * current filehandle/vp to that of what was
                         * returned originally.  Try our best to do
                         * it.
                         */
                        nfs_fh4_fmt_t *fh_fmtp =
                            (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;

                        cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
                            (fid_t *)&fh_fmtp->fh4_xlen, NULL);

                        if (cs->exi == NULL) {
                                resp->status = NFS4ERR_STALE;
                                goto finish;
                        }

                        VN_RELE(cs->vp);

                        /* On failure nfs4_fhtovp() fills in resp->status */
                        cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
                            &resp->status);

                        if (cs->vp == NULL)
                                goto finish;

                        nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
                }

                /*
                 * If this was a replay, no need to update the
                 * sequence id. If the open_owner was not created on
                 * this pass, then update.  The first use of an
                 * open_owner will not bump the sequence id.
                 */
                if (replay == FALSE && !create)
                        rfs4_update_open_sequence(oo);
                /*
                 * If the client is receiving an error and the
                 * open_owner needs to be confirmed, there is no way
                 * to notify the client of this fact ignoring the fact
                 * that the server has no method of returning a
                 * stateid to confirm.  Therefore, the server needs to
                 * mark this open_owner in a way as to avoid the
                 * sequence id checking the next time the client uses
                 * this open_owner.
                 */
                if (resp->status != NFS4_OK && oo->ro_need_confirm)
                        oo->ro_postpone_confirm = TRUE;
                /*
                 * If OK response then clear the postpone flag and
                 * reset the sequence id to keep in sync with the
                 * client.
                 */
                if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
                        oo->ro_postpone_confirm = FALSE;
                        oo->ro_open_seqid = args->seqid;
                }
                break;
        }

finish:
        /* Publish the final status and release the open owner */
        *cs->statusp = resp->status;

        rfs4_sw_exit(&oo->ro_sw);
        rfs4_openowner_rele(oo);

end:
        DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
            OPEN4res *, resp);
}
7268
7269 /*ARGSUSED*/
7270 void
7271 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7272     struct svc_req *req, struct compound_state *cs)
7273 {
7274         OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7275         OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7276         rfs4_state_t *sp;
7277         nfsstat4 status;
7278
7279         DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7280             OPEN_CONFIRM4args *, args);
7281
7282         if (cs->vp == NULL) {
7283                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7284                 goto out;
7285         }
7286
7287         if (cs->vp->v_type != VREG) {
7288                 *cs->statusp = resp->status =
7289                     cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7290                 return;
7291         }
7292
7293         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7294         if (status != NFS4_OK) {
7295                 *cs->statusp = resp->status = status;
7296                 goto out;
7297         }
7298
7299         /* Ensure specified filehandle matches */
7300         if (cs->vp != sp->rs_finfo->rf_vp) {
7301                 rfs4_state_rele(sp);
7302                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7303                 goto out;
7304         }
7305
7306         /* hold off other access to open_owner while we tinker */
7307         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7308
7309         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7310         case NFS4_CHECK_STATEID_OKAY:
7311                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7312                     resop) != 0) {
7313                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7314                         break;
7315                 }
7316                 /*
7317                  * If it is the appropriate stateid and determined to
7318                  * be "OKAY" then this means that the stateid does not
7319                  * need to be confirmed and the client is in error for
7320                  * sending an OPEN_CONFIRM.
7321                  */
7322                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7323                 break;
7324         case NFS4_CHECK_STATEID_OLD:
7325                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7326                 break;
7327         case NFS4_CHECK_STATEID_BAD:
7328                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7329                 break;
7330         case NFS4_CHECK_STATEID_EXPIRED:
7331                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7332                 break;
7333         case NFS4_CHECK_STATEID_CLOSED:
7334                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7335                 break;
7336         case NFS4_CHECK_STATEID_REPLAY:
7337                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7338                     resop)) {
7339                 case NFS4_CHKSEQ_OKAY:
7340                         /*
7341                          * This is replayed stateid; if seqid matches
7342                          * next expected, then client is using wrong seqid.
7343                          */
7344                         /* fall through */
7345                 case NFS4_CHKSEQ_BAD:
7346                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7347                         break;
7348                 case NFS4_CHKSEQ_REPLAY:
7349                         /*
7350                          * Note this case is the duplicate case so
7351                          * resp->status is already set.
7352                          */
7353                         *cs->statusp = resp->status;
7354                         rfs4_update_lease(sp->rs_owner->ro_client);
7355                         break;
7356                 }
7357                 break;
7358         case NFS4_CHECK_STATEID_UNCONFIRMED:
7359                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7360                     resop) != NFS4_CHKSEQ_OKAY) {
7361                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7362                         break;
7363                 }
7364                 *cs->statusp = resp->status = NFS4_OK;
7365
7366                 next_stateid(&sp->rs_stateid);
7367                 resp->open_stateid = sp->rs_stateid.stateid;
7368                 sp->rs_owner->ro_need_confirm = FALSE;
7369                 rfs4_update_lease(sp->rs_owner->ro_client);
7370                 rfs4_update_open_sequence(sp->rs_owner);
7371                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7372                 break;
7373         default:
7374                 ASSERT(FALSE);
7375                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7376                 break;
7377         }
7378         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7379         rfs4_state_rele(sp);
7380
7381 out:
7382         DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7383             OPEN_CONFIRM4res *, resp);
7384 }
7385
7386 /*ARGSUSED*/
7387 void
7388 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7389     struct svc_req *req, struct compound_state *cs)
7390 {
7391         OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7392         OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7393         uint32_t access = args->share_access;
7394         uint32_t deny = args->share_deny;
7395         nfsstat4 status;
7396         rfs4_state_t *sp;
7397         rfs4_file_t *fp;
7398         int fflags = 0;
7399
7400         DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7401             OPEN_DOWNGRADE4args *, args);
7402
7403         if (cs->vp == NULL) {
7404                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7405                 goto out;
7406         }
7407
7408         if (cs->vp->v_type != VREG) {
7409                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7410                 return;
7411         }
7412
7413         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7414         if (status != NFS4_OK) {
7415                 *cs->statusp = resp->status = status;
7416                 goto out;
7417         }
7418
7419         /* Ensure specified filehandle matches */
7420         if (cs->vp != sp->rs_finfo->rf_vp) {
7421                 rfs4_state_rele(sp);
7422                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7423                 goto out;
7424         }
7425
7426         /* hold off other access to open_owner while we tinker */
7427         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7428
7429         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7430         case NFS4_CHECK_STATEID_OKAY:
7431                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7432                     resop) != NFS4_CHKSEQ_OKAY) {
7433                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7434                         goto end;
7435                 }
7436                 break;
7437         case NFS4_CHECK_STATEID_OLD:
7438                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7439                 goto end;
7440         case NFS4_CHECK_STATEID_BAD:
7441                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7442                 goto end;
7443         case NFS4_CHECK_STATEID_EXPIRED:
7444                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7445                 goto end;
7446         case NFS4_CHECK_STATEID_CLOSED:
7447                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7448                 goto end;
7449         case NFS4_CHECK_STATEID_UNCONFIRMED:
7450                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7451                 goto end;
7452         case NFS4_CHECK_STATEID_REPLAY:
7453                 /* Check the sequence id for the open owner */
7454                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7455                     resop)) {
7456                 case NFS4_CHKSEQ_OKAY:
7457                         /*
7458                          * This is replayed stateid; if seqid matches
7459                          * next expected, then client is using wrong seqid.
7460                          */
7461                         /* fall through */
7462                 case NFS4_CHKSEQ_BAD:
7463                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7464                         goto end;
7465                 case NFS4_CHKSEQ_REPLAY:
7466                         /*
7467                          * Note this case is the duplicate case so
7468                          * resp->status is already set.
7469                          */
7470                         *cs->statusp = resp->status;
7471                         rfs4_update_lease(sp->rs_owner->ro_client);
7472                         goto end;
7473                 }
7474                 break;
7475         default:
7476                 ASSERT(FALSE);
7477                 break;
7478         }
7479
7480         rfs4_dbe_lock(sp->rs_dbe);
7481         /*
7482          * Check that the new access modes and deny modes are valid.
7483          * Check that no invalid bits are set.
7484          */
7485         if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7486             (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7487                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7488                 rfs4_update_open_sequence(sp->rs_owner);
7489                 rfs4_dbe_unlock(sp->rs_dbe);
7490                 goto end;
7491         }
7492
7493         /*
7494          * The new modes must be a subset of the current modes and
7495          * the access must specify at least one mode. To test that
7496          * the new mode is a subset of the current modes we bitwise
7497          * AND them together and check that the result equals the new
7498          * mode. For example:
7499          * New mode, access == R and current mode, sp->rs_open_access  == RW
7500          * access & sp->rs_open_access == R == access, so the new access mode
7501          * is valid. Consider access == RW, sp->rs_open_access = R
7502          * access & sp->rs_open_access == R != access, so the new access mode
7503          * is invalid.
7504          */
7505         if ((access & sp->rs_open_access) != access ||
7506             (deny & sp->rs_open_deny) != deny ||
7507             (access &
7508             (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7509                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7510                 rfs4_update_open_sequence(sp->rs_owner);
7511                 rfs4_dbe_unlock(sp->rs_dbe);
7512                 goto end;
7513         }
7514
7515         /*
7516          * Release any share locks associated with this stateID.
7517          * Strictly speaking, this violates the spec because the
7518          * spec effectively requires that open downgrade be atomic.
7519          * At present, fs_shrlock does not have this capability.
7520          */
7521         (void) rfs4_unshare(sp);
7522
7523         status = rfs4_share(sp, access, deny);
7524         if (status != NFS4_OK) {
7525                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7526                 rfs4_update_open_sequence(sp->rs_owner);
7527                 rfs4_dbe_unlock(sp->rs_dbe);
7528                 goto end;
7529         }
7530
7531         fp = sp->rs_finfo;
7532         rfs4_dbe_lock(fp->rf_dbe);
7533
7534         /*
7535          * If the current mode has deny read and the new mode
7536          * does not, decrement the number of deny read mode bits
7537          * and if it goes to zero turn off the deny read bit
7538          * on the file.
7539          */
7540         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7541             (deny & OPEN4_SHARE_DENY_READ) == 0) {
7542                 fp->rf_deny_read--;
7543                 if (fp->rf_deny_read == 0)
7544                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7545         }
7546
7547         /*
7548          * If the current mode has deny write and the new mode
7549          * does not, decrement the number of deny write mode bits
7550          * and if it goes to zero turn off the deny write bit
7551          * on the file.
7552          */
7553         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7554             (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7555                 fp->rf_deny_write--;
7556                 if (fp->rf_deny_write == 0)
7557                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7558         }
7559
7560         /*
7561          * If the current mode has access read and the new mode
7562          * does not, decrement the number of access read mode bits
7563          * and if it goes to zero turn off the access read bit
7564          * on the file.  set fflags to FREAD for the call to
7565          * vn_open_downgrade().
7566          */
7567         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7568             (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7569                 fp->rf_access_read--;
7570                 if (fp->rf_access_read == 0)
7571                         fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7572                 fflags |= FREAD;
7573         }
7574
7575         /*
7576          * If the current mode has access write and the new mode
7577          * does not, decrement the number of access write mode bits
7578          * and if it goes to zero turn off the access write bit
7579          * on the file.  set fflags to FWRITE for the call to
7580          * vn_open_downgrade().
7581          */
7582         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7583             (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7584                 fp->rf_access_write--;
7585                 if (fp->rf_access_write == 0)
7586                         fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7587                 fflags |= FWRITE;
7588         }
7589
7590         /* Check that the file is still accessible */
7591         ASSERT(fp->rf_share_access);
7592
7593         rfs4_dbe_unlock(fp->rf_dbe);
7594
7595         /* now set the new open access and deny modes */
7596         sp->rs_open_access = access;
7597         sp->rs_open_deny = deny;
7598
7599         /*
7600          * we successfully downgraded the share lock, now we need to downgrade
7601          * the open. it is possible that the downgrade was only for a deny
7602          * mode and we have nothing else to do.
7603          */
7604         if ((fflags & (FREAD|FWRITE)) != 0)
7605                 vn_open_downgrade(cs->vp, fflags);
7606
7607         /* Update the stateid */
7608         next_stateid(&sp->rs_stateid);
7609         resp->open_stateid = sp->rs_stateid.stateid;
7610
7611         rfs4_dbe_unlock(sp->rs_dbe);
7612
7613         *cs->statusp = resp->status = NFS4_OK;
7614         /* Update the lease */
7615         rfs4_update_lease(sp->rs_owner->ro_client);
7616         /* And the sequence */
7617         rfs4_update_open_sequence(sp->rs_owner);
7618         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7619
7620 end:
7621         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7622         rfs4_state_rele(sp);
7623 out:
7624         DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7625             OPEN_DOWNGRADE4res *, resp);
7626 }
7627
/*
 * Locate the first occurrence of the NUL-terminated string s2 within the
 * first n bytes of the buffer s1.  The buffer itself need not be
 * NUL-terminated.  Returns a pointer to the start of the match, or NULL
 * when s2 does not occur in those n bytes.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	const size_t patlen = strlen(s2);
	char *cur = (char *)s1;
	size_t remaining;

	/* Stop once fewer than patlen bytes are left to compare. */
	for (remaining = n; remaining >= patlen; remaining--, cur++) {
		if (bcmp(cur, s2, patlen) == 0)
			return (cur);
	}

	return (NULL);
}
7643
7644 /*
7645  * The logic behind this function is detailed in the NFSv4 RFC in the
7646  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7647  * that section for explicit guidance to server behavior for
7648  * SETCLIENTID.
7649  */
7650 void
7651 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7652     struct svc_req *req, struct compound_state *cs)
7653 {
7654         SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7655         SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7656         rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7657         rfs4_clntip_t *ci;
7658         bool_t create;
7659         char *addr, *netid;
7660         int len;
7661
7662         DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7663             SETCLIENTID4args *, args);
7664 retry:
7665         newcp = cp_confirmed = cp_unconfirmed = NULL;
7666
7667         /*
7668          * Save the caller's IP address
7669          */
7670         args->client.cl_addr =
7671             (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
7672
7673         /*
7674          * Record if it is a Solaris client that cannot handle referrals.
7675          */
7676         if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
7677             !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
7678                 /* Add a "yes, it's downrev" record */
7679                 create = TRUE;
7680                 ci = rfs4_find_clntip(args->client.cl_addr, &create);
7681                 ASSERT(ci != NULL);
7682                 rfs4_dbe_rele(ci->ri_dbe);
7683         } else {
7684                 /* Remove any previous record */
7685                 rfs4_invalidate_clntip(args->client.cl_addr);
7686         }
7687
7688         /*
7689          * In search of an EXISTING client matching the incoming
7690          * request to establish a new client identifier at the server
7691          */
7692         create = TRUE;
7693         cp = rfs4_findclient(&args->client, &create, NULL);
7694
7695         /* Should never happen */
7696         ASSERT(cp != NULL);
7697
7698         if (cp == NULL) {
7699                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7700                 goto out;
7701         }
7702
7703         /*
7704          * Easiest case. Client identifier is newly created and is
7705          * unconfirmed.  Also note that for this case, no other
7706          * entries exist for the client identifier.  Nothing else to
7707          * check.  Just setup the response and respond.
7708          */
7709         if (create) {
7710                 *cs->statusp = res->status = NFS4_OK;
7711                 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
7712                 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7713                     cp->rc_confirm_verf;
7714                 /* Setup callback information; CB_NULL confirmation later */
7715                 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
7716
7717                 rfs4_client_rele(cp);
7718                 goto out;
7719         }
7720
7721         /*
7722          * An existing, confirmed client may exist but it may not have
7723          * been active for at least one lease period.  If so, then
7724          * "close" the client and create a new client identifier
7725          */
7726         if (rfs4_lease_expired(cp)) {
7727                 rfs4_client_close(cp);
7728                 goto retry;
7729         }
7730
7731         if (cp->rc_need_confirm == TRUE)
7732                 cp_unconfirmed = cp;
7733         else
7734                 cp_confirmed = cp;
7735
7736         cp = NULL;
7737
7738         /*
7739          * We have a confirmed client, now check for an
7740          * unconfimred entry
7741          */
7742         if (cp_confirmed) {
7743                 /* If creds don't match then client identifier is inuse */
7744                 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
7745                         rfs4_cbinfo_t *cbp;
7746                         /*
7747                          * Some one else has established this client
7748                          * id. Try and say * who they are. We will use
7749                          * the call back address supplied by * the
7750                          * first client.
7751                          */
7752                         *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
7753
7754                         addr = netid = NULL;
7755
7756                         cbp = &cp_confirmed->rc_cbinfo;
7757                         if (cbp->cb_callback.cb_location.r_addr &&
7758                             cbp->cb_callback.cb_location.r_netid) {
7759                                 cb_client4 *cbcp = &cbp->cb_callback;
7760
7761                                 len = strlen(cbcp->cb_location.r_addr)+1;
7762                                 addr = kmem_alloc(len, KM_SLEEP);
7763                                 bcopy(cbcp->cb_location.r_addr, addr, len);
7764                                 len = strlen(cbcp->cb_location.r_netid)+1;
7765                                 netid = kmem_alloc(len, KM_SLEEP);
7766                                 bcopy(cbcp->cb_location.r_netid, netid, len);
7767                         }
7768
7769                         res->SETCLIENTID4res_u.client_using.r_addr = addr;
7770                         res->SETCLIENTID4res_u.client_using.r_netid = netid;
7771
7772                         rfs4_client_rele(cp_confirmed);
7773                 }
7774
7775                 /*
7776                  * Confirmed, creds match, and verifier matches; must
7777                  * be an update of the callback info
7778                  */
7779                 if (cp_confirmed->rc_nfs_client.verifier ==
7780                     args->client.verifier) {
7781                         /* Setup callback information */
7782                         rfs4_client_setcb(cp_confirmed, &args->callback,
7783                             args->callback_ident);
7784
7785                         /* everything okay -- move ahead */
7786                         *cs->statusp = res->status = NFS4_OK;
7787                         res->SETCLIENTID4res_u.resok4.clientid =
7788                             cp_confirmed->rc_clientid;
7789
7790                         /* update the confirm_verifier and return it */
7791                         rfs4_client_scv_next(cp_confirmed);
7792                         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7793                             cp_confirmed->rc_confirm_verf;
7794
7795                         rfs4_client_rele(cp_confirmed);
7796                         goto out;
7797                 }
7798
7799                 /*
7800                  * Creds match but the verifier doesn't.  Must search
7801                  * for an unconfirmed client that would be replaced by
7802                  * this request.
7803                  */
7804                 create = FALSE;
7805                 cp_unconfirmed = rfs4_findclient(&args->client, &create,
7806                     cp_confirmed);
7807         }
7808
7809         /*
7810          * At this point, we have taken care of the brand new client
7811          * struct, INUSE case, update of an existing, and confirmed
7812          * client struct.
7813          */
7814
7815         /*
7816          * check to see if things have changed while we originally
7817          * picked up the client struct.  If they have, then return and
7818          * retry the processing of this SETCLIENTID request.
7819          */
7820         if (cp_unconfirmed) {
7821                 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
7822                 if (!cp_unconfirmed->rc_need_confirm) {
7823                         rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
7824                         rfs4_client_rele(cp_unconfirmed);
7825                         if (cp_confirmed)
7826                                 rfs4_client_rele(cp_confirmed);
7827                         goto retry;
7828                 }
7829                 /* do away with the old unconfirmed one */
7830                 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
7831                 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
7832                 rfs4_client_rele(cp_unconfirmed);
7833                 cp_unconfirmed = NULL;
7834         }
7835
7836         /*
7837          * This search will temporarily hide the confirmed client
7838          * struct while a new client struct is created as the
7839          * unconfirmed one.
7840          */
7841         create = TRUE;
7842         newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
7843
7844         ASSERT(newcp != NULL);
7845
7846         if (newcp == NULL) {
7847                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7848                 rfs4_client_rele(cp_confirmed);
7849                 goto out;
7850         }
7851
7852         /*
7853          * If one was not created, then a similar request must be in
7854          * process so release and start over with this one
7855          */
7856         if (create != TRUE) {
7857                 rfs4_client_rele(newcp);
7858                 if (cp_confirmed)
7859                         rfs4_client_rele(cp_confirmed);
7860                 goto retry;
7861         }
7862
7863         *cs->statusp = res->status = NFS4_OK;
7864         res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
7865         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7866             newcp->rc_confirm_verf;
7867         /* Setup callback information; CB_NULL confirmation later */
7868         rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
7869
7870         newcp->rc_cp_confirmed = cp_confirmed;
7871
7872         rfs4_client_rele(newcp);
7873
7874 out:
7875         DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
7876             SETCLIENTID4res *, res);
7877 }
7878
7879 /*ARGSUSED*/
7880 void
7881 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7882     struct svc_req *req, struct compound_state *cs)
7883 {
7884         SETCLIENTID_CONFIRM4args *args =
7885             &argop->nfs_argop4_u.opsetclientid_confirm;
7886         SETCLIENTID_CONFIRM4res *res =
7887             &resop->nfs_resop4_u.opsetclientid_confirm;
7888         rfs4_client_t *cp, *cptoclose = NULL;
7889
7890         DTRACE_NFSV4_2(op__setclientid__confirm__start,
7891             struct compound_state *, cs,
7892             SETCLIENTID_CONFIRM4args *, args);
7893
7894         *cs->statusp = res->status = NFS4_OK;
7895
7896         cp = rfs4_findclient_by_id(args->clientid, TRUE);
7897
7898         if (cp == NULL) {
7899                 *cs->statusp = res->status =
7900                     rfs4_check_clientid(&args->clientid, 1);
7901                 goto out;
7902         }
7903
7904         if (!creds_ok(cp, req, cs)) {
7905                 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
7906                 rfs4_client_rele(cp);
7907                 goto out;
7908         }
7909
7910         /* If the verifier doesn't match, the record doesn't match */
7911         if (cp->rc_confirm_verf != args->setclientid_confirm) {
7912                 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
7913                 rfs4_client_rele(cp);
7914                 goto out;
7915         }
7916
7917         rfs4_dbe_lock(cp->rc_dbe);
7918         cp->rc_need_confirm = FALSE;
7919         if (cp->rc_cp_confirmed) {
7920                 cptoclose = cp->rc_cp_confirmed;
7921                 cptoclose->rc_ss_remove = 1;
7922                 cp->rc_cp_confirmed = NULL;
7923         }
7924
7925         /*
7926          * Update the client's associated server instance, if it's changed
7927          * since the client was created.
7928          */
7929         if (rfs4_servinst(cp) != rfs4_cur_servinst)
7930                 rfs4_servinst_assign(cp, rfs4_cur_servinst);
7931
7932         /*
7933          * Record clientid in stable storage.
7934          * Must be done after server instance has been assigned.
7935          */
7936         rfs4_ss_clid(cp);
7937
7938         rfs4_dbe_unlock(cp->rc_dbe);
7939
7940         if (cptoclose)
7941                 /* don't need to rele, client_close does it */
7942                 rfs4_client_close(cptoclose);
7943
7944         /* If needed, initiate CB_NULL call for callback path */
7945         rfs4_deleg_cb_check(cp);
7946         rfs4_update_lease(cp);
7947
7948         /*
7949          * Check to see if client can perform reclaims
7950          */
7951         rfs4_ss_chkclid(cp);
7952
7953         rfs4_client_rele(cp);
7954
7955 out:
7956         DTRACE_NFSV4_2(op__setclientid__confirm__done,
7957             struct compound_state *, cs,
7958             SETCLIENTID_CONFIRM4 *, res);
7959 }
7960
7961
7962 /*ARGSUSED*/
7963 void
7964 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
7965     struct svc_req *req, struct compound_state *cs)
7966 {
7967         CLOSE4args *args = &argop->nfs_argop4_u.opclose;
7968         CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
7969         rfs4_state_t *sp;
7970         nfsstat4 status;
7971
7972         DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
7973             CLOSE4args *, args);
7974
7975         if (cs->vp == NULL) {
7976                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7977                 goto out;
7978         }
7979
7980         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
7981         if (status != NFS4_OK) {
7982                 *cs->statusp = resp->status = status;
7983                 goto out;
7984         }
7985
7986         /* Ensure specified filehandle matches */
7987         if (cs->vp != sp->rs_finfo->rf_vp) {
7988                 rfs4_state_rele(sp);
7989                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7990                 goto out;
7991         }
7992
7993         /* hold off other access to open_owner while we tinker */
7994         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7995
7996         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7997         case NFS4_CHECK_STATEID_OKAY:
7998                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7999                     resop) != NFS4_CHKSEQ_OKAY) {
8000                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8001                         goto end;
8002                 }
8003                 break;
8004         case NFS4_CHECK_STATEID_OLD:
8005                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8006                 goto end;
8007         case NFS4_CHECK_STATEID_BAD:
8008                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8009                 goto end;
8010         case NFS4_CHECK_STATEID_EXPIRED:
8011                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8012                 goto end;
8013         case NFS4_CHECK_STATEID_CLOSED:
8014                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8015                 goto end;
8016         case NFS4_CHECK_STATEID_UNCONFIRMED:
8017                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8018                 goto end;
8019         case NFS4_CHECK_STATEID_REPLAY:
8020                 /* Check the sequence id for the open owner */
8021                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8022                     resop)) {
8023                 case NFS4_CHKSEQ_OKAY:
8024                         /*
8025                          * This is replayed stateid; if seqid matches
8026                          * next expected, then client is using wrong seqid.
8027                          */
8028                         /* FALL THROUGH */
8029                 case NFS4_CHKSEQ_BAD:
8030                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8031                         goto end;
8032                 case NFS4_CHKSEQ_REPLAY:
8033                         /*
8034                          * Note this case is the duplicate case so
8035                          * resp->status is already set.
8036                          */
8037                         *cs->statusp = resp->status;
8038                         rfs4_update_lease(sp->rs_owner->ro_client);
8039                         goto end;
8040                 }
8041                 break;
8042         default:
8043                 ASSERT(FALSE);
8044                 break;
8045         }
8046
8047         rfs4_dbe_lock(sp->rs_dbe);
8048
8049         /* Update the stateid. */
8050         next_stateid(&sp->rs_stateid);
8051         resp->open_stateid = sp->rs_stateid.stateid;
8052
8053         rfs4_dbe_unlock(sp->rs_dbe);
8054
8055         rfs4_update_lease(sp->rs_owner->ro_client);
8056         rfs4_update_open_sequence(sp->rs_owner);
8057         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8058
8059         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8060
8061         *cs->statusp = resp->status = status;
8062
8063 end:
8064         rfs4_sw_exit(&sp->rs_owner->ro_sw);
8065         rfs4_state_rele(sp);
8066 out:
8067         DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8068             CLOSE4res *, resp);
8069 }
8070
8071 /*
8072  * Manage the counts on the file struct and close all file locks
8073  */
8074 /*ARGSUSED*/
8075 void
8076 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8077     bool_t close_of_client)
8078 {
8079         rfs4_file_t *fp = sp->rs_finfo;
8080         rfs4_lo_state_t *lsp;
8081         int fflags = 0;
8082
8083         /*
8084          * If this call is part of the larger closing down of client
8085          * state then it is just easier to release all locks
8086          * associated with this client instead of going through each
8087          * individual file and cleaning locks there.
8088          */
8089         if (close_of_client) {
8090                 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8091                     !list_is_empty(&sp->rs_lostatelist) &&
8092                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8093                         /* Is the PxFS kernel module loaded? */
8094                         if (lm_remove_file_locks != NULL) {
8095                                 int new_sysid;
8096
8097                                 /* Encode the cluster nodeid in new sysid */
8098                                 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8099                                 lm_set_nlmid_flk(&new_sysid);
8100
8101                                 /*
8102                                  * This PxFS routine removes file locks for a
8103                                  * client over all nodes of a cluster.
8104                                  */
8105                                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8106                                     "lm_remove_file_locks(sysid=0x%x)\n",
8107                                     new_sysid));
8108                                 (*lm_remove_file_locks)(new_sysid);
8109                         } else {
8110                                 struct flock64 flk;
8111
8112                                 /* Release all locks for this client */
8113                                 flk.l_type = F_UNLKSYS;
8114                                 flk.l_whence = 0;
8115                                 flk.l_start = 0;
8116                                 flk.l_len = 0;
8117                                 flk.l_sysid =
8118                                     sp->rs_owner->ro_client->rc_sysidt;
8119                                 flk.l_pid = 0;
8120                                 (void) fop_frlock(sp->rs_finfo->rf_vp, F_SETLK,
8121                                     &flk, F_REMOTELOCK | FREAD | FWRITE,
8122                                     0, NULL, CRED(), NULL);
8123                         }
8124
8125                         sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8126                 }
8127         }
8128
8129         /*
8130          * Release all locks on this file by this lock owner or at
8131          * least mark the locks as having been released
8132          */
8133         for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8134             lsp = list_next(&sp->rs_lostatelist, lsp)) {
8135                 lsp->rls_locks_cleaned = TRUE;
8136
8137                 /* Was this already taken care of above? */
8138                 if (!close_of_client &&
8139                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8140                         (void) cleanlocks(sp->rs_finfo->rf_vp,
8141                             lsp->rls_locker->rl_pid,
8142                             lsp->rls_locker->rl_client->rc_sysidt);
8143         }
8144
8145         /*
8146          * Release any shrlocks associated with this open state ID.
8147          * This must be done before the rfs4_state gets marked closed.
8148          */
8149         if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8150                 (void) rfs4_unshare(sp);
8151
8152         if (sp->rs_open_access) {
8153                 rfs4_dbe_lock(fp->rf_dbe);
8154
8155                 /*
8156                  * Decrement the count for each access and deny bit that this
8157                  * state has contributed to the file.
8158                  * If the file counts go to zero
8159                  * clear the appropriate bit in the appropriate mask.
8160                  */
8161                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8162                         fp->rf_access_read--;
8163                         fflags |= FREAD;
8164                         if (fp->rf_access_read == 0)
8165                                 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8166                 }
8167                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8168                         fp->rf_access_write--;
8169                         fflags |= FWRITE;
8170                         if (fp->rf_access_write == 0)
8171                                 fp->rf_share_access &=
8172                                     ~OPEN4_SHARE_ACCESS_WRITE;
8173                 }
8174                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8175                         fp->rf_deny_read--;
8176                         if (fp->rf_deny_read == 0)
8177                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8178                 }
8179                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8180                         fp->rf_deny_write--;
8181                         if (fp->rf_deny_write == 0)
8182                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8183                 }
8184
8185                 (void) fop_close(fp->rf_vp, fflags, 1, 0, cr, NULL);
8186
8187                 rfs4_dbe_unlock(fp->rf_dbe);
8188
8189                 sp->rs_open_access = 0;
8190                 sp->rs_open_deny = 0;
8191         }
8192 }
8193
8194 /*
8195  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8196  */
8197 static nfsstat4
8198 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8199 {
8200         rfs4_lockowner_t *lo;
8201         rfs4_client_t *cp;
8202         uint32_t len;
8203
8204         lo = rfs4_findlockowner_by_pid(flk->l_pid);
8205         if (lo != NULL) {
8206                 cp = lo->rl_client;
8207                 if (rfs4_lease_expired(cp)) {
8208                         rfs4_lockowner_rele(lo);
8209                         rfs4_dbe_hold(cp->rc_dbe);
8210                         rfs4_client_close(cp);
8211                         return (NFS4ERR_EXPIRED);
8212                 }
8213                 dp->owner.clientid = lo->rl_owner.clientid;
8214                 len = lo->rl_owner.owner_len;
8215                 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8216                 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8217                 dp->owner.owner_len = len;
8218                 rfs4_lockowner_rele(lo);
8219                 goto finish;
8220         }
8221
8222         /*
8223          * Its not a NFS4 lock. We take advantage that the upper 32 bits
8224          * of the client id contain the boot time for a NFS4 lock. So we
8225          * fabricate and identity by setting clientid to the sysid, and
8226          * the lock owner to the pid.
8227          */
8228         dp->owner.clientid = flk->l_sysid;
8229         len = sizeof (pid_t);
8230         dp->owner.owner_len = len;
8231         dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8232         bcopy(&flk->l_pid, dp->owner.owner_val, len);
8233 finish:
8234         dp->offset = flk->l_start;
8235         dp->length = flk->l_len;
8236
8237         if (flk->l_type == F_RDLCK)
8238                 dp->locktype = READ_LT;
8239         else if (flk->l_type == F_WRLCK)
8240                 dp->locktype = WRITE_LT;
8241         else
8242                 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8243
8244         return (NFS4_OK);
8245 }
8246
8247 /*
8248  * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8249  * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8250  * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8251  * for that (obviously); they are sending the LOCK requests with some delays
8252  * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8253  * locking and delay implementation at the client side.
8254  *
8255  * To make the life of the clients easier, the NFSv4.0 server tries to do some
8256  * fast retries on its own (the for loop below) in a hope the lock will be
8257  * available soon.  And if not, the client won't need to resend the LOCK
8258  * requests so fast to check the lock availability.  This basically saves some
8259  * network traffic and tries to make sure the client gets the lock ASAP.
8260  */
8261 static int
8262 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8263 {
8264         int error;
8265         struct flock64 flk;
8266         int i;
8267         clock_t delaytime;
8268         int cmd;
8269         int spin_cnt = 0;
8270
8271         cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8272 retry:
8273         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8274
8275         for (i = 0; i < rfs4_maxlock_tries; i++) {
8276                 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8277                 error = fop_frlock(vp, cmd,
8278                     flock, flag, 0, NULL, cred, NULL);
8279
8280                 if (error != EAGAIN && error != EACCES)
8281                         break;
8282
8283                 if (i < rfs4_maxlock_tries - 1) {
8284                         delay(delaytime);
8285                         delaytime *= 2;
8286                 }
8287         }
8288
8289         if (error == EAGAIN || error == EACCES) {
8290                 /* Get the owner of the lock */
8291                 flk = *flock;
8292                 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8293                 if (fop_frlock(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8294                     NULL) == 0) {
8295                         /*
8296                          * There's a race inherent in the current fop_frlock
8297                          * design where:
8298                          * a: "other guy" takes a lock that conflicts with a
8299                          * lock we want
8300                          * b: we attempt to take our lock (non-blocking) and
8301                          * the attempt fails.
8302                          * c: "other guy" releases the conflicting lock
8303                          * d: we ask what lock conflicts with the lock we want,
8304                          * getting F_UNLCK (no lock blocks us)
8305                          *
8306                          * If we retry the non-blocking lock attempt in this
8307                          * case (restart at step 'b') there's some possibility
8308                          * that many such attempts might fail.  However a test
8309                          * designed to actually provoke this race shows that
8310                          * the vast majority of cases require no retry, and
8311                          * only a few took as many as three retries.  Here's
8312                          * the test outcome:
8313                          *
8314                          *         number of retries    how many times we needed
8315                          *                              that many retries
8316                          *         0                    79461
8317                          *         1                      862
8318                          *         2                       49
8319                          *         3                        5
8320                          *
8321                          * Given those empirical results, we arbitrarily limit
8322                          * the retry count to ten.
8323                          *
8324                          * If we actually make to ten retries and give up,
8325                          * nothing catastrophic happens, but we're unable to
8326                          * return the information about the conflicting lock to
8327                          * the NFS client.  That's an acceptable trade off vs.
8328                          * letting this retry loop run forever.
8329                          */
8330                         if (flk.l_type == F_UNLCK) {
8331                                 if (spin_cnt++ < 10) {
8332                                         /* No longer locked, retry */
8333                                         goto retry;
8334                                 }
8335                         } else {
8336                                 *flock = flk;
8337                                 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8338                                     F_GETLK, &flk);
8339                         }
8340                 }
8341         }
8342
8343         return (error);
8344 }
8345
8346 /*ARGSUSED*/
8347 static nfsstat4
8348 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8349     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8350 {
8351         nfsstat4 status;
8352         rfs4_lockowner_t *lo = lsp->rls_locker;
8353         rfs4_state_t *sp = lsp->rls_state;
8354         struct flock64 flock;
8355         int16_t ltype;
8356         int flag;
8357         int error;
8358         sysid_t sysid;
8359         LOCK4res *lres;
8360         vnode_t *vp;
8361
8362         if (rfs4_lease_expired(lo->rl_client)) {
8363                 return (NFS4ERR_EXPIRED);
8364         }
8365
8366         if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8367                 return (status);
8368
8369         /* Check for zero length. To lock to end of file use all ones for V4 */
8370         if (length == 0)
8371                 return (NFS4ERR_INVAL);
8372         else if (length == (length4)(~0))
8373                 length = 0;             /* Posix to end of file  */
8374
8375 retry:
8376         rfs4_dbe_lock(sp->rs_dbe);
8377         if (sp->rs_closed == TRUE) {
8378                 rfs4_dbe_unlock(sp->rs_dbe);
8379                 return (NFS4ERR_OLD_STATEID);
8380         }
8381
8382         if (resop->resop != OP_LOCKU) {
8383                 switch (locktype) {
8384                 case READ_LT:
8385                 case READW_LT:
8386                         if ((sp->rs_share_access
8387                             & OPEN4_SHARE_ACCESS_READ) == 0) {
8388                                 rfs4_dbe_unlock(sp->rs_dbe);
8389
8390                                 return (NFS4ERR_OPENMODE);
8391                         }
8392                         ltype = F_RDLCK;
8393                         break;
8394                 case WRITE_LT:
8395                 case WRITEW_LT:
8396                         if ((sp->rs_share_access
8397                             & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8398                                 rfs4_dbe_unlock(sp->rs_dbe);
8399
8400                                 return (NFS4ERR_OPENMODE);
8401                         }
8402                         ltype = F_WRLCK;
8403                         break;
8404                 }
8405         } else
8406                 ltype = F_UNLCK;
8407
8408         flock.l_type = ltype;
8409         flock.l_whence = 0;             /* SEEK_SET */
8410         flock.l_start = offset;
8411         flock.l_len = length;
8412         flock.l_sysid = sysid;
8413         flock.l_pid = lsp->rls_locker->rl_pid;
8414
8415         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8416         if (flock.l_len < 0 || flock.l_start < 0) {
8417                 rfs4_dbe_unlock(sp->rs_dbe);
8418                 return (NFS4ERR_INVAL);
8419         }
8420
8421         /*
8422          * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8423          * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8424          */
8425         flag = (int)sp->rs_share_access | F_REMOTELOCK;
8426
8427         vp = sp->rs_finfo->rf_vp;
8428         VN_HOLD(vp);
8429
8430         /*
8431          * We need to unlock sp before we call the underlying filesystem to
8432          * acquire the file lock.
8433          */
8434         rfs4_dbe_unlock(sp->rs_dbe);
8435
8436         error = setlock(vp, &flock, flag, cred);
8437
8438         /*
8439          * Make sure the file is still open.  In a case the file was closed in
8440          * the meantime, clean the lock we acquired using the setlock() call
8441          * above, and return the appropriate error.
8442          */
8443         rfs4_dbe_lock(sp->rs_dbe);
8444         if (sp->rs_closed == TRUE) {
8445                 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8446                 rfs4_dbe_unlock(sp->rs_dbe);
8447
8448                 VN_RELE(vp);
8449
8450                 return (NFS4ERR_OLD_STATEID);
8451         }
8452         rfs4_dbe_unlock(sp->rs_dbe);
8453
8454         VN_RELE(vp);
8455
8456         if (error == 0) {
8457                 rfs4_dbe_lock(lsp->rls_dbe);
8458                 next_stateid(&lsp->rls_lockid);
8459                 rfs4_dbe_unlock(lsp->rls_dbe);
8460         }
8461
8462         /*
8463          * N.B. We map error values to nfsv4 errors. This is differrent
8464          * than puterrno4 routine.
8465          */
8466         switch (error) {
8467         case 0:
8468                 status = NFS4_OK;
8469                 break;
8470         case EAGAIN:
8471         case EACCES:            /* Old value */
8472                 /* Can only get here if op is OP_LOCK */
8473                 ASSERT(resop->resop == OP_LOCK);
8474                 lres = &resop->nfs_resop4_u.oplock;
8475                 status = NFS4ERR_DENIED;
8476                 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8477                     == NFS4ERR_EXPIRED)
8478                         goto retry;
8479                 break;
8480         case ENOLCK:
8481                 status = NFS4ERR_DELAY;
8482                 break;
8483         case EOVERFLOW:
8484                 status = NFS4ERR_INVAL;
8485                 break;
8486         case EINVAL:
8487                 status = NFS4ERR_NOTSUPP;
8488                 break;
8489         default:
8490                 status = NFS4ERR_SERVERFAULT;
8491                 break;
8492         }
8493
8494         return (status);
8495 }
8496
/*
 * rfs4_op_lock: handler for the NFSv4 LOCK operation.
 *
 * Two request shapes are handled:
 *  - args->locker.new_lock_owner is set: the open stateid is looked up and
 *    checked, the open owner's seqid is checked, and a lock owner and lock
 *    state are found (or created) for that open state.
 *  - otherwise: the lock stateid is looked up and both the stateid and the
 *    lock seqid are checked.
 *
 * Once those checks pass, the object type and the grace/reclaim conditions
 * are verified and the lock is attempted through rfs4_do_lock().  The
 * outcome is stored in both resp->status and *cs->statusp.
 *
 * Exit paths:
 *  - early returns (before any sw lock is taken) fire the "done" probe
 *    directly;
 *  - "out" records the final status and calls rfs4_update_open_resp() /
 *    rfs4_update_lock_resp();
 *  - "end" leaves the sw locks and releases the holds on lsp and sp.
 */
/*ARGSUSED*/
void
rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
        LOCK4args *args = &argop->nfs_argop4_u.oplock;
        LOCK4res *resp = &resop->nfs_resop4_u.oplock;
        nfsstat4 status;
        stateid4 *stateid;
        rfs4_lockowner_t *lo;
        rfs4_client_t *cp;
        rfs4_state_t *sp = NULL;        /* non-NULL only for new_lock_owner */
        rfs4_lo_state_t *lsp = NULL;
        bool_t ls_sw_held = FALSE;      /* TRUE once lsp->rls_sw is entered */
        bool_t create = TRUE;
        bool_t lcreate = TRUE;
        bool_t dup_lock = FALSE;
        int rc;

        DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
            LOCK4args *, args);

        /* LOCK needs a current filehandle */
        if (cs->vp == NULL) {
                *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
                DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
                    cs, LOCK4res *, resp);
                return;
        }

        if (args->locker.new_lock_owner) {
                /* Create a new lockowner for this instance */
                open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;

                NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));

                stateid = &olo->open_stateid;
                status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
                if (status != NFS4_OK) {
                        NFS4_DEBUG(rfs4_debug,
                            (CE_NOTE, "Get state failed in lock %d", status));
                        *cs->statusp = resp->status = status;
                        DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
                            cs, LOCK4res *, resp);
                        return;
                }

                /* Ensure specified filehandle matches */
                if (cs->vp != sp->rs_finfo->rf_vp) {
                        rfs4_state_rele(sp);
                        *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
                        DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
                            cs, LOCK4res *, resp);
                        return;
                }

                /* hold off other access to open_owner while we tinker */
                rfs4_sw_enter(&sp->rs_owner->ro_sw);

                /*
                 * From here on every exit goes through "end" so that ro_sw
                 * is exited and sp is released.
                 */
                switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
                case NFS4_CHECK_STATEID_OLD:
                        *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
                        goto end;
                case NFS4_CHECK_STATEID_BAD:
                        *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
                        goto end;
                case NFS4_CHECK_STATEID_EXPIRED:
                        *cs->statusp = resp->status = NFS4ERR_EXPIRED;
                        goto end;
                case NFS4_CHECK_STATEID_UNCONFIRMED:
                        *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
                        goto end;
                case NFS4_CHECK_STATEID_CLOSED:
                        *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
                        goto end;
                case NFS4_CHECK_STATEID_OKAY:
                case NFS4_CHECK_STATEID_REPLAY:
                        switch (rfs4_check_olo_seqid(olo->open_seqid,
                            sp->rs_owner, resop)) {
                        case NFS4_CHKSEQ_OKAY:
                                if (rc == NFS4_CHECK_STATEID_OKAY)
                                        break;
                                /*
                                 * This is replayed stateid; if seqid
                                 * matches next expected, then client
                                 * is using wrong seqid.
                                 */
                                /* FALLTHROUGH */
                        case NFS4_CHKSEQ_BAD:
                                *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
                                goto end;
                        case NFS4_CHKSEQ_REPLAY:
                                /* This is a duplicate LOCK request */
                                dup_lock = TRUE;

                                /*
                                 * For a duplicate we do not want to
                                 * create a new lockowner as it should
                                 * already exist.
                                 * Turn off the lockowner create flag.
                                 */
                                lcreate = FALSE;
                        }
                        break;
                }

                lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
                if (lo == NULL) {
                        NFS4_DEBUG(rfs4_debug,
                            (CE_NOTE, "rfs4_op_lock: no lock owner"));
                        *cs->statusp = resp->status = NFS4ERR_RESOURCE;
                        goto end;
                }

                lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
                if (lsp == NULL) {
                        rfs4_update_lease(sp->rs_owner->ro_client);
                        /*
                         * Only update the open_seqid if this is not
                         * a duplicate request
                         */
                        if (dup_lock == FALSE) {
                                rfs4_update_open_sequence(sp->rs_owner);
                        }

                        NFS4_DEBUG(rfs4_debug,
                            (CE_NOTE, "rfs4_op_lock: no state"));
                        *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
                        rfs4_update_open_resp(sp->rs_owner, resop, NULL);
                        rfs4_lockowner_rele(lo);
                        goto end;
                }

                /*
                 * This is the new_lock_owner branch and the client is
                 * supposed to be associating a new lock_owner with
                 * the open file at this point.  If we find that a
                 * lock_owner/state association already exists and a
                 * successful LOCK request was returned to the client,
                 * an error is returned to the client since this is
                 * not appropriate.  The client should be using the
                 * existing lock_owner branch.
                 */
                if (dup_lock == FALSE && create == FALSE) {
                        if (lsp->rls_lock_completed == TRUE) {
                                *cs->statusp =
                                    resp->status = NFS4ERR_BAD_SEQID;
                                rfs4_lockowner_rele(lo);
                                goto end;
                        }
                }

                rfs4_update_lease(sp->rs_owner->ro_client);

                /*
                 * Only update the open_seqid if this is not
                 * a duplicate request
                 */
                if (dup_lock == FALSE) {
                        rfs4_update_open_sequence(sp->rs_owner);
                }

                /*
                 * If this is a duplicate lock request, just copy the
                 * previously saved reply and return.
                 */
                if (dup_lock == TRUE) {
                        /* verify that lock_seqid's match */
                        if (lsp->rls_seqid != olo->lock_seqid) {
                                /*
                                 * NOTE(review): the two adjacent string
                                 * literals below are concatenated without a
                                 * separating space ("...bad" "lsp->...").
                                 */
                                NFS4_DEBUG(rfs4_debug,
                                    (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
                                    "lsp->seqid=%d old->seqid=%d",
                                    lsp->rls_seqid, olo->lock_seqid));
                                *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
                        } else {
                                rfs4_copy_reply(resop, &lsp->rls_reply);
                                /*
                                 * Make sure to copy the just
                                 * retrieved reply status into the
                                 * overall compound status
                                 */
                                *cs->statusp = resp->status;
                        }
                        rfs4_lockowner_rele(lo);
                        goto end;
                }

                rfs4_dbe_lock(lsp->rls_dbe);

                /* Make sure to update the lock sequence id */
                lsp->rls_seqid = olo->lock_seqid;

                NFS4_DEBUG(rfs4_debug,
                    (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));

                /*
                 * This is used to signify the newly created lockowner
                 * stateid and its sequence number.  The checks for
                 * sequence number and increment don't occur on the
                 * very first lock request for a lockowner.
                 */
                lsp->rls_skip_seqid_check = TRUE;

                /* hold off other access to lsp while we tinker */
                rfs4_sw_enter(&lsp->rls_sw);
                ls_sw_held = TRUE;

                rfs4_dbe_unlock(lsp->rls_dbe);

                rfs4_lockowner_rele(lo);
        } else {
                stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
                /* get lsp and hold the lock on the underlying file struct */
                if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
                    != NFS4_OK) {
                        *cs->statusp = resp->status = status;
                        DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
                            cs, LOCK4res *, resp);
                        return;
                }
                create = FALSE; /* We didn't create lsp */

                /* Ensure specified filehandle matches */
                if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
                        rfs4_lo_state_rele(lsp, TRUE);
                        *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
                        DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
                            cs, LOCK4res *, resp);
                        return;
                }

                /* hold off other access to lsp while we tinker */
                rfs4_sw_enter(&lsp->rls_sw);
                ls_sw_held = TRUE;

                switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
                /*
                 * The stateid looks like it was okay (expected to be
                 * the next one)
                 */
                case NFS4_CHECK_STATEID_OKAY:
                        /*
                         * The sequence id is now checked.  Determine
                         * if this is a replay or if it is in the
                         * expected (next) sequence.  In the case of a
                         * replay, there are two replay conditions
                         * that may occur.  The first is the normal
                         * condition where a LOCK is done with a
                         * NFS4_OK response and the stateid is
                         * updated.  That case is handled below when
                         * the stateid is identified as a REPLAY.  The
                         * second is the case where an error is
                         * returned, like NFS4ERR_DENIED, and the
                         * sequence number is updated but the stateid
                         * is not updated.  This second case is dealt
                         * with here.  So it may seem odd that the
                         * stateid is okay but the sequence id is a
                         * replay but it is okay.
                         */
                        switch (rfs4_check_lock_seqid(
                            args->locker.locker4_u.lock_owner.lock_seqid,
                            lsp, resop)) {
                        case NFS4_CHKSEQ_REPLAY:
                                if (resp->status != NFS4_OK) {
                                        /*
                                         * Here is our replay and need
                                         * to verify that the last
                                         * response was an error.
                                         */
                                        *cs->statusp = resp->status;
                                        goto end;
                                }
                                /*
                                 * This is done since the sequence id
                                 * looked like a replay but it didn't
                                 * pass our check so a BAD_SEQID is
                                 * returned as a result.
                                 */
                                /*FALLTHROUGH*/
                        case NFS4_CHKSEQ_BAD:
                                *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
                                goto end;
                        case NFS4_CHKSEQ_OKAY:
                                /* Everything looks okay move ahead */
                                break;
                        }
                        break;
                case NFS4_CHECK_STATEID_OLD:
                        *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
                        goto end;
                case NFS4_CHECK_STATEID_BAD:
                        *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
                        goto end;
                case NFS4_CHECK_STATEID_EXPIRED:
                        *cs->statusp = resp->status = NFS4ERR_EXPIRED;
                        goto end;
                case NFS4_CHECK_STATEID_CLOSED:
                        *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
                        goto end;
                case NFS4_CHECK_STATEID_REPLAY:
                        switch (rfs4_check_lock_seqid(
                            args->locker.locker4_u.lock_owner.lock_seqid,
                            lsp, resop)) {
                        case NFS4_CHKSEQ_OKAY:
                                /*
                                 * This is a replayed stateid; if
                                 * seqid matches the next expected,
                                 * then client is using wrong seqid.
                                 */
                                /* FALLTHROUGH */
                        case NFS4_CHKSEQ_BAD:
                                *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
                                goto end;
                        case NFS4_CHKSEQ_REPLAY:
                                /*
                                 * Genuine replay: renew the lease and hand
                                 * back whatever status is already in resp.
                                 */
                                rfs4_update_lease(lsp->rls_locker->rl_client);
                                *cs->statusp = status = resp->status;
                                goto end;
                        }
                        break;
                default:
                        ASSERT(FALSE);
                        break;
                }

                rfs4_update_lock_sequence(lsp);
                rfs4_update_lease(lsp->rls_locker->rl_client);
        }

        /*
         * NFS4 only allows locking on regular files, so
         * verify type of object.
         */
        if (cs->vp->v_type != VREG) {
                if (cs->vp->v_type == VDIR)
                        status = NFS4ERR_ISDIR;
                else
                        status = NFS4ERR_INVAL;
                goto out;
        }

        cp = lsp->rls_state->rs_owner->ro_client;

        /*
         * Grace period handling: a non-reclaim lock during grace gets
         * NFS4ERR_GRACE; a reclaim is refused with NFS4ERR_NO_GRACE when the
         * client may not reclaim or when the client is not in grace at all.
         */
        if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
                status = NFS4ERR_GRACE;
                goto out;
        }

        if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
                status = NFS4ERR_NO_GRACE;
                goto out;
        }

        if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
                status = NFS4ERR_NO_GRACE;
                goto out;
        }

        /* Flag the compound when the file has a write delegation */
        if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
                cs->deleg = TRUE;

        status = rfs4_do_lock(lsp, args->locktype,
            args->offset, args->length, cs->cr, resop);

out:
        /* The first-request exemption from seqid checking ends here */
        lsp->rls_skip_seqid_check = FALSE;

        *cs->statusp = resp->status = status;

        if (status == NFS4_OK) {
                resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
                lsp->rls_lock_completed = TRUE;
        }
        /*
         * Only update the "OPEN" response here if this was a new
         * lock_owner
         */
        if (sp)
                rfs4_update_open_resp(sp->rs_owner, resop, NULL);

        rfs4_update_lock_resp(lsp, resop);

end:
        if (lsp) {
                if (ls_sw_held)
                        rfs4_sw_exit(&lsp->rls_sw);
                /*
                 * If an sp obtained, then the lsp does not represent
                 * a lock on the file struct.
                 */
                if (sp != NULL)
                        rfs4_lo_state_rele(lsp, FALSE);
                else
                        rfs4_lo_state_rele(lsp, TRUE);
        }
        if (sp) {
                /* Undo the rfs4_sw_enter() and the hold from rfs4_get_state() */
                rfs4_sw_exit(&sp->rs_owner->ro_sw);
                rfs4_state_rele(sp);
        }

        DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
            LOCK4res *, resp);
}
8897
8898 /* free function for LOCK/LOCKT */
8899 static void
8900 lock_denied_free(nfs_resop4 *resop)
8901 {
8902         LOCK4denied *dp = NULL;
8903
8904         switch (resop->resop) {
8905         case OP_LOCK:
8906                 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
8907                         dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
8908                 break;
8909         case OP_LOCKT:
8910                 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
8911                         dp = &resop->nfs_resop4_u.oplockt.denied;
8912                 break;
8913         default:
8914                 break;
8915         }
8916
8917         if (dp)
8918                 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
8919 }
8920
8921 /*ARGSUSED*/
8922 void
8923 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
8924     struct svc_req *req, struct compound_state *cs)
8925 {
8926         LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
8927         LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
8928         nfsstat4 status;
8929         stateid4 *stateid = &args->lock_stateid;
8930         rfs4_lo_state_t *lsp;
8931
8932         DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
8933             LOCKU4args *, args);
8934
8935         if (cs->vp == NULL) {
8936                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8937                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
8938                     LOCKU4res *, resp);
8939                 return;
8940         }
8941
8942         if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
8943                 *cs->statusp = resp->status = status;
8944                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
8945                     LOCKU4res *, resp);
8946                 return;
8947         }
8948
8949         /* Ensure specified filehandle matches */
8950         if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
8951                 rfs4_lo_state_rele(lsp, TRUE);
8952                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8953                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
8954                     LOCKU4res *, resp);
8955                 return;
8956         }
8957
8958         /* hold off other access to lsp while we tinker */
8959         rfs4_sw_enter(&lsp->rls_sw);
8960
8961         switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
8962         case NFS4_CHECK_STATEID_OKAY:
8963                 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
8964                     != NFS4_CHKSEQ_OKAY) {
8965                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8966                         goto end;
8967                 }
8968                 break;
8969         case NFS4_CHECK_STATEID_OLD:
8970                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8971                 goto end;
8972         case NFS4_CHECK_STATEID_BAD:
8973                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8974                 goto end;
8975         case NFS4_CHECK_STATEID_EXPIRED:
8976                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8977                 goto end;
8978         case NFS4_CHECK_STATEID_CLOSED:
8979                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8980                 goto end;
8981         case NFS4_CHECK_STATEID_REPLAY:
8982                 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
8983                 case NFS4_CHKSEQ_OKAY:
8984                                 /*
8985                                  * This is a replayed stateid; if
8986                                  * seqid matches the next expected,
8987                                  * then client is using wrong seqid.
8988                                  */
8989                 case NFS4_CHKSEQ_BAD:
8990                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8991                         goto end;
8992                 case NFS4_CHKSEQ_REPLAY:
8993                         rfs4_update_lease(lsp->rls_locker->rl_client);
8994                         *cs->statusp = status = resp->status;
8995                         goto end;
8996                 }
8997                 break;
8998         default:
8999                 ASSERT(FALSE);
9000                 break;
9001         }
9002
9003         rfs4_update_lock_sequence(lsp);
9004         rfs4_update_lease(lsp->rls_locker->rl_client);
9005
9006         /*
9007          * NFS4 only allows locking on regular files, so
9008          * verify type of object.
9009          */
9010         if (cs->vp->v_type != VREG) {
9011                 if (cs->vp->v_type == VDIR)
9012                         status = NFS4ERR_ISDIR;
9013                 else
9014                         status = NFS4ERR_INVAL;
9015                 goto out;
9016         }
9017
9018         if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9019                 status = NFS4ERR_GRACE;
9020                 goto out;
9021         }
9022
9023         status = rfs4_do_lock(lsp, args->locktype,
9024             args->offset, args->length, cs->cr, resop);
9025
9026 out:
9027         *cs->statusp = resp->status = status;
9028
9029         if (status == NFS4_OK)
9030                 resp->lock_stateid = lsp->rls_lockid.stateid;
9031
9032         rfs4_update_lock_resp(lsp, resop);
9033
9034 end:
9035         rfs4_sw_exit(&lsp->rls_sw);
9036         rfs4_lo_state_rele(lsp, TRUE);
9037
9038         DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9039             LOCKU4res *, resp);
9040 }
9041
9042 /*
9043  * LOCKT is a best effort routine, the client can not be guaranteed that
9044  * the status return is still in effect by the time the reply is received.
9045  * They are numerous race conditions in this routine, but we are not required
9046  * and can not be accurate.
9047  */
9048 /*ARGSUSED*/
9049 void
9050 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9051     struct svc_req *req, struct compound_state *cs)
9052 {
9053         LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9054         LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9055         rfs4_lockowner_t *lo;
9056         rfs4_client_t *cp;
9057         bool_t create = FALSE;
9058         struct flock64 flk;
9059         int error;
9060         int flag = FREAD | FWRITE;
9061         int ltype;
9062         length4 posix_length;
9063         sysid_t sysid;
9064         pid_t pid;
9065
9066         DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9067             LOCKT4args *, args);
9068
9069         if (cs->vp == NULL) {
9070                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9071                 goto out;
9072         }
9073
9074         /*
9075          * NFS4 only allows locking on regular files, so
9076          * verify type of object.
9077          */
9078         if (cs->vp->v_type != VREG) {
9079                 if (cs->vp->v_type == VDIR)
9080                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
9081                 else
9082                         *cs->statusp = resp->status =  NFS4ERR_INVAL;
9083                 goto out;
9084         }
9085
9086         /*
9087          * Check out the clientid to ensure the server knows about it
9088          * so that we correctly inform the client of a server reboot.
9089          */
9090         if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9091             == NULL) {
9092                 *cs->statusp = resp->status =
9093                     rfs4_check_clientid(&args->owner.clientid, 0);
9094                 goto out;
9095         }
9096         if (rfs4_lease_expired(cp)) {
9097                 rfs4_client_close(cp);
9098                 /*
9099                  * Protocol doesn't allow returning NFS4ERR_STALE as
9100                  * other operations do on this check so STALE_CLIENTID
9101                  * is returned instead
9102                  */
9103                 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9104                 goto out;
9105         }
9106
9107         if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9108                 *cs->statusp = resp->status = NFS4ERR_GRACE;
9109                 rfs4_client_rele(cp);
9110                 goto out;
9111         }
9112         rfs4_client_rele(cp);
9113
9114         resp->status = NFS4_OK;
9115
9116         switch (args->locktype) {
9117         case READ_LT:
9118         case READW_LT:
9119                 ltype = F_RDLCK;
9120                 break;
9121         case WRITE_LT:
9122         case WRITEW_LT:
9123                 ltype = F_WRLCK;
9124                 break;
9125         }
9126
9127         posix_length = args->length;
9128         /* Check for zero length. To lock to end of file use all ones for V4 */
9129         if (posix_length == 0) {
9130                 *cs->statusp = resp->status = NFS4ERR_INVAL;
9131                 goto out;
9132         } else if (posix_length == (length4)(~0)) {
9133                 posix_length = 0;       /* Posix to end of file  */
9134         }
9135
9136         /* Find or create a lockowner */
9137         lo = rfs4_findlockowner(&args->owner, &create);
9138
9139         if (lo) {
9140                 pid = lo->rl_pid;
9141                 if ((resp->status =
9142                     rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9143                         goto err;
9144         } else {
9145                 pid = 0;
9146                 sysid = lockt_sysid;
9147         }
9148 retry:
9149         flk.l_type = ltype;
9150         flk.l_whence = 0;               /* SEEK_SET */
9151         flk.l_start = args->offset;
9152         flk.l_len = posix_length;
9153         flk.l_sysid = sysid;
9154         flk.l_pid = pid;
9155         flag |= F_REMOTELOCK;
9156
9157         LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9158
9159         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9160         if (flk.l_len < 0 || flk.l_start < 0) {
9161                 resp->status = NFS4ERR_INVAL;
9162                 goto err;
9163         }
9164         error = fop_frlock(cs->vp, F_GETLK, &flk, flag, 0,
9165             NULL, cs->cr, NULL);
9166
9167         /*
9168          * N.B. We map error values to nfsv4 errors. This is differrent
9169          * than puterrno4 routine.
9170          */
9171         switch (error) {
9172         case 0:
9173                 if (flk.l_type == F_UNLCK)
9174                         resp->status = NFS4_OK;
9175                 else {
9176                         if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9177                                 goto retry;
9178                         resp->status = NFS4ERR_DENIED;
9179                 }
9180                 break;
9181         case EOVERFLOW:
9182                 resp->status = NFS4ERR_INVAL;
9183                 break;
9184         case EINVAL:
9185                 resp->status = NFS4ERR_NOTSUPP;
9186                 break;
9187         default:
9188                 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9189                     error);
9190                 resp->status = NFS4ERR_SERVERFAULT;
9191                 break;
9192         }
9193
9194 err:
9195         if (lo)
9196                 rfs4_lockowner_rele(lo);
9197         *cs->statusp = resp->status;
9198 out:
9199         DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9200             LOCKT4res *, resp);
9201 }
9202
9203 int
9204 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9205 {
9206         int err;
9207         int cmd;
9208         vnode_t *vp;
9209         struct shrlock shr;
9210         struct shr_locowner shr_loco;
9211         int fflags = 0;
9212
9213         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9214         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9215
9216         if (sp->rs_closed)
9217                 return (NFS4ERR_OLD_STATEID);
9218
9219         vp = sp->rs_finfo->rf_vp;
9220         ASSERT(vp);
9221
9222         shr.s_access = shr.s_deny = 0;
9223
9224         if (access & OPEN4_SHARE_ACCESS_READ) {
9225                 fflags |= FREAD;
9226                 shr.s_access |= F_RDACC;
9227         }
9228         if (access & OPEN4_SHARE_ACCESS_WRITE) {
9229                 fflags |= FWRITE;
9230                 shr.s_access |= F_WRACC;
9231         }
9232         ASSERT(shr.s_access);
9233
9234         if (deny & OPEN4_SHARE_DENY_READ)
9235                 shr.s_deny |= F_RDDNY;
9236         if (deny & OPEN4_SHARE_DENY_WRITE)
9237                 shr.s_deny |= F_WRDNY;
9238
9239         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9240         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9241         shr_loco.sl_pid = shr.s_pid;
9242         shr_loco.sl_id = shr.s_sysid;
9243         shr.s_owner = (caddr_t)&shr_loco;
9244         shr.s_own_len = sizeof (shr_loco);
9245
9246         cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9247
9248         err = fop_shrlock(vp, cmd, &shr, fflags, CRED(), NULL);
9249         if (err != 0) {
9250                 if (err == EAGAIN)
9251                         err = NFS4ERR_SHARE_DENIED;
9252                 else
9253                         err = puterrno4(err);
9254                 return (err);
9255         }
9256
9257         sp->rs_share_access |= access;
9258         sp->rs_share_deny |= deny;
9259
9260         return (0);
9261 }
9262
9263 int
9264 rfs4_unshare(rfs4_state_t *sp)
9265 {
9266         int err;
9267         struct shrlock shr;
9268         struct shr_locowner shr_loco;
9269
9270         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9271
9272         if (sp->rs_closed || sp->rs_share_access == 0)
9273                 return (0);
9274
9275         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9276         ASSERT(sp->rs_finfo->rf_vp);
9277
9278         shr.s_access = shr.s_deny = 0;
9279         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9280         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9281         shr_loco.sl_pid = shr.s_pid;
9282         shr_loco.sl_id = shr.s_sysid;
9283         shr.s_owner = (caddr_t)&shr_loco;
9284         shr.s_own_len = sizeof (shr_loco);
9285
9286         err = fop_shrlock(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9287             NULL);
9288         if (err != 0) {
9289                 err = puterrno4(err);
9290                 return (err);
9291         }
9292
9293         sp->rs_share_access = 0;
9294         sp->rs_share_deny = 0;
9295
9296         return (0);
9297
9298 }
9299
9300 static int
9301 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9302 {
9303         struct clist    *wcl;
9304         count4          count = rok->data_len;
9305         int             wlist_len;
9306
9307         wcl = args->wlist;
9308         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9309                 return (FALSE);
9310         }
9311         wcl = args->wlist;
9312         rok->wlist_len = wlist_len;
9313         rok->wlist = wcl;
9314         return (TRUE);
9315 }
9316
9317 /* tunable: when nonzero, vn_is_nfs_reparse() returns B_FALSE, disabling server referrals */
9318 int rfs4_no_referrals = 0;
9319
9320 /*
9321  * Find an NFS record in reparse point data.
9322  * Returns 0 for success and <0 or an errno value on failure.
9323  */
9324 int
9325 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9326 {
9327         int err;
9328         char *stype, *val;
9329         nvlist_t *nvl;
9330         nvpair_t *curr;
9331
9332         if ((nvl = reparse_init()) == NULL)
9333                 return (-1);
9334
9335         if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9336                 reparse_free(nvl);
9337                 return (err);
9338         }
9339
9340         curr = NULL;
9341         while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9342                 if ((stype = nvpair_name(curr)) == NULL) {
9343                         reparse_free(nvl);
9344                         return (-2);
9345                 }
9346                 if (strncasecmp(stype, "NFS", 3) == 0)
9347                         break;
9348         }
9349
9350         if ((curr == NULL) ||
9351             (nvpair_value_string(curr, &val))) {
9352                 reparse_free(nvl);
9353                 return (-3);
9354         }
9355         *nvlp = nvl;
9356         *svcp = stype;
9357         *datap = val;
9358         return (0);
9359 }
9360
9361 int
9362 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9363 {
9364         nvlist_t *nvl;
9365         char *s, *d;
9366
9367         if (rfs4_no_referrals != 0)
9368                 return (B_FALSE);
9369
9370         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9371                 return (B_FALSE);
9372
9373         if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9374                 return (B_FALSE);
9375
9376         reparse_free(nvl);
9377
9378         return (B_TRUE);
9379 }
9380
9381 /*
9382  * There is a user-level copy of this routine in ref_subr.c.
9383  * Changes should be kept in sync.
9384  */
9385 static int
9386 nfs4_create_components(char *path, component4 *comp4)
9387 {
9388         int slen, plen, ncomp;
9389         char *ori_path, *nxtc, buf[MAXNAMELEN];
9390
9391         if (path == NULL)
9392                 return (0);
9393
9394         plen = strlen(path) + 1;        /* include the terminator */
9395         ori_path = path;
9396         ncomp = 0;
9397
9398         /* count number of components in the path */
9399         for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9400                 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9401                         if ((slen = nxtc - path) == 0) {
9402                                 path = nxtc + 1;
9403                                 continue;
9404                         }
9405
9406                         if (comp4 != NULL) {
9407                                 bcopy(path, buf, slen);
9408                                 buf[slen] = '\0';
9409                                 (void) str_to_utf8(buf, &comp4[ncomp]);
9410                         }
9411
9412                         ncomp++;        /* 1 valid component */
9413                         path = nxtc + 1;
9414                 }
9415                 if (*nxtc == '\0' || *nxtc == '\n')
9416                         break;
9417         }
9418
9419         return (ncomp);
9420 }
9421
9422 /*
9423  * There is a user-level copy of this routine in ref_subr.c.
9424  * Changes should be kept in sync.
9425  */
9426 static int
9427 make_pathname4(char *path, pathname4 *pathname)
9428 {
9429         int ncomp;
9430         component4 *comp4;
9431
9432         if (pathname == NULL)
9433                 return (0);
9434
9435         if (path == NULL) {
9436                 pathname->pathname4_val = NULL;
9437                 pathname->pathname4_len = 0;
9438                 return (0);
9439         }
9440
9441         /* count number of components to alloc buffer */
9442         if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9443                 pathname->pathname4_val = NULL;
9444                 pathname->pathname4_len = 0;
9445                 return (0);
9446         }
9447         comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9448
9449         /* copy components into allocated buffer */
9450         ncomp = nfs4_create_components(path, comp4);
9451
9452         pathname->pathname4_val = comp4;
9453         pathname->pathname4_len = ncomp;
9454
9455         return (ncomp);
9456 }
9457
9458 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9459
9460 fs_locations4 *
9461 fetch_referral(vnode_t *vp, cred_t *cr)
9462 {
9463         nvlist_t *nvl;
9464         char *stype, *sdata;
9465         fs_locations4 *result;
9466         char buf[1024];
9467         size_t bufsize;
9468         XDR xdr;
9469         int err;
9470
9471         /*
9472          * Check attrs to ensure it's a reparse point
9473          */
9474         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9475                 return (NULL);
9476
9477         /*
9478          * Look for an NFS record and get the type and data
9479          */
9480         if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9481                 return (NULL);
9482
9483         /*
9484          * With the type and data, upcall to get the referral
9485          */
9486         bufsize = sizeof (buf);
9487         bzero(buf, sizeof (buf));
9488         err = reparse_kderef((const char *)stype, (const char *)sdata,
9489             buf, &bufsize);
9490         reparse_free(nvl);
9491
9492         DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9493             char *, stype, char *, sdata, char *, buf, int, err);
9494         if (err) {
9495                 cmn_err(CE_NOTE,
9496                     "reparsed daemon not running: unable to get referral (%d)",
9497                     err);
9498                 return (NULL);
9499         }
9500
9501         /*
9502          * We get an XDR'ed record back from the kderef call
9503          */
9504         xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9505         result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9506         err = xdr_fs_locations4(&xdr, result);
9507         XDR_DESTROY(&xdr);
9508         if (err != TRUE) {
9509                 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9510                     int, err);
9511                 return (NULL);
9512         }
9513
9514         /*
9515          * Look at path to recover fs_root, ignoring the leading '/'
9516          */
9517         (void) make_pathname4(vp->v_path, &result->fs_root);
9518
9519         return (result);
9520 }
9521
9522 char *
9523 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9524 {
9525         fs_locations4 *fsl;
9526         fs_location4 *fs;
9527         char *server, *path, *symbuf;
9528         static char *prefix = "/net/";
9529         int i, size, npaths;
9530         uint_t len;
9531
9532         /* Get the referral */
9533         if ((fsl = fetch_referral(vp, cr)) == NULL)
9534                 return (NULL);
9535
9536         /* Deal with only the first location and first server */
9537         fs = &fsl->locations_val[0];
9538         server = utf8_to_str(&fs->server_val[0], &len, NULL);
9539         if (server == NULL) {
9540                 rfs4_free_fs_locations4(fsl);
9541                 kmem_free(fsl, sizeof (fs_locations4));
9542                 return (NULL);
9543         }
9544
9545         /* Figure out size for "/net/" + host + /path/path/path + NULL */
9546         size = strlen(prefix) + len;
9547         for (i = 0; i < fs->rootpath.pathname4_len; i++)
9548                 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9549
9550         /* Allocate the symlink buffer and fill it */
9551         symbuf = kmem_zalloc(size, KM_SLEEP);
9552         (void) strcat(symbuf, prefix);
9553         (void) strcat(symbuf, server);
9554         kmem_free(server, len);
9555
9556         npaths = 0;
9557         for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9558                 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9559                 if (path == NULL)
9560                         continue;
9561                 (void) strcat(symbuf, "/");
9562                 (void) strcat(symbuf, path);
9563                 npaths++;
9564                 kmem_free(path, len);
9565         }
9566
9567         rfs4_free_fs_locations4(fsl);
9568         kmem_free(fsl, sizeof (fs_locations4));
9569
9570         if (strsz != NULL)
9571                 *strsz = size;
9572         return (symbuf);
9573 }
9574
9575 /*
9576  * Check to see if we have a downrev Solaris client, so that we
9577  * can send it a symlink instead of a referral.
9578  */
9579 int
9580 client_is_downrev(struct svc_req *req)
9581 {
9582         struct sockaddr *ca;
9583         rfs4_clntip_t *ci;
9584         bool_t create = FALSE;
9585         int is_downrev;
9586
9587         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9588         ASSERT(ca);
9589         ci = rfs4_find_clntip(ca, &create);
9590         if (ci == NULL)
9591                 return (0);
9592         is_downrev = ci->ri_no_referrals;
9593         rfs4_dbe_rele(ci->ri_dbe);
9594         return (is_downrev);
9595 }