usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vnops.c

   1 /*
   2  * Copyright (c) 2000-2001 Boris Popov
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. All advertising materials mentioning features or use of this software
  14  *    must display the following acknowledgement:
  15  *    This product includes software developed by Boris Popov.
  16  * 4. Neither the name of the author nor the names of any co-contributors
  17  *    may be used to endorse or promote products derived from this software
  18  *    without specific prior written permission.
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  30  * SUCH DAMAGE.
  31  *
  32  * $Id: smbfs_vnops.c,v 1.128.36.1 2005/05/27 02:35:28 lindak Exp $
  33  */
  34
  35 /*
  36  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  37  */
  38
  39 /*
  40  * Vnode operations
  41  *
  42  * This file is similar to nfs3_vnops.c
  43  */
  44
  45 #include <sys/param.h>
  46 #include <sys/systm.h>
  47 #include <sys/cred.h>
  48 #include <sys/vnode.h>
  49 #include <sys/vfs.h>
  50 #include <sys/filio.h>
  51 #include <sys/uio.h>
  52 #include <sys/dirent.h>
  53 #include <sys/errno.h>
  54 #include <sys/sunddi.h>
  55 #include <sys/sysmacros.h>
  56 #include <sys/kmem.h>
  57 #include <sys/cmn_err.h>
  58 #include <sys/vfs.h>
  59 #include <sys/pathname.h>
  60 #include <sys/policy.h>
  61 #include <sys/sdt.h>
  62 #include <sys/taskq_impl.h>
  63 #include <sys/zone.h>
  64 #include <sys/vmsystm.h>
  65
  66 #include <vm/hat.h>
  67 #include <vm/as.h>
  68 #include <vm/page.h>
  69 #include <vm/pvn.h>
  70 #include <vm/seg.h>
  71 #include <vm/seg_map.h>
  72 #include <vm/seg_kpm.h>
  73 #include <vm/seg_vn.h>
  74
  75 #include <netsmb/smb_osdep.h>
  76 #include <netsmb/smb.h>
  77 #include <netsmb/smb_conn.h>
  78 #include <netsmb/smb_subr.h>
  79
  80 #include <smbfs/smbfs.h>
  81 #include <smbfs/smbfs_node.h>
  82 #include <smbfs/smbfs_subr.h>
  83
  84 #include <sys/fs/smbfs_ioctl.h>
  85 #include <sys/fs_subr.h>
  86
   87 /*
   88  * We assign directory offsets like the NFS client, where the
   89  * offset increments by _one_ after each directory entry.
   90  * Further, the entries "." and ".." are always at offsets
   91  * zero and one (respectively) and the "real" entries from
   92  * the server appear at offsets starting with two.  This
   93  * macro is used to initialize the n_dirofs field after
   94  * setting n_dirseq with a _findopen call.
       *
       * smbfs_open() stores this value in n_dirofs on every
       * directory open, i.e. positioned just past "." and "..".
   95  */
   96 #define FIRST_DIROFS    2
  97
   98 /*
   99  * These characters are illegal in NTFS file names.
  100  * ref: http://support.microsoft.com/kb/147438
  101  *
  102  * Careful!  The check in the XATTR case skips the
  103  * first character to allow colon in XATTR names.
       *
       * The table ends with a 0 entry, presumably so that users can
       * walk it as a NUL-terminated list -- confirm against the callers.
  104  */
  105 static const char illegal_chars[] = {
  106         ':',    /* colon - keep this first! */
  107         '\\',   /* back slash */
  108         '/',    /* slash */
  109         '*',    /* asterisk */
  110         '?',    /* question mark */
  111         '"',    /* double quote */
  112         '<',    /* less than sign */
  113         '>',    /* greater than sign */
  114         '|',    /* vertical bar */
  115         0       /* end-of-table marker */
  116 };
 117
  118 /*
  119  * Turning this on causes nodes to be created in the cache
  120  * during directory listings, normally avoiding a second
  121  * OtW attribute fetch just after a readdir.
       *
       * Defaults to 1 (on).  It is a plain global int rather than a
       * constant, presumably so it can be patched as a tunable --
       * TODO confirm.
  122  */
  123 int smbfs_fastlookup = 1;
 124
  125 /* Forward declarations of file-local (static) functions. */
  126
      /* Lookup, remove, rename, setattr, access and readdir helpers. */
  127 static int      smbfslookup_cache(vnode_t *, char *, int, vnode_t **,
  128                         cred_t *);
  129 static int      smbfslookup(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr,
  130                         int cache_ok, caller_context_t *);
  131 static int      smbfsremove(vnode_t *dvp, vnode_t *vp, struct smb_cred *scred,
  132                         int flags);
  133 static int      smbfsrename(vnode_t *odvp, vnode_t *ovp, vnode_t *ndvp,
  134                         char *nnm, struct smb_cred *scred, int flags);
  135 static int      smbfssetattr(vnode_t *, struct vattr *, int, cred_t *);
  136 static int      smbfs_accessx(void *, int, cred_t *);
  137 static int      smbfs_readvdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
  138                         caller_context_t *);
      /* Drops one FID/dirseq reference; used by smbfs_close(). */
  139 static void     smbfs_rele_fid(smbnode_t *, struct smb_cred *);
  140 static uint32_t xvattr_to_dosattr(smbnode_t *, struct vattr *);
  141
      /* I/O helpers (page, buf and uio based, judging by their signatures). */
  142 static int      smbfs_rdwrlbn(vnode_t *, page_t *, uoff_t, size_t, int,
  143                         cred_t *);
  144 static int      smbfs_bio(struct buf *, int, cred_t *);
  145 static int      smbfs_writenp(smbnode_t *np, caddr_t base, int tcount,
  146                         struct uio *uiop, int pgcreated);
  147
      /* fsync, putpage/getapage/putapage and async delmap routines. */
  148 static int      smbfs_fsync(vnode_t *, int, cred_t *, caller_context_t *);
  149 static int      smbfs_putpage(vnode_t *, offset_t, size_t, int, cred_t *,
  150                         caller_context_t *);
  151 static int      smbfs_getapage(vnode_t *, uoff_t, size_t, uint_t *,
  152                         page_t *[], size_t, struct seg *, caddr_t,
  153                         enum seg_rw, cred_t *);
  154 static int      smbfs_putapage(vnode_t *, page_t *, uoff_t *, size_t *,
  155                         int, cred_t *);
  156 static void     smbfs_delmap_async(void *);
 157
 158 /*
 159  * Error flags used to pass information about certain special errors
 160  * which need to be handled specially.
 161  */
 162 #define SMBFS_EOF                       -98
 163
 164 /* When implementing OtW locks, make this a real function. */
 165 #define smbfs_lm_has_sleep(vp) 0
 166
 167 /*
 168  * These are the vnode ops routines which implement the vnode interface to
 169  * the networked file system.  These routines just take their parameters,
 170  * make them look networkish by putting the right info into interface structs,
 171  * and then calling the appropriate remote routine(s) to do the work.
 172  *
  173  * Note on directory name lookup caching:  If we detect a stale fhandle,
 174  * we purge the directory cache relative to that vnode.  This way, the
 175  * user won't get burned by the cache repeatedly.  See <smbfs/smbnode.h> for
 176  * more details on smbnode locking.
 177  */
 178
 179 /*
 180  * XXX
 181  * When new and relevant functionality is enabled, we should be
 182  * calling vfs_set_feature() to inform callers that pieces of
 183  * functionality are available, per PSARC 2007/227.
 184  */
  185 /* ARGSUSED */
      /*
       * smbfs_open: open vnode op for smbfs regular files and directories.
       *
       * vpp  - pointer to the vnode being opened (*vpp is only read here)
       * flag - open mode bits; FREAD, FWRITE and FTRUNC are examined
       * cr   - caller credentials; used for the SMB requests and saved in
       *        np->r_cred when a new FID is obtained
       * ct   - caller context (unused)
       *
       * Directories get (or share) a find context in n_dirseq and bump
       * n_dirrefs.  Regular files either reuse the existing FID when its
       * rights are sufficient, or open a new FID and close the old one.
       *
       * Returns 0 on success; EIO for a wrong zone or a dead/unmounted
       * mount; EACCES for vnode types other than VREG/VDIR or when the
       * node is already open as a different type; EINTR if interrupted
       * while waiting for r_lkserlock; otherwise the error returned by
       * smbfs_smb_findopen() or smbfs_smb_open().
       */
  186 static int
  187 smbfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
  188 {
  189         smbnode_t       *np;
  190         vnode_t         *vp;
  191         smbfattr_t      fa;
  192         u_int32_t       rights, rightsrcvd;
  193         u_int16_t       fid, oldfid;
  194         int             oldgenid;
  195         struct smb_cred scred;
  196         smbmntinfo_t    *smi;
  197         smb_share_t     *ssp;
  198         cred_t          *oldcr;
  199         int             tmperror;
  200         int             error = 0;
  201
  202         vp = *vpp;
  203         np = VTOSMB(vp);
  204         smi = VTOSMI(vp);
  205         ssp = smi->smi_share;
  206
              /* Only processes in the zone that owns the mount may open. */
  207         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
  208                 return (EIO);
  209
  210         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
  211                 return (EIO);
  212
  213         if (vp->v_type != VREG && vp->v_type != VDIR) { /* XXX VLNK? */
  214                 SMBVDEBUG("open eacces vtype=%d\n", vp->v_type);
  215                 return (EACCES);
  216         }
  217
  218         /*
  219          * Get exclusive access to n_fid and related stuff.
  220          * No returns after this until out.
  221          */
  222         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, SMBINTR(vp)))
  223                 return (EINTR);
  224         smb_credinit(&scred, cr);
  225
  226         /*
  227          * Keep track of the vnode type at first open.
  228          * It may change later, and we need close to do
  229          * cleanup for the type we opened.  Also deny
  230          * open of new types until old type is closed.
  231          */
  232         if (np->n_ovtype == VNON) {
  233                 ASSERT(np->n_dirrefs == 0);
  234                 ASSERT(np->n_fidrefs == 0);
  235         } else if (np->n_ovtype != vp->v_type) {
  236                 SMBVDEBUG("open n_ovtype=%d v_type=%d\n",
  237                     np->n_ovtype, vp->v_type);
  238                 error = EACCES;
  239                 goto out;
  240         }
  241
  242         /*
  243          * Directory open.  See smbfs_readvdir()
  244          */
  245         if (vp->v_type == VDIR) {
  246                 if (np->n_dirseq == NULL) {
  247                         /* first open */
  248                         error = smbfs_smb_findopen(np, "*", 1,
  249                             SMB_FA_SYSTEM | SMB_FA_HIDDEN | SMB_FA_DIR,
  250                             &scred, &np->n_dirseq);
  251                         if (error != 0)
  252                                 goto out;
  253                 }
                      /* Reset the directory offset on every directory open. */
  254                 np->n_dirofs = FIRST_DIROFS;
  255                 np->n_dirrefs++;
  256                 goto have_fid;
  257         }
  258
  259         /*
  260          * If caller specified O_TRUNC/FTRUNC, then be sure to set
  261          * FWRITE (to drive successful setattr(size=0) after open)
  262          */
  263         if (flag & FTRUNC)
  264                 flag |= FWRITE;
  265
  266         /*
  267          * If we already have it open, and the FID is still valid,
  268          * check whether the rights are sufficient for FID reuse.
  269          */
  270         if (np->n_fidrefs > 0 &&
  271             np->n_vcgenid == ssp->ss_vcgenid) {
  272                 int upgrade = 0;
  273
  274                 if ((flag & FWRITE) &&
  275                     !(np->n_rights & SA_RIGHT_FILE_WRITE_DATA))
  276                         upgrade = 1;
  277                 if ((flag & FREAD) &&
  278                     !(np->n_rights & SA_RIGHT_FILE_READ_DATA))
  279                         upgrade = 1;
  280                 if (!upgrade) {
  281                         /*
  282                          *  the existing open is good enough
  283                          */
  284                         np->n_fidrefs++;
  285                         goto have_fid;
  286                 }
  287         }
              /* Start from the rights of the existing open, if there is one. */
  288         rights = np->n_fidrefs ? np->n_rights : 0;
  289
  290         /*
  291          * we always ask for READ_CONTROL so we can always get the
  292          * owner/group IDs to satisfy a stat.  Ditto attributes.
  293          */
  294         rights |= (STD_RIGHT_READ_CONTROL_ACCESS |
  295             SA_RIGHT_FILE_READ_ATTRIBUTES);
  296         if ((flag & FREAD))
  297                 rights |= SA_RIGHT_FILE_READ_DATA;
  298         if ((flag & FWRITE))
  299                 rights |= SA_RIGHT_FILE_WRITE_DATA |
  300                     SA_RIGHT_FILE_APPEND_DATA |
  301                     SA_RIGHT_FILE_WRITE_ATTRIBUTES;
  302
  303         bzero(&fa, sizeof (fa));
  304         error = smbfs_smb_open(np,
  305             NULL, 0, 0, /* name nmlen xattr */
  306             rights, &scred,
  307             &fid, &rightsrcvd, &fa);
  308         if (error)
  309                 goto out;
              /* Cache the attributes returned by the open. */
  310         smbfs_attrcache_fa(vp, &fa);
  311
  312         /*
  313          * We have a new FID and access rights.
  314          */
  315         oldfid = np->n_fid;
  316         oldgenid = np->n_vcgenid;
  317         np->n_fid = fid;
  318         np->n_vcgenid = ssp->ss_vcgenid;
  319         np->n_rights = rightsrcvd;
  320         np->n_fidrefs++;
              /*
               * n_fidrefs was just incremented, so a count above one means
               * there was an earlier open.  Its FID is closed only when it
               * belongs to the current VC generation.
               */
  321         if (np->n_fidrefs > 1 &&
  322             oldgenid == ssp->ss_vcgenid) {
  323                 /*
  324                  * We already had it open (presumably because
  325                  * it was open with insufficient rights.)
  326                  * Close old wire-open.
  327                  */
  328                 tmperror = smbfs_smb_close(ssp,
  329                     oldfid, NULL, &scred);
                      /* A failure to close the old FID is only logged. */
  330                 if (tmperror)
  331                         SMBVDEBUG("error %d closing %s\n",
  332                             tmperror, np->n_rpath);
  333         }
  334
  335         /*
  336          * This thread did the open.
  337          * Save our credentials too.
  338          */
  339         mutex_enter(&np->r_statelock);
  340         oldcr = np->r_cred;
  341         np->r_cred = cr;
              /* Take a hold for r_cred, then release the one it replaced. */
  342         crhold(cr);
  343         if (oldcr)
  344                 crfree(oldcr);
  345         mutex_exit(&np->r_statelock);
  346
  347 have_fid:
  348         /*
  349          * Keep track of the vnode type at first open.
  350          * (see comments above)
  351          */
  352         if (np->n_ovtype == VNON)
  353                 np->n_ovtype = vp->v_type;
  354
  355 out:
              /* Common exit: drop the SMB cred wrapper and the node lock. */
  356         smb_credrele(&scred);
  357         smbfs_rw_exit(&np->r_lkserlock);
  358         return (error);
  359 }
 360
 361 /*ARGSUSED*/
 362 static int
 363 smbfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
 364         caller_context_t *ct)
 365 {
 366         smbnode_t       *np;
 367         smbmntinfo_t    *smi;
 368         struct smb_cred scred;
 369         int error = 0;
 370
 371         np = VTOSMB(vp);
 372         smi = VTOSMI(vp);
 373
 374         /*
 375          * Don't "bail out" for VFS_UNMOUNTED here,
 376          * as we want to do cleanup, etc.
 377          */
 378
 379         /*
 380          * zone_enter(2) prevents processes from changing zones with SMBFS files
 381          * open; if we happen to get here from the wrong zone we can't do
 382          * anything over the wire.
 383          */
 384         if (smi->smi_zone_ref.zref_zone != curproc->p_zone) {
 385                 /*
 386                  * We could attempt to clean up locks, except we're sure
 387                  * that the current process didn't acquire any locks on
 388                  * the file: any attempt to lock a file belong to another zone
 389                  * will fail, and one can't lock an SMBFS file and then change
 390                  * zones, as that fails too.
 391                  *
 392                  * Returning an error here is the sane thing to do.  A
 393                  * subsequent call to VN_RELE() which translates to a
 394                  * smbfs_inactive() will clean up state: if the zone of the
 395                  * vnode's origin is still alive and kicking, an async worker
 396                  * thread will handle the request (from the correct zone), and
 397                  * everything (minus the final smbfs_getattr_otw() call) should
 398                  * be OK. If the zone is going away smbfs_async_inactive() will
 399                  * throw away cached pages inline.
 400                  */
 401                 return (EIO);
 402         }
 403
 404         /*
 405          * If we are using local locking for this filesystem, then
 406          * release all of the SYSV style record locks.  Otherwise,
 407          * we are doing network locking and we need to release all
 408          * of the network locks.  All of the locks held by this
 409          * process on this file are released no matter what the
 410          * incoming reference count is.
 411          */
 412         if (smi->smi_flags & SMI_LLOCK) {
 413                 pid_t pid = ddi_get_pid();
 414                 cleanlocks(vp, pid, 0);
 415                 cleanshares(vp, pid);
 416         }
 417         /*
 418          * else doing OtW locking.  SMB servers drop all locks
 419          * on the file ID we close here, so no _lockrelease()
 420          */
 421
 422         /*
 423          * This (passed in) count is the ref. count from the
 424          * user's file_t before the closef call (fio.c).
 425          * The rest happens only on last close.
 426          */
 427         if (count > 1)
 428                 return (0);
 429
 430         /* NFS has DNLC purge here. */
 431
 432         /*
 433          * If the file was open for write and there are pages,
 434          * then make sure dirty pages written back.
 435          *
 436          * NFS does this async when "close-to-open" is off
 437          * (MI_NOCTO flag is set) to avoid blocking the caller.
 438          * For now, always do this synchronously (no B_ASYNC).
 439          */
 440         if ((flag & FWRITE) && vn_has_cached_data(vp)) {
 441                 error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
 442                 if (error == EAGAIN)
 443                         error = 0;
 444         }
 445         if (error == 0) {
 446                 mutex_enter(&np->r_statelock);
 447                 np->r_flags &= ~RSTALE;
 448                 np->r_error = 0;
 449                 mutex_exit(&np->r_statelock);
 450         }
 451
 452         /*
 453          * Decrement the reference count for the FID
 454          * and possibly do the OtW close.
 455          *
 456          * Exclusive lock for modifying n_fid stuff.
 457          * Don't want this one ever interruptible.
 458          */
 459         (void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
 460         smb_credinit(&scred, cr);
 461
 462         smbfs_rele_fid(np, &scred);
 463
 464         smb_credrele(&scred);
 465         smbfs_rw_exit(&np->r_lkserlock);
 466
 467         return (0);
 468 }
 469
 470 /*
 471  * Helper for smbfs_close.  Decrement the reference count
 472  * for an SMB-level file or directory ID, and when the last
 473  * reference for the fid goes away, do the OtW close.
 474  * Also called in smbfs_inactive (defensive cleanup).
 475  */
 476 static void
 477 smbfs_rele_fid(smbnode_t *np, struct smb_cred *scred)
 478 {
 479         smb_share_t     *ssp;
 480         cred_t          *oldcr;
 481         struct smbfs_fctx *fctx;
 482         int             error;
 483         uint16_t ofid;
 484
 485         ssp = np->n_mount->smi_share;
 486         error = 0;
 487
 488         /* Make sure we serialize for n_dirseq use. */
 489         ASSERT(smbfs_rw_lock_held(&np->r_lkserlock, RW_WRITER));
 490
 491         /*
 492          * Note that vp->v_type may change if a remote node
 493          * is deleted and recreated as a different type, and
 494          * our getattr may change v_type accordingly.
 495          * Now use n_ovtype to keep track of the v_type
 496          * we had during open (see comments above).
 497          */
 498         switch (np->n_ovtype) {
 499         case VDIR:
 500                 ASSERT(np->n_dirrefs > 0);
 501                 if (--np->n_dirrefs)
 502                         return;
 503                 if ((fctx = np->n_dirseq) != NULL) {
 504                         np->n_dirseq = NULL;
 505                         np->n_dirofs = 0;
 506                         error = smbfs_smb_findclose(fctx, scred);
 507                 }
 508                 break;
 509
 510         case VREG:
 511                 ASSERT(np->n_fidrefs > 0);
 512                 if (--np->n_fidrefs)
 513                         return;
 514                 if ((ofid = np->n_fid) != SMB_FID_UNUSED) {
 515                         np->n_fid = SMB_FID_UNUSED;
 516                         /* After reconnect, n_fid is invalid */
 517                         if (np->n_vcgenid == ssp->ss_vcgenid) {
 518                                 error = smbfs_smb_close(
 519                                     ssp, ofid, NULL, scred);
 520                         }
 521                 }
 522                 break;
 523
 524         default:
 525                 SMBVDEBUG("bad n_ovtype %d\n", np->n_ovtype);
 526                 break;
 527         }
 528         if (error) {
 529                 SMBVDEBUG("error %d closing %s\n",
 530                     error, np->n_rpath);
 531         }
 532
 533         /* Allow next open to use any v_type. */
 534         np->n_ovtype = VNON;
 535
 536         /*
 537          * Other "last close" stuff.
 538          */
 539         mutex_enter(&np->r_statelock);
 540         if (np->n_flag & NATTRCHANGED)
 541                 smbfs_attrcache_rm_locked(np);
 542         oldcr = np->r_cred;
 543         np->r_cred = NULL;
 544         mutex_exit(&np->r_statelock);
 545         if (oldcr != NULL)
 546                 crfree(oldcr);
 547 }
 548
 549 /* ARGSUSED */
 550 static int
 551 smbfs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
 552         caller_context_t *ct)
 553 {
 554         struct smb_cred scred;
 555         struct vattr    va;
 556         smbnode_t       *np;
 557         smbmntinfo_t    *smi;
 558         smb_share_t     *ssp;
 559         offset_t        endoff;
 560         ssize_t         past_eof;
 561         int             error;
 562
 563         caddr_t         base;
 564         uoff_t  off;
 565         size_t          n;
 566         int             on;
 567         uint_t          flags;
 568
 569         np = VTOSMB(vp);
 570         smi = VTOSMI(vp);
 571         ssp = smi->smi_share;
 572
 573         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
 574                 return (EIO);
 575
 576         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
 577                 return (EIO);
 578
 579         ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_READER));
 580
 581         if (vp->v_type != VREG)
 582                 return (EISDIR);
 583
 584         if (uiop->uio_resid == 0)
 585                 return (0);
 586
 587         /*
 588          * Like NFS3, just check for 63-bit overflow.
 589          * Our SMB layer takes care to return EFBIG
 590          * when it has to fallback to a 32-bit call.
 591          */
 592         endoff = uiop->uio_loffset + uiop->uio_resid;
 593         if (uiop->uio_loffset < 0 || endoff < 0)
 594                 return (EINVAL);
 595
 596         /* get vnode attributes from server */
 597         va.va_mask = VATTR_SIZE | VATTR_MTIME;
 598         if (error = smbfsgetattr(vp, &va, cr))
 599                 return (error);
 600
 601         /* Update mtime with mtime from server here? */
 602
 603         /* if offset is beyond EOF, read nothing */
 604         if (uiop->uio_loffset >= va.va_size)
 605                 return (0);
 606
 607         /*
 608          * Limit the read to the remaining file size.
 609          * Do this by temporarily reducing uio_resid
 610          * by the amount the lies beyoned the EOF.
 611          */
 612         if (endoff > va.va_size) {
 613                 past_eof = (ssize_t)(endoff - va.va_size);
 614                 uiop->uio_resid -= past_eof;
 615         } else
 616                 past_eof = 0;
 617
 618         /*
 619          * Bypass VM if caching has been disabled (e.g., locking) or if
 620          * using client-side direct I/O and the file is not mmap'd and
 621          * there are no cached pages.
 622          */
 623         if ((vp->v_flag & VNOCACHE) ||
 624             (((np->r_flags & RDIRECTIO) || (smi->smi_flags & SMI_DIRECTIO)) &&
 625             np->r_mapcnt == 0 && np->r_inmap == 0 &&
 626             !vn_has_cached_data(vp))) {
 627
 628                 /* Shared lock for n_fid use in smb_rwuio */
 629                 if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
 630                         return (EINTR);
 631                 smb_credinit(&scred, cr);
 632
 633                 /* After reconnect, n_fid is invalid */
 634                 if (np->n_vcgenid != ssp->ss_vcgenid)
 635                         error = ESTALE;
 636                 else
 637                         error = smb_rwuio(ssp, np->n_fid, UIO_READ,
 638                             uiop, &scred, smb_timo_read);
 639
 640                 smb_credrele(&scred);
 641                 smbfs_rw_exit(&np->r_lkserlock);
 642
 643                 /* undo adjustment of resid */
 644                 uiop->uio_resid += past_eof;
 645
 646                 return (error);
 647         }
 648
 649         /* (else) Do I/O through segmap. */
 650         do {
 651                 off = uiop->uio_loffset & MAXBMASK; /* mapping offset */
 652                 on = uiop->uio_loffset & MAXBOFFSET; /* Relative offset */
 653                 n = MIN(MAXBSIZE - on, uiop->uio_resid);
 654
 655                 error = smbfs_validate_caches(vp, cr);
 656                 if (error)
 657                         break;
 658
 659                 /* NFS waits for RINCACHEPURGE here. */
 660
 661                 if (vpm_enable) {
 662                         /*
 663                          * Copy data.
 664                          */
 665                         error = vpm_data_copy(vp, off + on, n, uiop,
 666                             1, NULL, 0, S_READ);
 667                 } else {
 668                         base = segmap_getmapflt(segkmap, vp, off + on, n, 1,
 669                             S_READ);
 670
 671                         error = uiomove(base + on, n, UIO_READ, uiop);
 672                 }
 673
 674                 if (!error) {
 675                         /*
 676                          * If read a whole block or read to eof,
 677                          * won't need this buffer again soon.
 678                          */
 679                         mutex_enter(&np->r_statelock);
 680                         if (n + on == MAXBSIZE ||
 681                             uiop->uio_loffset == np->r_size)
 682                                 flags = SM_DONTNEED;
 683                         else
 684                                 flags = 0;
 685                         mutex_exit(&np->r_statelock);
 686                         if (vpm_enable) {
 687                                 error = vpm_sync_pages(vp, off, n, flags);
 688                         } else {
 689                                 error = segmap_release(segkmap, base, flags);
 690                         }
 691                 } else {
 692                         if (vpm_enable) {
 693                                 (void) vpm_sync_pages(vp, off, n, 0);
 694                         } else {
 695                                 (void) segmap_release(segkmap, base, 0);
 696                         }
 697                 }
 698         } while (!error && uiop->uio_resid > 0);
 699
 700         /* undo adjustment of resid */
 701         uiop->uio_resid += past_eof;
 702
 703         return (error);
 704 }
 705
 706
 707 /* ARGSUSED */
 708 static int
 709 smbfs_write(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
 710         caller_context_t *ct)
 711 {
 712         struct smb_cred scred;
 713         struct vattr    va;
 714         smbnode_t       *np;
 715         smbmntinfo_t    *smi;
 716         smb_share_t     *ssp;
 717         offset_t        endoff, limit;
 718         ssize_t         past_limit;
 719         int             error, timo;
 720         caddr_t         base;
 721         uoff_t  off;
 722         size_t          n;
 723         int             on;
 724         uint_t          flags;
 725         uoff_t  last_off;
 726         size_t          last_resid;
 727         uint_t          bsize;
 728
 729         np = VTOSMB(vp);
 730         smi = VTOSMI(vp);
 731         ssp = smi->smi_share;
 732
 733         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
 734                 return (EIO);
 735
 736         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
 737                 return (EIO);
 738
 739         ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_WRITER));
 740
 741         if (vp->v_type != VREG)
 742                 return (EISDIR);
 743
 744         if (uiop->uio_resid == 0)
 745                 return (0);
 746
 747         /*
 748          * Handle ioflag bits: (FAPPEND|FSYNC|FDSYNC)
 749          */
 750         if (ioflag & (FAPPEND | FSYNC)) {
 751                 if (np->n_flag & NMODIFIED) {
 752                         smbfs_attrcache_remove(np);
 753                 }
 754         }
 755         if (ioflag & FAPPEND) {
 756                 /*
 757                  * File size can be changed by another client
 758                  *
 759                  * Todo: Consider redesigning this to use a
 760                  * handle opened for append instead.
 761                  */
 762                 va.va_mask = VATTR_SIZE;
 763                 if (error = smbfsgetattr(vp, &va, cr))
 764                         return (error);
 765                 uiop->uio_loffset = va.va_size;
 766         }
 767
 768         /*
 769          * Like NFS3, just check for 63-bit overflow.
 770          */
 771         endoff = uiop->uio_loffset + uiop->uio_resid;
 772         if (uiop->uio_loffset < 0 || endoff < 0)
 773                 return (EINVAL);
 774
 775         /*
 776          * Check to make sure that the process will not exceed
 777          * its limit on file size.  It is okay to write up to
 778          * the limit, but not beyond.  Thus, the write which
 779          * reaches the limit will be short and the next write
 780          * will return an error.
 781          *
 782          * So if we're starting at or beyond the limit, EFBIG.
 783          * Otherwise, temporarily reduce resid to the amount
 784          * that is after the limit.
 785          */
 786         limit = uiop->uio_llimit;
 787         if (limit == RLIM_INFINITY || limit > MAXOFFSET_T)
 788                 limit = MAXOFFSET_T;
 789         if (uiop->uio_loffset >= limit) {
 790                 proc_t *p = ttoproc(curthread);
 791
 792                 mutex_enter(&p->p_lock);
 793                 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
 794                     p->p_rctls, p, RCA_UNSAFE_SIGINFO);
 795                 mutex_exit(&p->p_lock);
 796                 return (EFBIG);
 797         }
 798         if (endoff > limit) {
 799                 past_limit = (ssize_t)(endoff - limit);
 800                 uiop->uio_resid -= past_limit;
 801         } else
 802                 past_limit = 0;
 803
 804         /*
 805          * Bypass VM if caching has been disabled (e.g., locking) or if
 806          * using client-side direct I/O and the file is not mmap'd and
 807          * there are no cached pages.
 808          */
 809         if ((vp->v_flag & VNOCACHE) ||
 810             (((np->r_flags & RDIRECTIO) || (smi->smi_flags & SMI_DIRECTIO)) &&
 811             np->r_mapcnt == 0 && np->r_inmap == 0 &&
 812             !vn_has_cached_data(vp))) {
 813
 814 smbfs_fwrite:
 815                 if (np->r_flags & RSTALE) {
 816                         last_resid = uiop->uio_resid;
 817                         last_off = uiop->uio_loffset;
 818                         error = np->r_error;
 819                         /*
 820                          * A close may have cleared r_error, if so,
 821                          * propagate ESTALE error return properly
 822                          */
 823                         if (error == 0)
 824                                 error = ESTALE;
 825                         goto bottom;
 826                 }
 827
 828                 /* Timeout: longer for append. */
 829                 timo = smb_timo_write;
 830                 if (endoff > np->r_size)
 831                         timo = smb_timo_append;
 832
 833                 /* Shared lock for n_fid use in smb_rwuio */
 834                 if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
 835                         return (EINTR);
 836                 smb_credinit(&scred, cr);
 837
 838                 /* After reconnect, n_fid is invalid */
 839                 if (np->n_vcgenid != ssp->ss_vcgenid)
 840                         error = ESTALE;
 841                 else
 842                         error = smb_rwuio(ssp, np->n_fid, UIO_WRITE,
 843                             uiop, &scred, timo);
 844
 845                 if (error == 0) {
 846                         mutex_enter(&np->r_statelock);
 847                         np->n_flag |= (NFLUSHWIRE | NATTRCHANGED);
 848                         if (uiop->uio_loffset > (offset_t)np->r_size)
 849                                 np->r_size = (len_t)uiop->uio_loffset;
 850                         mutex_exit(&np->r_statelock);
 851                         if (ioflag & (FSYNC | FDSYNC)) {
 852                                 /* Don't error the I/O if this fails. */
 853                                 (void) smbfs_smb_flush(np, &scred);
 854                         }
 855                 }
 856
 857                 smb_credrele(&scred);
 858                 smbfs_rw_exit(&np->r_lkserlock);
 859
 860                 /* undo adjustment of resid */
 861                 uiop->uio_resid += past_limit;
 862
 863                 return (error);
 864         }
 865
 866         /* (else) Do I/O through segmap. */
 867         bsize = vp->v_vfsp->vfs_bsize;
 868
 869         do {
 870                 off = uiop->uio_loffset & MAXBMASK; /* mapping offset */
 871                 on = uiop->uio_loffset & MAXBOFFSET; /* Relative offset */
 872                 n = MIN(MAXBSIZE - on, uiop->uio_resid);
 873
 874                 last_resid = uiop->uio_resid;
 875                 last_off = uiop->uio_loffset;
 876
 877                 if (np->r_flags & RSTALE) {
 878                         error = np->r_error;
 879                         /*
 880                          * A close may have cleared r_error, if so,
 881                          * propagate ESTALE error return properly
 882                          */
 883                         if (error == 0)
 884                                 error = ESTALE;
 885                         break;
 886                 }
 887
 888                 /*
 889                  * From NFS: Don't create dirty pages faster than they
 890                  * can be cleaned.
 891                  *
 892                  * Here NFS also checks for async writes (np->r_awcount)
 893                  */
 894                 mutex_enter(&np->r_statelock);
 895                 while (np->r_gcount > 0) {
 896                         if (SMBINTR(vp)) {
 897                                 klwp_t *lwp = ttolwp(curthread);
 898
 899                                 if (lwp != NULL)
 900                                         lwp->lwp_nostop++;
 901                                 if (!cv_wait_sig(&np->r_cv, &np->r_statelock)) {
 902                                         mutex_exit(&np->r_statelock);
 903                                         if (lwp != NULL)
 904                                                 lwp->lwp_nostop--;
 905                                         error = EINTR;
 906                                         goto bottom;
 907                                 }
 908                                 if (lwp != NULL)
 909                                         lwp->lwp_nostop--;
 910                         } else
 911                                 cv_wait(&np->r_cv, &np->r_statelock);
 912                 }
 913                 mutex_exit(&np->r_statelock);
 914
 915                 /*
 916                  * Touch the page and fault it in if it is not in core
 917                  * before segmap_getmapflt or vpm_data_copy can lock it.
 918                  * This is to avoid the deadlock if the buffer is mapped
 919                  * to the same file through mmap which we want to write.
 920                  */
 921                 uio_prefaultpages((long)n, uiop);
 922
 923                 if (vpm_enable) {
 924                         /*
 925                          * It will use kpm mappings, so no need to
 926                          * pass an address.
 927                          */
 928                         error = smbfs_writenp(np, NULL, n, uiop, 0);
 929                 } else {
 930                         if (segmap_kpm) {
 931                                 int pon = uiop->uio_loffset & PAGEOFFSET;
 932                                 size_t pn = MIN(PAGESIZE - pon,
 933                                     uiop->uio_resid);
 934                                 int pagecreate;
 935
 936                                 mutex_enter(&np->r_statelock);
 937                                 pagecreate = (pon == 0) && (pn == PAGESIZE ||
 938                                     uiop->uio_loffset + pn >= np->r_size);
 939                                 mutex_exit(&np->r_statelock);
 940
 941                                 base = segmap_getmapflt(segkmap, vp, off + on,
 942                                     pn, !pagecreate, S_WRITE);
 943
 944                                 error = smbfs_writenp(np, base + pon, n, uiop,
 945                                     pagecreate);
 946
 947                         } else {
 948                                 base = segmap_getmapflt(segkmap, vp, off + on,
 949                                     n, 0, S_READ);
 950                                 error = smbfs_writenp(np, base + on, n, uiop, 0);
 951                         }
 952                 }
 953
 954                 if (!error) {
 955                         if (smi->smi_flags & SMI_NOAC)
 956                                 flags = SM_WRITE;
 957                         else if ((uiop->uio_loffset % bsize) == 0 ||
 958                             IS_SWAPVP(vp)) {
 959                                 /*
 960                                  * Have written a whole block.
 961                                  * Start an asynchronous write
 962                                  * and mark the buffer to
 963                                  * indicate that it won't be
 964                                  * needed again soon.
 965                                  */
 966                                 flags = SM_WRITE | SM_ASYNC | SM_DONTNEED;
 967                         } else
 968                                 flags = 0;
 969                         if ((ioflag & (FSYNC|FDSYNC)) ||
 970                             (np->r_flags & ROUTOFSPACE)) {
 971                                 flags &= ~SM_ASYNC;
 972                                 flags |= SM_WRITE;
 973                         }
 974                         if (vpm_enable) {
 975                                 error = vpm_sync_pages(vp, off, n, flags);
 976                         } else {
 977                                 error = segmap_release(segkmap, base, flags);
 978                         }
 979                 } else {
 980                         if (vpm_enable) {
 981                                 (void) vpm_sync_pages(vp, off, n, 0);
 982                         } else {
 983                                 (void) segmap_release(segkmap, base, 0);
 984                         }
 985                         /*
 986                          * In the event that we got an access error while
 987                          * faulting in a page for a write-only file just
 988                          * force a write.
 989                          */
 990                         if (error == EACCES)
 991                                 goto smbfs_fwrite;
 992                 }
 993         } while (!error && uiop->uio_resid > 0);
 994
 995 bottom:
 996         /* undo adjustment of resid */
 997         if (error) {
 998                 uiop->uio_resid = last_resid + past_limit;
 999                 uiop->uio_loffset = last_off;
1000         } else {
1001                 uiop->uio_resid += past_limit;
1002         }
1003
1004         return (error);
1005 }
1006
1007 /*
1008  * Like nfs_client.c: writerp()
1009  *
1010  * Write by creating pages and uiomove data onto them.
1011  */
1012
1013 int
1014 smbfs_writenp(smbnode_t *np, caddr_t base, int tcount, struct uio *uio,
1015     int pgcreated)
1016 {
1017         int             pagecreate;
1018         int             n;
1019         int             saved_n;
1020         caddr_t         saved_base;
1021         uoff_t  offset;
1022         int             error;
1023         int             sm_error;
1024         vnode_t         *vp = SMBTOV(np);
1025
1026         ASSERT(tcount <= MAXBSIZE && tcount <= uio->uio_resid);
1027         ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_WRITER));
1028         if (!vpm_enable) {
1029                 ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE);
1030         }
1031
1032         /*
1033          * Move bytes in at most PAGESIZE chunks. We must avoid
1034          * spanning pages in uiomove() because page faults may cause
1035          * the cache to be invalidated out from under us. The r_size is not
1036          * updated until after the uiomove. If we push the last page of a
1037          * file before r_size is correct, we will lose the data written past
1038          * the current (and invalid) r_size.
1039          */
1040         do {
1041                 offset = uio->uio_loffset;
1042                 pagecreate = 0;
1043
1044                 /*
1045                  * n is the number of bytes required to satisfy the request
1046                  *   or the number of bytes to fill out the page.
1047                  */
1048                 n = (int)MIN((PAGESIZE - (offset & PAGEOFFSET)), tcount);
1049
1050                 /*
1051                  * Check to see if we can skip reading in the page
1052                  * and just allocate the memory.  We can do this
1053                  * if we are going to rewrite the entire mapping
1054                  * or if we are going to write to or beyond the current
1055                  * end of file from the beginning of the mapping.
1056                  *
1057                  * The read of r_size is now protected by r_statelock.
1058                  */
1059                 mutex_enter(&np->r_statelock);
1060                 /*
1061                  * When pgcreated is nonzero the caller has already done
1062                  * a segmap_getmapflt with forcefault 0 and S_WRITE. With
1063                  * segkpm this means we already have at least one page
1064                  * created and mapped at base.
1065                  */
1066                 pagecreate = pgcreated ||
1067                     ((offset & PAGEOFFSET) == 0 &&
1068                     (n == PAGESIZE || ((offset + n) >= np->r_size)));
1069
1070                 mutex_exit(&np->r_statelock);
1071                 if (!vpm_enable && pagecreate) {
1072                         /*
1073                          * The last argument tells segmap_pagecreate() to
1074                          * always lock the page, as opposed to sometimes
1075                          * returning with the page locked. This way we avoid a
1076                          * fault on the ensuing uiomove(), but also
1077                          * more importantly (to fix bug 1094402) we can
1078                          * call segmap_fault() to unlock the page in all
1079                          * cases. An alternative would be to modify
1080                          * segmap_pagecreate() to tell us when it is
1081                          * locking a page, but that's a fairly major
1082                          * interface change.
1083                          */
1084                         if (pgcreated == 0)
1085                                 (void) segmap_pagecreate(segkmap, base,
1086                                     (uint_t)n, 1);
1087                         saved_base = base;
1088                         saved_n = n;
1089                 }
1090
1091                 /*
1092                  * The number of bytes of data in the last page can not
1093                  * be accurately be determined while page is being
1094                  * uiomove'd to and the size of the file being updated.
1095                  * Thus, inform threads which need to know accurately
1096                  * how much data is in the last page of the file.  They
1097                  * will not do the i/o immediately, but will arrange for
1098                  * the i/o to happen later when this modify operation
1099                  * will have finished.
1100                  */
1101                 ASSERT(!(np->r_flags & RMODINPROGRESS));
1102                 mutex_enter(&np->r_statelock);
1103                 np->r_flags |= RMODINPROGRESS;
1104                 np->r_modaddr = (offset & MAXBMASK);
1105                 mutex_exit(&np->r_statelock);
1106
1107                 if (vpm_enable) {
1108                         /*
1109                          * Copy data. If new pages are created, part of
1110                          * the page that is not written will be initizliazed
1111                          * with zeros.
1112                          */
1113                         error = vpm_data_copy(vp, offset, n, uio,
1114                             !pagecreate, NULL, 0, S_WRITE);
1115                 } else {
1116                         error = uiomove(base, n, UIO_WRITE, uio);
1117                 }
1118
1119                 /*
1120                  * r_size is the maximum number of
1121                  * bytes known to be in the file.
1122                  * Make sure it is at least as high as the
1123                  * first unwritten byte pointed to by uio_loffset.
1124                  */
1125                 mutex_enter(&np->r_statelock);
1126                 if (np->r_size < uio->uio_loffset)
1127                         np->r_size = uio->uio_loffset;
1128                 np->r_flags &= ~RMODINPROGRESS;
1129                 np->r_flags |= RDIRTY;
1130                 mutex_exit(&np->r_statelock);
1131
1132                 /* n = # of bytes written */
1133                 n = (int)(uio->uio_loffset - offset);
1134
1135                 if (!vpm_enable) {
1136                         base += n;
1137                 }
1138                 tcount -= n;
1139                 /*
1140                  * If we created pages w/o initializing them completely,
1141                  * we need to zero the part that wasn't set up.
1142                  * This happens on a most EOF write cases and if
1143                  * we had some sort of error during the uiomove.
1144                  */
1145                 if (!vpm_enable && pagecreate) {
1146                         if ((uio->uio_loffset & PAGEOFFSET) || n == 0)
1147                                 (void) kzero(base, PAGESIZE - n);
1148
1149                         if (pgcreated) {
1150                                 /*
1151                                  * Caller is responsible for this page,
1152                                  * it was not created in this loop.
1153                                  */
1154                                 pgcreated = 0;
1155                         } else {
1156                                 /*
1157                                  * For bug 1094402: segmap_pagecreate locks
1158                                  * page. Unlock it. This also unlocks the
1159                                  * pages allocated by page_create_va() in
1160                                  * segmap_pagecreate().
1161                                  */
1162                                 sm_error = segmap_fault(kas.a_hat, segkmap,
1163                                     saved_base, saved_n,
1164                                     F_SOFTUNLOCK, S_WRITE);
1165                                 if (error == 0)
1166                                         error = sm_error;
1167                         }
1168                 }
1169         } while (tcount > 0 && error == 0);
1170
1171         return (error);
1172 }
1173
1174 /*
1175  * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED}
1176  * Like nfs3_rdwrlbn()
1177  */
1178 static int
1179 smbfs_rdwrlbn(vnode_t *vp, page_t *pp, uoff_t off, size_t len,
1180         int flags, cred_t *cr)
1181 {
1182         smbmntinfo_t    *smi = VTOSMI(vp);
1183         struct buf *bp;
1184         int error;
1185         int sync;
1186
1187         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1188                 return (EIO);
1189
1190         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1191                 return (EIO);
1192
1193         bp = pageio_setup(pp, len, vp, flags);
1194         ASSERT(bp != NULL);
1195
1196         /*
1197          * pageio_setup should have set b_addr to 0.  This
1198          * is correct since we want to do I/O on a page
1199          * boundary.  bp_mapin will use this addr to calculate
1200          * an offset, and then set b_addr to the kernel virtual
1201          * address it allocated for us.
1202          */
1203         ASSERT(bp->b_un.b_addr == 0);
1204
1205         bp->b_edev = 0;
1206         bp->b_dev = 0;
1207         bp->b_lblkno = lbtodb(off);
1208         bp->b_file = vp;
1209         bp->b_offset = (offset_t)off;
1210         bp_mapin(bp);
1211
1212         /*
1213          * Calculate the desired level of stability to write data.
1214          */
1215         if ((flags & (B_WRITE|B_ASYNC)) == (B_WRITE|B_ASYNC) &&
1216             freemem > desfree) {
1217                 sync = 0;
1218         } else {
1219                 sync = 1;
1220         }
1221
1222         error = smbfs_bio(bp, sync, cr);
1223
1224         bp_mapout(bp);
1225         pageio_done(bp);
1226
1227         return (error);
1228 }
1229
1230
1231 /*
1232  * Corresponds to nfs3_vnopc.c : nfs3_bio(), though the NFS code
1233  * uses nfs3read()/nfs3write() where we use smb_rwuio().  Also,
1234  * NFS has this later in the file.  Move it up here closer to
1235  * the one call site just above.
1236  */
1237
1238 static int
1239 smbfs_bio(struct buf *bp, int sync, cred_t *cr)
1240 {
1241         struct iovec aiov[1];
1242         struct uio  auio;
1243         struct smb_cred scred;
1244         smbnode_t *np = VTOSMB(bp->b_vp);
1245         smbmntinfo_t *smi = np->n_mount;
1246         smb_share_t *ssp = smi->smi_share;
1247         offset_t offset;
1248         offset_t endoff;
1249         size_t count;
1250         size_t past_eof;
1251         int error;
1252
1253         ASSERT(curproc->p_zone == smi->smi_zone_ref.zref_zone);
1254
1255         offset = ldbtob(bp->b_lblkno);
1256         count = bp->b_bcount;
1257         endoff = offset + count;
1258         if (offset < 0 || endoff < 0)
1259                 return (EINVAL);
1260
1261         /*
1262          * Limit file I/O to the remaining file size, but see
1263          * the notes in smbfs_getpage about SMBFS_EOF.
1264          */
1265         mutex_enter(&np->r_statelock);
1266         if (offset >= np->r_size) {
1267                 mutex_exit(&np->r_statelock);
1268                 if (bp->b_flags & B_READ) {
1269                         return (SMBFS_EOF);
1270                 } else {
1271                         return (EINVAL);
1272                 }
1273         }
1274         if (endoff > np->r_size) {
1275                 past_eof = (size_t)(endoff - np->r_size);
1276                 count -= past_eof;
1277         } else
1278                 past_eof = 0;
1279         mutex_exit(&np->r_statelock);
1280         ASSERT(count > 0);
1281
1282         /* Caller did bpmapin().  Mapped address is... */
1283         aiov[0].iov_base = bp->b_un.b_addr;
1284         aiov[0].iov_len = count;
1285         auio.uio_iov = aiov;
1286         auio.uio_iovcnt = 1;
1287         auio.uio_loffset = offset;
1288         auio.uio_segflg = UIO_SYSSPACE;
1289         auio.uio_fmode = 0;
1290         auio.uio_resid = count;
1291
1292         /* Shared lock for n_fid use in smb_rwuio */
1293         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER,
1294             smi->smi_flags & SMI_INT))
1295                 return (EINTR);
1296         smb_credinit(&scred, cr);
1297
1298         DTRACE_IO1(start, struct buf *, bp);
1299
1300         if (bp->b_flags & B_READ) {
1301
1302                 /* After reconnect, n_fid is invalid */
1303                 if (np->n_vcgenid != ssp->ss_vcgenid)
1304                         error = ESTALE;
1305                 else
1306                         error = smb_rwuio(ssp, np->n_fid, UIO_READ,
1307                             &auio, &scred, smb_timo_read);
1308
1309                 /* Like NFS, only set b_error here. */
1310                 bp->b_error = error;
1311                 bp->b_resid = auio.uio_resid;
1312
1313                 if (!error && auio.uio_resid != 0)
1314                         error = EIO;
1315                 if (!error && past_eof != 0) {
1316                         /* Zero the memory beyond EOF. */
1317                         bzero(bp->b_un.b_addr + count, past_eof);
1318                 }
1319         } else {
1320
1321                 /* After reconnect, n_fid is invalid */
1322                 if (np->n_vcgenid != ssp->ss_vcgenid)
1323                         error = ESTALE;
1324                 else
1325                         error = smb_rwuio(ssp, np->n_fid, UIO_WRITE,
1326                             &auio, &scred, smb_timo_write);
1327
1328                 /* Like NFS, only set b_error here. */
1329                 bp->b_error = error;
1330                 bp->b_resid = auio.uio_resid;
1331
1332                 if (!error && auio.uio_resid != 0)
1333                         error = EIO;
1334                 if (!error && sync) {
1335                         (void) smbfs_smb_flush(np, &scred);
1336                 }
1337         }
1338
1339         /*
1340          * This comes from nfs3_commit()
1341          */
1342         if (error != 0) {
1343                 mutex_enter(&np->r_statelock);
1344                 if (error == ESTALE)
1345                         np->r_flags |= RSTALE;
1346                 if (!np->r_error)
1347                         np->r_error = error;
1348                 mutex_exit(&np->r_statelock);
1349                 bp->b_flags |= B_ERROR;
1350         }
1351
1352         DTRACE_IO1(done, struct buf *, bp);
1353
1354         smb_credrele(&scred);
1355         smbfs_rw_exit(&np->r_lkserlock);
1356
1357         if (error == ESTALE)
1358                 smbfs_attrcache_remove(np);
1359
1360         return (error);
1361 }
1362
1363 /*
1364  * Here NFS has: nfs3write, nfs3read
1365  * We use smb_rwuio instead.
1366  */
1367
1368 /* ARGSUSED */
1369 static int
1370 smbfs_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag,
1371         cred_t *cr, int *rvalp, caller_context_t *ct)
1372 {
1373         int             error;
1374         smbmntinfo_t    *smi;
1375
1376         smi = VTOSMI(vp);
1377
1378         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1379                 return (EIO);
1380
1381         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1382                 return (EIO);
1383
1384         switch (cmd) {
1385
1386         case _FIOFFS:
1387                 error = smbfs_fsync(vp, 0, cr, ct);
1388                 break;
1389
1390                 /*
1391                  * The following two ioctls are used by bfu.
1392                  * Silently ignore to avoid bfu errors.
1393                  */
1394         case _FIOGDIO:
1395         case _FIOSDIO:
1396                 error = 0;
1397                 break;
1398
1399 #if 0   /* Todo - SMB ioctl query regions */
1400         case _FIO_SEEK_DATA:
1401         case _FIO_SEEK_HOLE:
1402 #endif
1403
1404         case _FIODIRECTIO:
1405                 error = smbfs_directio(vp, (int)arg, cr);
1406                 break;
1407
1408                 /*
1409                  * Allow get/set with "raw" security descriptor (SD) data.
1410                  * Useful for testing, diagnosing idmap problems, etc.
1411                  */
1412         case SMBFSIO_GETSD:
1413                 error = smbfs_acl_iocget(vp, arg, flag, cr);
1414                 break;
1415
1416         case SMBFSIO_SETSD:
1417                 error = smbfs_acl_iocset(vp, arg, flag, cr);
1418                 break;
1419
1420         default:
1421                 error = ENOTTY;
1422                 break;
1423         }
1424
1425         return (error);
1426 }
1427
1428
1429 /*
1430  * Return either cached or remote attributes. If get remote attr
1431  * use them to check and invalidate caches, then cache the new attributes.
1432  */
1433 /* ARGSUSED */
1434 static int
1435 smbfs_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
1436         caller_context_t *ct)
1437 {
1438         smbnode_t *np;
1439         smbmntinfo_t *smi;
1440         int error;
1441
1442         smi = VTOSMI(vp);
1443
1444         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1445                 return (EIO);
1446
1447         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1448                 return (EIO);
1449
1450         /*
1451          * If it has been specified that the return value will
1452          * just be used as a hint, and we are only being asked
1453          * for size, fsid or rdevid, then return the client's
1454          * notion of these values without checking to make sure
1455          * that the attribute cache is up to date.
1456          * The whole point is to avoid an over the wire GETATTR
1457          * call.
1458          */
1459         np = VTOSMB(vp);
1460         if (flags & ATTR_HINT) {
1461                 if (vap->va_mask ==
1462                     (vap->va_mask & (VATTR_SIZE | VATTR_FSID | VATTR_RDEV))) {
1463                         mutex_enter(&np->r_statelock);
1464                         if (vap->va_mask | VATTR_SIZE)
1465                                 vap->va_size = np->r_size;
1466                         if (vap->va_mask | VATTR_FSID)
1467                                 vap->va_fsid = vp->v_vfsp->vfs_dev;
1468                         if (vap->va_mask | VATTR_RDEV)
1469                                 vap->va_rdev = vp->v_rdev;
1470                         mutex_exit(&np->r_statelock);
1471                         return (0);
1472                 }
1473         }
1474
1475         /*
1476          * Only need to flush pages if asking for the mtime
1477          * and if there any dirty pages.
1478          *
1479          * Here NFS also checks for async writes (np->r_awcount)
1480          */
1481         if (vap->va_mask & VATTR_MTIME) {
1482                 if (vn_has_cached_data(vp) &&
1483                     ((np->r_flags & RDIRTY) != 0)) {
1484                         mutex_enter(&np->r_statelock);
1485                         np->r_gcount++;
1486                         mutex_exit(&np->r_statelock);
1487                         error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
1488                         mutex_enter(&np->r_statelock);
1489                         if (error && (error == ENOSPC || error == EDQUOT)) {
1490                                 if (!np->r_error)
1491                                         np->r_error = error;
1492                         }
1493                         if (--np->r_gcount == 0)
1494                                 cv_broadcast(&np->r_cv);
1495                         mutex_exit(&np->r_statelock);
1496                 }
1497         }
1498
1499         return (smbfsgetattr(vp, vap, cr));
1500 }
1501
1502 /* smbfsgetattr() in smbfs_client.c */
1503
1504 /*ARGSUSED4*/
1505 static int
1506 smbfs_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
1507                 caller_context_t *ct)
1508 {
1509         vfs_t           *vfsp;
1510         smbmntinfo_t    *smi;
1511         int             error;
1512         uint_t          mask;
1513         struct vattr    oldva;
1514
1515         vfsp = vp->v_vfsp;
1516         smi = VFTOSMI(vfsp);
1517
1518         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1519                 return (EIO);
1520
1521         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
1522                 return (EIO);
1523
1524         mask = vap->va_mask;
1525         if (mask & VATTR_NOSET)
1526                 return (EINVAL);
1527
1528         if (vfsp->vfs_flag & VFS_RDONLY)
1529                 return (EROFS);
1530
1531         /*
1532          * This is a _local_ access check so that only the owner of
1533          * this mount can set attributes.  With ACLs enabled, the
1534          * file owner can be different from the mount owner, and we
1535          * need to check the _mount_ owner here.  See _access_rwx
1536          */
1537         bzero(&oldva, sizeof (oldva));
1538         oldva.va_mask = VATTR_TYPE | VATTR_MODE;
1539         error = smbfsgetattr(vp, &oldva, cr);
1540         if (error)
1541                 return (error);
1542         oldva.va_mask |= VATTR_UID | VATTR_GID;
1543         oldva.va_uid = smi->smi_uid;
1544         oldva.va_gid = smi->smi_gid;
1545
1546         error = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
1547             smbfs_accessx, vp);
1548         if (error)
1549                 return (error);
1550
1551         if (mask & (VATTR_UID | VATTR_GID)) {
1552                 if (smi->smi_flags & SMI_ACL)
1553                         error = smbfs_acl_setids(vp, vap, cr);
1554                 else
1555                         error = ENOSYS;
1556                 if (error != 0) {
1557                         SMBVDEBUG("error %d seting UID/GID on %s",
1558                             error, VTOSMB(vp)->n_rpath);
1559                         /*
1560                          * It might be more correct to return the
1561                          * error here, but that causes complaints
1562                          * when root extracts a cpio archive, etc.
1563                          * So ignore this error, and go ahead with
1564                          * the rest of the setattr work.
1565                          */
1566                 }
1567         }
1568
1569         error = smbfssetattr(vp, vap, flags, cr);
1570
1571 #ifdef  SMBFS_VNEVENT
1572         if (error == 0 && (vap->va_mask & VATTR_SIZE) && vap->va_size == 0)
1573                 vnevent_truncate(vp, ct);
1574 #endif
1575
1576         return (error);
1577 }
1578
1579 /*
1580  * Mostly from Darwin smbfs_setattr()
1581  * but then modified a lot.
1582  */
1583 /* ARGSUSED */
1584 static int
1585 smbfssetattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr)
1586 {
1587         int             error = 0;
1588         smbnode_t       *np = VTOSMB(vp);
1589         uint_t          mask = vap->va_mask;
1590         struct timespec *mtime, *atime;
1591         struct smb_cred scred;
1592         int             cerror, modified = 0;
1593         unsigned short  fid;
1594         int have_fid = 0;
1595         uint32_t rights = 0;
1596         uint32_t dosattr = 0;
1597
1598         ASSERT(curproc->p_zone == VTOSMI(vp)->smi_zone_ref.zref_zone);
1599
1600         /*
1601          * There are no settable attributes on the XATTR dir,
1602          * so just silently ignore these.  On XATTR files,
1603          * you can set the size but nothing else.
1604          */
1605         if (vp->v_flag & V_XATTRDIR)
1606                 return (0);
1607         if (np->n_flag & N_XATTR) {
1608                 if (mask & VATTR_TIMES)
1609                         SMBVDEBUG("ignore set time on xattr\n");
1610                 mask &= VATTR_SIZE;
1611         }
1612
1613         /*
1614          * Only need to flush pages if there are any pages and
1615          * if the file is marked as dirty in some fashion.  The
1616          * file must be flushed so that we can accurately
1617          * determine the size of the file and the cached data
1618          * after the SETATTR returns.  A file is considered to
1619          * be dirty if it is either marked with RDIRTY, has
1620          * outstanding i/o's active, or is mmap'd.  In this
1621          * last case, we can't tell whether there are dirty
1622          * pages, so we flush just to be sure.
1623          */
1624         if (vn_has_cached_data(vp) &&
1625             ((np->r_flags & RDIRTY) ||
1626             np->r_count > 0 ||
1627             np->r_mapcnt > 0)) {
1628                 ASSERT(vp->v_type != VCHR);
1629                 error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, NULL);
1630                 if (error && (error == ENOSPC || error == EDQUOT)) {
1631                         mutex_enter(&np->r_statelock);
1632                         if (!np->r_error)
1633                                 np->r_error = error;
1634                         mutex_exit(&np->r_statelock);
1635                 }
1636         }
1637
1638         /*
1639          * If our caller is trying to set multiple attributes, they
1640          * can make no assumption about what order they are done in.
1641          * Here we try to do them in order of decreasing likelihood
1642          * of failure, just to minimize the chance we'll wind up
1643          * with a partially complete request.
1644          */
1645
1646         /* Shared lock for (possible) n_fid use. */
1647         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
1648                 return (EINTR);
1649         smb_credinit(&scred, cr);
1650
1651         /*
1652          * If the caller has provided extensible attributes,
1653          * map those into DOS attributes supported by SMB.
1654          * Note: zero means "no change".
1655          */
1656         if (mask & VATTR_XVATTR)
1657                 dosattr = xvattr_to_dosattr(np, vap);
1658
1659         /*
1660          * Will we need an open handle for this setattr?
1661          * If so, what rights will we need?
1662          */
1663         if (dosattr || (mask & (VATTR_ATIME | VATTR_MTIME))) {
1664                 rights |=
1665                     SA_RIGHT_FILE_WRITE_ATTRIBUTES;
1666         }
1667         if (mask & VATTR_SIZE) {
1668                 rights |=
1669                     SA_RIGHT_FILE_WRITE_DATA |
1670                     SA_RIGHT_FILE_APPEND_DATA;
1671         }
1672
1673         /*
1674          * Only SIZE really requires a handle, but it's
1675          * simpler and more reliable to set via a handle.
1676          * Some servers like NT4 won't set times by path.
1677          * Also, we're usually setting everything anyway.
1678          */
1679         if (rights != 0) {
1680                 error = smbfs_smb_tmpopen(np, rights, &scred, &fid);
1681                 if (error) {
1682                         SMBVDEBUG("error %d opening %s\n",
1683                             error, np->n_rpath);
1684                         goto out;
1685                 }
1686                 have_fid = 1;
1687         }
1688
1689         /*
1690          * If the server supports the UNIX extensions, right here is where
1691          * we'd support changes to uid, gid, mode, and possibly va_flags.
1692          * For now we claim to have made any such changes.
1693          */
1694
1695         if (mask & VATTR_SIZE) {
1696                 /*
1697                  * If the new file size is less than what the client sees as
1698                  * the file size, then just change the size and invalidate
1699                  * the pages.
1700                  */
1701
1702                 /*
1703                  * Set the file size to vap->va_size.
1704                  */
1705                 ASSERT(have_fid);
1706                 error = smbfs_smb_setfsize(np, fid, vap->va_size, &scred);
1707                 if (error) {
1708                         SMBVDEBUG("setsize error %d file %s\n",
1709                             error, np->n_rpath);
1710                 } else {
1711                         /*
1712                          * Darwin had code here to zero-extend.
1713                          * Tests indicate the server will zero-fill,
1714                          * so looks like we don't need to do that.
1715                          */
1716                         mutex_enter(&np->r_statelock);
1717                         np->r_size = vap->va_size;
1718                         mutex_exit(&np->r_statelock);
1719                         modified = 1;
1720                 }
1721         }
1722
1723         /*
1724          * Todo: Implement setting create_time (which is
1725          * different from ctime).
1726          */
1727         mtime = ((mask & VATTR_MTIME) ? &vap->va_mtime : 0);
1728         atime = ((mask & VATTR_ATIME) ? &vap->va_atime : 0);
1729
1730         if (dosattr || mtime || atime) {
1731                 /*
1732                  * Always use the handle-based set attr call now.
1733                  */
1734                 ASSERT(have_fid);
1735                 error = smbfs_smb_setfattr(np, fid,
1736                     dosattr, mtime, atime, &scred);
1737                 if (error) {
1738                         SMBVDEBUG("set times error %d file %s\n",
1739                             error, np->n_rpath);
1740                 } else {
1741                         modified = 1;
1742                 }
1743         }
1744
1745 out:
1746         if (have_fid) {
1747                 cerror = smbfs_smb_tmpclose(np, fid, &scred);
1748                 if (cerror)
1749                         SMBVDEBUG("error %d closing %s\n",
1750                             cerror, np->n_rpath);
1751         }
1752
1753         smb_credrele(&scred);
1754         smbfs_rw_exit(&np->r_lkserlock);
1755
1756         if (modified) {
1757                 /*
1758                  * Invalidate attribute cache in case the server
1759                  * doesn't set exactly the attributes we asked.
1760                  */
1761                 smbfs_attrcache_remove(np);
1762
1763                 /*
1764                  * If changing the size of the file, invalidate
1765                  * any local cached data which is no longer part
1766                  * of the file.  We also possibly invalidate the
1767                  * last page in the file.  We could use
1768                  * pvn_vpzero(), but this would mark the page as
1769                  * modified and require it to be written back to
1770                  * the server for no particularly good reason.
1771                  * This way, if we access it, then we bring it
1772                  * back in.  A read should be cheaper than a
1773                  * write.
1774                  */
1775                 if (mask & VATTR_SIZE) {
1776                         smbfs_invalidate_pages(vp,
1777                             (vap->va_size & PAGEMASK), cr);
1778                 }
1779         }
1780
1781         return (error);
1782 }
1783
1784 /*
1785  * Helper function for extensible system attributes (PSARC 2007/315)
1786  * Compute the DOS attribute word to pass to _setfattr (see above).
1787  * This returns zero IFF no change is being made to attributes.
1788  * Otherwise return the new attributes or SMB_EFA_NORMAL.
1789  */
1790 static uint32_t
1791 xvattr_to_dosattr(smbnode_t *np, struct vattr *vap)
1792 {
1793         xvattr_t *xvap = (xvattr_t *)vap;
1794         xoptattr_t *xoap = NULL;
1795         uint32_t attr = np->r_attr.fa_attr;
1796         boolean_t anyset = B_FALSE;
1797
1798         if ((xoap = xva_getxoptattr(xvap)) == NULL)
1799                 return (0);
1800
1801         if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
1802                 if (xoap->xoa_archive)
1803                         attr |= SMB_FA_ARCHIVE;
1804                 else
1805                         attr &= ~SMB_FA_ARCHIVE;
1806                 XVA_SET_RTN(xvap, XAT_ARCHIVE);
1807                 anyset = B_TRUE;
1808         }
1809         if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
1810                 if (xoap->xoa_system)
1811                         attr |= SMB_FA_SYSTEM;
1812                 else
1813                         attr &= ~SMB_FA_SYSTEM;
1814                 XVA_SET_RTN(xvap, XAT_SYSTEM);
1815                 anyset = B_TRUE;
1816         }
1817         if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
1818                 if (xoap->xoa_readonly)
1819                         attr |= SMB_FA_RDONLY;
1820                 else
1821                         attr &= ~SMB_FA_RDONLY;
1822                 XVA_SET_RTN(xvap, XAT_READONLY);
1823                 anyset = B_TRUE;
1824         }
1825         if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
1826                 if (xoap->xoa_hidden)
1827                         attr |= SMB_FA_HIDDEN;
1828                 else
1829                         attr &= ~SMB_FA_HIDDEN;
1830                 XVA_SET_RTN(xvap, XAT_HIDDEN);
1831                 anyset = B_TRUE;
1832         }
1833
1834         if (anyset == B_FALSE)
1835                 return (0);     /* no change */
1836         if (attr == 0)
1837                 attr = SMB_EFA_NORMAL;
1838
1839         return (attr);
1840 }
1841
1842 /*
1843  * smbfs_access_rwx()
1844  * Common function for smbfs_access, etc.
1845  *
1846  * The security model implemented by the FS is unusual
1847  * due to the current "single user mounts" restriction:
1848  * All access under a given mount point uses the CIFS
1849  * credentials established by the owner of the mount.
1850  *
1851  * Most access checking is handled by the CIFS server,
1852  * but we need sufficient Unix access checks here to
1853  * prevent other local Unix users from having access
1854  * to objects under this mount that the uid/gid/mode
1855  * settings in the mount would not allow.
1856  *
1857  * With this model, there is a case where we need the
1858  * ability to do an access check before we have the
1859  * vnode for an object.  This function takes advantage
1860  * of the fact that the uid/gid/mode is per mount, and
1861  * avoids the need for a vnode.
1862  *
1863  * We still (sort of) need a vnode when we call
1864  * secpolicy_vnode_access, but that only uses
1865  * the vtype field, so we can use a pair of fake
1866  * vnodes that have only v_type filled in.
1867  */
1868 static int
1869 smbfs_access_rwx(vfs_t *vfsp, int vtype, int mode, cred_t *cr)
1870 {
1871         /* See the secpolicy call below. */
1872         static const vnode_t tmpl_vdir = { .v_type = VDIR };
1873         static const vnode_t tmpl_vreg = { .v_type = VREG };
1874         vattr_t         va;
1875         vnode_t         *tvp;
1876         struct smbmntinfo *smi = VFTOSMI(vfsp);
1877         int shift = 0;
1878
1879         /*
1880          * Build our (fabricated) vnode attributes.
1881          */
1882         bzero(&va, sizeof (va));
1883         va.va_mask = VATTR_TYPE | VATTR_MODE | VATTR_UID | VATTR_GID;
1884         va.va_type = vtype;
1885         va.va_mode = (vtype == VDIR) ?
1886             smi->smi_dmode : smi->smi_fmode;
1887         va.va_uid = smi->smi_uid;
1888         va.va_gid = smi->smi_gid;
1889
1890         /*
1891          * Disallow write attempts on read-only file systems,
1892          * unless the file is a device or fifo node.  Note:
1893          * Inline vn_is_readonly and IS_DEVVP here because
1894          * we may not have a vnode ptr.  Original expr. was:
1895          * (mode & VWRITE) && vn_is_readonly(vp) && !IS_DEVVP(vp))
1896          */
1897         if ((mode & VWRITE) &&
1898             (vfsp->vfs_flag & VFS_RDONLY) &&
1899             !(vtype == VCHR || vtype == VBLK || vtype == VFIFO))
1900                 return (EROFS);
1901
1902         /*
1903          * Disallow attempts to access mandatory lock files.
1904          * Similarly, expand MANDLOCK here.
1905          */
1906         if ((mode & (VWRITE | VREAD | VEXEC)) &&
1907             va.va_type == VREG && MANDMODE(va.va_mode))
1908                 return (EACCES);
1909
1910         /*
1911          * Access check is based on only
1912          * one of owner, group, public.
1913          * If not owner, then check group.
1914          * If not a member of the group,
1915          * then check public access.
1916          */
1917         if (crgetuid(cr) != va.va_uid) {
1918                 shift += 3;
1919                 if (!groupmember(va.va_gid, cr))
1920                         shift += 3;
1921         }
1922
1923         /*
1924          * We need a vnode for secpolicy_vnode_access,
1925          * but the only thing it looks at is v_type,
1926          * so pass one of the templates above.
1927          */
1928         tvp = (va.va_type == VDIR) ?
1929             (vnode_t *)&tmpl_vdir :
1930             (vnode_t *)&tmpl_vreg;
1931
1932         return (secpolicy_vnode_access2(cr, tvp, va.va_uid,
1933             va.va_mode << shift, mode));
1934 }
1935
1936 /*
1937  * See smbfs_setattr
1938  */
1939 static int
1940 smbfs_accessx(void *arg, int mode, cred_t *cr)
1941 {
1942         vnode_t *vp = arg;
1943         /*
1944          * Note: The caller has checked the current zone,
1945          * the SMI_DEAD and VFS_UNMOUNTED flags, etc.
1946          */
1947         return (smbfs_access_rwx(vp->v_vfsp, vp->v_type, mode, cr));
1948 }
1949
1950 /*
1951  * XXX
1952  * This op should support PSARC 2007/403, Modified Access Checks for CIFS
1953  */
1954 /* ARGSUSED */
1955 static int
1956 smbfs_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
1957 {
1958         vfs_t           *vfsp;
1959         smbmntinfo_t    *smi;
1960
1961         vfsp = vp->v_vfsp;
1962         smi = VFTOSMI(vfsp);
1963
1964         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1965                 return (EIO);
1966
1967         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
1968                 return (EIO);
1969
1970         return (smbfs_access_rwx(vfsp, vp->v_type, mode, cr));
1971 }
1972
1973
1974 /* ARGSUSED */
1975 static int
1976 smbfs_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr, caller_context_t *ct)
1977 {
1978         /* Not yet... */
1979         return (ENOSYS);
1980 }
1981
1982
1983 /*
1984  * Flush local dirty pages to stable storage on the server.
1985  *
1986  * If FNODSYNC is specified, then there is nothing to do because
1987  * metadata changes are not cached on the client before being
1988  * sent to the server.
1989  */
1990 /* ARGSUSED */
1991 static int
1992 smbfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
1993 {
1994         int             error = 0;
1995         smbmntinfo_t    *smi;
1996         smbnode_t       *np;
1997         struct smb_cred scred;
1998
1999         np = VTOSMB(vp);
2000         smi = VTOSMI(vp);
2001
2002         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
2003                 return (EIO);
2004
2005         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
2006                 return (EIO);
2007
2008         if ((syncflag & FNODSYNC) || IS_SWAPVP(vp))
2009                 return (0);
2010
2011         if ((syncflag & (FSYNC|FDSYNC)) == 0)
2012                 return (0);
2013
2014         error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
2015         if (error)
2016                 return (error);
2017
2018         /* Shared lock for n_fid use in _flush */
2019         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
2020                 return (EINTR);
2021         smb_credinit(&scred, cr);
2022
2023         error = smbfs_smb_flush(np, &scred);
2024
2025         smb_credrele(&scred);
2026         smbfs_rw_exit(&np->r_lkserlock);
2027
2028         return (error);
2029 }
2030
2031 /*
2032  * Last reference to vnode went away.
2033  */
2034 /* ARGSUSED */
2035 static void
2036 smbfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
2037 {
2038         struct smb_cred scred;
2039         smbnode_t       *np = VTOSMB(vp);
2040         int error;
2041
2042         /*
2043          * Don't "bail out" for VFS_UNMOUNTED here,
2044          * as we want to do cleanup, etc.
2045          * See also pcfs_inactive
2046          */
2047
2048         /*
2049          * If this is coming from the wrong zone, we let someone in the right
2050          * zone take care of it asynchronously.  We can get here due to
2051          * VN_RELE() being called from pageout() or fsflush().  This call may
2052          * potentially turn into an expensive no-op if, for instance, v_count
2053          * gets incremented in the meantime, but it's still correct.
2054          */
2055
2056         /*
2057          * From NFS:rinactive()
2058          *
2059          * Before freeing anything, wait until all asynchronous
2060          * activity is done on this rnode.  This will allow all
2061          * asynchronous read ahead and write behind i/o's to
2062          * finish.
2063          */
2064         mutex_enter(&np->r_statelock);
2065         while (np->r_count > 0)
2066                 cv_wait(&np->r_cv, &np->r_statelock);
2067         mutex_exit(&np->r_statelock);
2068
2069         /*
2070          * Flush and invalidate all pages associated with the vnode.
2071          */
2072         if (vn_has_cached_data(vp)) {
2073                 if ((np->r_flags & RDIRTY) && !np->r_error) {
2074                         error = smbfs_putpage(vp, 0, 0, 0, cr, ct);
2075                         if (error && (error == ENOSPC || error == EDQUOT)) {
2076                                 mutex_enter(&np->r_statelock);
2077                                 if (!np->r_error)
2078                                         np->r_error = error;
2079                                 mutex_exit(&np->r_statelock);
2080                         }
2081                 }
2082                 smbfs_invalidate_pages(vp, 0, cr);
2083         }
2084         /*
2085          * This vnode should have lost all cached data.
2086          */
2087         ASSERT(vn_has_cached_data(vp) == 0);
2088
2089         /*
2090          * Defend against the possibility that higher-level callers
2091          * might not correctly balance open and close calls.  If we
2092          * get here with open references remaining, it means there
2093          * was a missing fop_close somewhere.  If that happens, do
2094          * the close here so we don't "leak" FIDs on the server.
2095          *
2096          * Exclusive lock for modifying n_fid stuff.
2097          * Don't want this one ever interruptible.
2098          */
2099         (void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
2100         smb_credinit(&scred, cr);
2101
2102         switch (np->n_ovtype) {
2103         case VNON:
2104                 /* not open (OK) */
2105                 break;
2106
2107         case VDIR:
2108                 if (np->n_dirrefs == 0)
2109                         break;
2110                 SMBVDEBUG("open dir: refs %d path %s\n",
2111                     np->n_dirrefs, np->n_rpath);
2112                 /* Force last close. */
2113                 np->n_dirrefs = 1;
2114                 smbfs_rele_fid(np, &scred);
2115                 break;
2116
2117         case VREG:
2118                 if (np->n_fidrefs == 0)
2119                         break;
2120                 SMBVDEBUG("open file: refs %d id 0x%x path %s\n",
2121                     np->n_fidrefs, np->n_fid, np->n_rpath);
2122                 /* Force last close. */
2123                 np->n_fidrefs = 1;
2124                 smbfs_rele_fid(np, &scred);
2125                 break;
2126
2127         default:
2128                 SMBVDEBUG("bad n_ovtype %d\n", np->n_ovtype);
2129                 np->n_ovtype = VNON;
2130                 break;
2131         }
2132
2133         smb_credrele(&scred);
2134         smbfs_rw_exit(&np->r_lkserlock);
2135
2136         /*
2137          * XATTR directories (and the files under them) have
2138          * little value for reclaim, so just remove them from
2139          * the "hash" (AVL) as soon as they go inactive.
2140          * Note that the node may already have been removed
2141          * from the hash by smbfsremove.
2142          */
2143         if ((np->n_flag & N_XATTR) != 0 &&
2144             (np->r_flags & RHASHED) != 0)
2145                 smbfs_rmhash(np);
2146
2147         smbfs_addfree(np);
2148 }
2149
2150 /*
2151  * Remote file system operations having to do with directory manipulation.
2152  */
2153 /* ARGSUSED */
2154 static int
2155 smbfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
2156         int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
2157         int *direntflags, pathname_t *realpnp)
2158 {
2159         vfs_t           *vfs;
2160         smbmntinfo_t    *smi;
2161         smbnode_t       *dnp;
2162         int             error;
2163
2164         vfs = dvp->v_vfsp;
2165         smi = VFTOSMI(vfs);
2166
2167         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
2168                 return (EPERM);
2169
2170         if (smi->smi_flags & SMI_DEAD || vfs->vfs_flag & VFS_UNMOUNTED)
2171                 return (EIO);
2172
2173         dnp = VTOSMB(dvp);
2174
2175         /*
2176          * Are we looking up extended attributes?  If so, "dvp" is
2177          * the file or directory for which we want attributes, and
2178          * we need a lookup of the (faked up) attribute directory
2179          * before we lookup the rest of the path.
2180          */
2181         if (flags & LOOKUP_XATTR) {
2182                 /*
2183                  * Require the xattr mount option.
2184                  */
2185                 if ((vfs->vfs_flag & VFS_XATTR) == 0)
2186                         return (EINVAL);
2187
2188                 error = smbfs_get_xattrdir(dvp, vpp, cr, flags);
2189                 return (error);
2190         }
2191
2192         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_READER, SMBINTR(dvp)))
2193                 return (EINTR);
2194
2195         error = smbfslookup(dvp, nm, vpp, cr, 1, ct);
2196
2197         smbfs_rw_exit(&dnp->r_rwlock);
2198
2199         return (error);
2200 }
2201
2202 /* ARGSUSED */
2203 static int
2204 smbfslookup(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr,
2205         int cache_ok, caller_context_t *ct)
2206 {
2207         int             error;
2208         int             supplen; /* supported length */
2209         vnode_t         *vp;
2210         smbnode_t       *np;
2211         smbnode_t       *dnp;
2212         smbmntinfo_t    *smi;
2213         /* struct smb_vc        *vcp; */
2214         const char      *ill;
2215         const char      *name = (const char *)nm;
2216         int             nmlen = strlen(nm);
2217         int             rplen;
2218         struct smb_cred scred;
2219         struct smbfattr fa;
2220
2221         smi = VTOSMI(dvp);
2222         dnp = VTOSMB(dvp);
2223
2224         ASSERT(curproc->p_zone == smi->smi_zone_ref.zref_zone);
2225
2226 #ifdef NOT_YET
2227         vcp = SSTOVC(smi->smi_share);
2228
2229         /* XXX: Should compute this once and store it in smbmntinfo_t */
2230         supplen = (SMB_DIALECT(vcp) >= SMB_DIALECT_LANMAN2_0) ? 255 : 12;
2231 #else
2232         supplen = 255;
2233 #endif
2234
2235         /*
2236          * RWlock must be held, either reader or writer.
2237          */
2238         ASSERT(dnp->r_rwlock.count != 0);
2239
2240         /*
2241          * If lookup is for "", just return dvp.
2242          * No need to perform any access checks.
2243          */
2244         if (nmlen == 0) {
2245                 VN_HOLD(dvp);
2246                 *vpp = dvp;
2247                 return (0);
2248         }
2249
2250         /*
2251          * Can't do lookups in non-directories.
2252          */
2253         if (dvp->v_type != VDIR)
2254                 return (ENOTDIR);
2255
2256         /*
2257          * Need search permission in the directory.
2258          */
2259         error = smbfs_access(dvp, VEXEC, 0, cr, ct);
2260         if (error)
2261                 return (error);
2262
2263         /*
2264          * If lookup is for ".", just return dvp.
2265          * Access check was done above.
2266          */
2267         if (nmlen == 1 && name[0] == '.') {
2268                 VN_HOLD(dvp);
2269                 *vpp = dvp;
2270                 return (0);
2271         }
2272
2273         /*
2274          * Now some sanity checks on the name.
2275          * First check the length.
2276          */
2277         if (nmlen > supplen)
2278                 return (ENAMETOOLONG);
2279
2280         /*
2281          * Avoid surprises with characters that are
2282          * illegal in Windows file names.
2283          * Todo: CATIA mappings?
2284          */
2285         ill = illegal_chars;
2286         if (dnp->n_flag & N_XATTR)
2287                 ill++; /* allow colon */
2288         if (strpbrk(nm, ill))
2289                 return (EINVAL);
2290
2291         /*
2292          * Special handling for lookup of ".."
2293          *
2294          * We keep full pathnames (as seen on the server)
2295          * so we can just trim off the last component to
2296          * get the full pathname of the parent.  Note:
2297          * We don't actually copy and modify, but just
2298          * compute the trimmed length and pass that with
2299          * the current dir path (not null terminated).
2300          *
2301          * We don't go over-the-wire to get attributes
2302          * for ".." because we know it's a directory,
2303          * and we can just leave the rest "stale"
2304          * until someone does a getattr.
2305          */
2306         if (nmlen == 2 && name[0] == '.' && name[1] == '.') {
2307                 if (dvp->v_flag & VROOT) {
2308                         /*
2309                          * Already at the root.  This can happen
2310                          * with directory listings at the root,
2311                          * which lookup "." and ".." to get the
2312                          * inode numbers.  Let ".." be the same
2313                          * as "." in the FS root.
2314                          */
2315                         VN_HOLD(dvp);
2316                         *vpp = dvp;
2317                         return (0);
2318                 }
2319
2320                 /*
2321                  * Special case for XATTR directory
2322                  */
2323                 if (dvp->v_flag & V_XATTRDIR) {
2324                         error = smbfs_xa_parent(dvp, vpp);
2325                         return (error);
2326                 }
2327
2328                 /*
2329                  * Find the parent path length.
2330                  */
2331                 rplen = dnp->n_rplen;
2332                 ASSERT(rplen > 0);
2333                 while (--rplen >= 0) {
2334                         if (dnp->n_rpath[rplen] == '\\')
2335                                 break;
2336                 }
2337                 if (rplen <= 0) {
2338                         /* Found our way to the root. */
2339                         vp = SMBTOV(smi->smi_root);
2340                         VN_HOLD(vp);
2341                         *vpp = vp;
2342                         return (0);
2343                 }
2344                 np = smbfs_node_findcreate(smi,
2345                     dnp->n_rpath, rplen, NULL, 0, 0,
2346                     &smbfs_fattr0); /* force create */
2347                 ASSERT(np != NULL);
2348                 vp = SMBTOV(np);
2349                 vp->v_type = VDIR;
2350
2351                 /* Success! */
2352                 *vpp = vp;
2353                 return (0);
2354         }
2355
2356         /*
2357          * Normal lookup of a name under this directory.
2358          * Note we handled "", ".", ".." above.
2359          */
2360         if (cache_ok) {
2361                 /*
2362                  * The caller indicated that it's OK to use a
2363                  * cached result for this lookup, so try to
2364                  * reclaim a node from the smbfs node cache.
2365                  */
2366                 error = smbfslookup_cache(dvp, nm, nmlen, &vp, cr);
2367                 if (error)
2368                         return (error);
2369                 if (vp != NULL) {
2370                         /* hold taken in lookup_cache */
2371                         *vpp = vp;
2372                         return (0);
2373                 }
2374         }
2375
2376         /*
2377          * OK, go over-the-wire to get the attributes,
2378          * then create the node.
2379          */
2380         smb_credinit(&scred, cr);
2381         /* Note: this can allocate a new "name" */
2382         error = smbfs_smb_lookup(dnp, &name, &nmlen, &fa, &scred);
2383         smb_credrele(&scred);
2384         if (error == ENOTDIR) {
2385                 /*
2386                  * Lookup failed because this directory was
2387                  * removed or renamed by another client.
2388                  * Remove any cached attributes under it.
2389                  */
2390                 smbfs_attrcache_remove(dnp);
2391                 smbfs_attrcache_prune(dnp);
2392         }
2393         if (error)
2394                 goto out;
2395
2396         error = smbfs_nget(dvp, name, nmlen, &fa, &vp);
2397         if (error)
2398                 goto out;
2399
2400         /* Success! */
2401         *vpp = vp;
2402
2403 out:
2404         /* smbfs_smb_lookup may have allocated name. */
2405         if (name != nm)
2406                 smbfs_name_free(name, nmlen);
2407
2408         return (error);
2409 }
2410
2411 /*
2412  * smbfslookup_cache
2413  *
2414  * Try to reclaim a node from the smbfs node cache.
2415  * Some statistics for DEBUG.
2416  *
2417  * This mechanism lets us avoid many of the five (or more)
2418  * OtW lookup calls per file seen with "ls -l" if we search
2419  * the smbfs node cache for recently inactive(ated) nodes.
2420  */
2421 #ifdef DEBUG
2422 int smbfs_lookup_cache_calls = 0;
2423 int smbfs_lookup_cache_error = 0;
2424 int smbfs_lookup_cache_miss = 0;
2425 int smbfs_lookup_cache_stale = 0;
2426 int smbfs_lookup_cache_hits = 0;
2427 #endif /* DEBUG */
2428
2429 /* ARGSUSED */
2430 static int
2431 smbfslookup_cache(vnode_t *dvp, char *nm, int nmlen,
2432         vnode_t **vpp, cred_t *cr)
2433 {
2434         struct vattr va;
2435         smbnode_t *dnp;
2436         smbnode_t *np;
2437         vnode_t *vp;
2438         int error;
2439         char sep;
2440
2441         dnp = VTOSMB(dvp);
2442         *vpp = NULL;    /* stays NULL unless we return a valid cache hit */
2443
2444 #ifdef DEBUG
2445         smbfs_lookup_cache_calls++;
2446 #endif
2447
2448         /*
2449          * First make sure we can get attributes for the
2450          * directory.  Cached attributes are OK here.
2451          * If we removed or renamed the directory, this
2452          * will return ENOENT.  If someone else removed
2453          * this directory or file, we'll find out when we
2454          * try to open or get attributes.
2455          */
2456         va.va_mask = VATTR_TYPE | VATTR_MODE;
2457         error = smbfsgetattr(dvp, &va, cr);
2458         if (error) {
2459 #ifdef DEBUG
2460                 smbfs_lookup_cache_error++;
2461 #endif
2462                 return (error);
2463         }
2464
2465         /*
2466          * Passing NULL smbfattr here so we will
2467          * just look, not create.
2468          */
2469         sep = SMBFS_DNP_SEP(dnp);
2470         np = smbfs_node_findcreate(dnp->n_mount,
2471             dnp->n_rpath, dnp->n_rplen,
2472             nm, nmlen, sep, NULL);
2473         if (np == NULL) {
2474 #ifdef DEBUG
2475                 smbfs_lookup_cache_miss++;
2476 #endif
2477                 return (0);     /* a miss is not an error; *vpp is NULL */
2478         }
2479
2480         /*
2481          * Found it.  Attributes still valid?
2482          */
2483         vp = SMBTOV(np);
2484         if (np->r_attrtime <= gethrtime()) {
2485                 /* stale: drop our hold and report it like a miss */
2486 #ifdef DEBUG
2487                 smbfs_lookup_cache_stale++;
2488 #endif
2489                 VN_RELE(vp);
2490                 return (0);
2491         }
2492
2493         /*
2494          * Success!
2495          * Caller gets hold from smbfs_node_findcreate
2496          */
2497 #ifdef DEBUG
2498         smbfs_lookup_cache_hits++;
2499 #endif
2500         *vpp = vp;
2501         return (0);
2502 }
2503
2504
2505 /*
2506  * smbfs_create: create (or open/truncate) regular file nm in directory dvp.
2507  * XXX: vsecattr_t is new to build 77, and we need to eventually support
2508  * it in order to create an ACL when an object is created.
2509  *
2510  * This op should support the new FIGNORECASE flag for case-insensitive
2511  * lookups, per PSARC 2007/244.
2512  */
2513 /* ARGSUSED */
2514 static int
2515 smbfs_create(vnode_t *dvp, char *nm, struct vattr *va, enum vcexcl exclusive,
2516         int mode, vnode_t **vpp, cred_t *cr, int flags, caller_context_t *ct,
2517         vsecattr_t *vsecp)
2518 {
2519         int             error;
2520         int             cerror;
2521         vfs_t           *vfsp;
2522         vnode_t         *vp;
2523         smbnode_t       *np;
2524         smbnode_t       *dnp;
2525         smbmntinfo_t    *smi;
2526         struct vattr    vattr;
2527         struct smbfattr fattr;
2528         struct smb_cred scred;
2529         const char *name = (const char *)nm;
2530         int             nmlen = strlen(nm);
2531         uint32_t        disp;
2532         uint16_t        fid;
2533         int             xattr;
2534
2535         vfsp = dvp->v_vfsp;
2536         smi = VFTOSMI(vfsp);
2537         dnp = VTOSMB(dvp);
2538         vp = NULL;
2539
2540         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
2541                 return (EPERM);
2542
2543         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
2544                 return (EIO);
2545
2546         /*
2547          * Note: this may break mknod(2) calls to create a directory,
2548          * but that's obscure use.  Some other filesystems do this.
2549          * Todo: redirect VDIR type here to _mkdir.
2550          */
2551         if (va->va_type != VREG)
2552                 return (EINVAL);
2553
2554         /*
2555          * If the pathname is "", just use dvp, no checks.
2556          * Do this outside of the rwlock (like zfs).
2557          */
2558         if (nmlen == 0) {
2559                 VN_HOLD(dvp);
2560                 *vpp = dvp;
2561                 return (0);
2562         }
2563
2564         /* Don't allow "." or ".." through here. */
2565         if ((nmlen == 1 && name[0] == '.') ||
2566             (nmlen == 2 && name[0] == '.' && name[1] == '.'))
2567                 return (EISDIR);
2568
2569         /*
2570          * We make a copy of the attributes because the caller does not
2571          * expect us to change what va points to.
2572          */
2573         vattr = *va;
2574
2575         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
2576                 return (EINTR);
2577         smb_credinit(&scred, cr);
2578
2579         /*
2580          * NFS needs to go over the wire, just to be sure whether the
2581          * file exists or not.  Using a cached result is dangerous in
2582          * this case when making a decision regarding existence.
2583          *
2584          * The SMB protocol does NOT really need to go OTW here
2585          * thanks to the expressive NTCREATE disposition values.
2586          * Unfortunately, to do Unix access checks correctly,
2587          * we need to know if the object already exists.
2588          * When the object does not exist, we need VWRITE on
2589          * the directory.  Note: smbfslookup() checks VEXEC.
2590          */
2591         error = smbfslookup(dvp, nm, &vp, cr, 0, ct);
2592         if (error == 0) {
2593                 /*
2594                  * The file already exists.  Error?
2595                  * NB: have a hold from smbfslookup
2596                  */
2597                 if (exclusive == EXCL) {
2598                         error = EEXIST;
2599                         VN_RELE(vp);
2600                         goto out;
2601                 }
2602                 /*
2603                  * Verify requested access.
2604                  */
2605                 error = smbfs_access(vp, mode, 0, cr, ct);
2606                 if (error) {
2607                         VN_RELE(vp);
2608                         goto out;
2609                 }
2610
2611                 /*
2612                  * Truncate (if requested).
2613                  */
2614                 if ((vattr.va_mask & VATTR_SIZE) && vp->v_type == VREG) {
2615                         vattr.va_mask = VATTR_SIZE;
2616                         error = smbfssetattr(vp, &vattr, 0, cr);
2617                         if (error) {
2618                                 VN_RELE(vp);
2619                                 goto out;
2620                         }
2621 #ifdef  SMBFS_VNEVENT
2622                         /* Existing file was truncated */
2623                         vnevent_create(vp, ct);
2624 #endif
2625                         /* invalidate pages done in smbfssetattr() */
2626                 }
2627                 /* Success! */
2628                 *vpp = vp;
2629                 goto out;
2630         }
2631
2632         /*
2633          * Lookup failed (any error): treat as nonexistent; need VWRITE in dir.
2634          */
2635         error = smbfs_access(dvp, VWRITE, 0, cr, ct);
2636         if (error)
2637                 goto out;
2638
2639         /*
2640          * Now things get tricky.  We also need to check the
2641          * requested open mode against the file we may create.
2642          * See comments at smbfs_access_rwx
2643          */
2644         error = smbfs_access_rwx(vfsp, VREG, mode, cr);
2645         if (error)
2646                 goto out;
2647
2648         /*
2649          * Now the code derived from Darwin,
2650          * but with greater use of NT_CREATE
2651          * disposition options.  Much changed.
2652          *
2653          * Create (or open) a new child node.
2654          * Note we handled "." and ".." above.
2655          */
2656
2657         if (exclusive == EXCL)
2658                 disp = NTCREATEX_DISP_CREATE;
2659         else {
2660                 /* Truncate regular files if requested. */
2661                 if ((va->va_type == VREG) &&
2662                     (va->va_mask & VATTR_SIZE) &&
2663                     (va->va_size == 0))
2664                         disp = NTCREATEX_DISP_OVERWRITE_IF;
2665                 else
2666                         disp = NTCREATEX_DISP_OPEN_IF;
2667         }
2668         xattr = (dnp->n_flag & N_XATTR) ? 1 : 0;
2669         error = smbfs_smb_create(dnp,
2670             name, nmlen, xattr,
2671             disp, &scred, &fid);
2672         if (error)
2673                 goto out;
2674
2675         /*
2676          * Should use the fid to get/set the size
2677          * while we have it opened here.  See above.
2678          */
2679
2680         cerror = smbfs_smb_close(smi->smi_share, fid, NULL, &scred);
2681         if (cerror)
2682                 SMBVDEBUG("error %d closing %s\\%s\n",
2683                     cerror, dnp->n_rpath, name);
2684
2685         /*
2686          * In the open case, the name may differ a little
2687          * from what we passed to create (case, etc.)
2688          * so call lookup to get the (opened) name.
2689          *
2690          * XXX: Could avoid this extra lookup if the
2691          * "createact" result from NT_CREATE says we
2692          * created the object.
2693          */
2694         error = smbfs_smb_lookup(dnp, &name, &nmlen, &fattr, &scred);
2695         if (error)
2696                 goto out;
2697
2698         /* update attr and directory cache */
2699         smbfs_attr_touchdir(dnp);
2700
2701         error = smbfs_nget(dvp, name, nmlen, &fattr, &vp);
2702         if (error)
2703                 goto out;
2704
2705         /* Success! */
2706         *vpp = vp;
2707         error = 0;
2708
2709 out:
2710         smb_credrele(&scred);
2711         smbfs_rw_exit(&dnp->r_rwlock);
2712         if (name != nm)         /* smbfs_smb_lookup may have allocated name */
2713                 smbfs_name_free(name, nmlen);
2714         return (error);
2715 }
2716
2717 /*
2718  * smbfs_remove: remove the file named nm from directory dvp.
2719  * XXX: This op should support the new FIGNORECASE flag for
2720  * case-insensitive lookups, per PSARC 2007/244.
2721  */
2722 /* ARGSUSED */
2723 static int
2724 smbfs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
2725         int flags)
2726 {
2727         struct smb_cred scred;
2728         vnode_t         *vp = NULL;
2729         smbnode_t       *dnp = VTOSMB(dvp);
2730         smbmntinfo_t    *smi = VTOSMI(dvp);
2731         int             error;
2732
2733         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
2734                 return (EPERM);
2735
2736         if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
2737                 return (EIO);
2738
2739         /*
2740          * Verify access to the directory.
2741          */
2742         error = smbfs_access(dvp, VWRITE|VEXEC, 0, cr, ct);
2743         if (error)
2744                 return (error);
2745
2746         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
2747                 return (EINTR);
2748         smb_credinit(&scred, cr);
2749
2750         /* Lookup the file to remove. */
2751         error = smbfslookup(dvp, nm, &vp, cr, 0, ct);
2752         if (error == 0) {
2753                 /*
2754                  * Do the real remove work
2755                  */
2756                 error = smbfsremove(dvp, vp, &scred, flags);
2757                 VN_RELE(vp);    /* drop the hold from smbfslookup */
2758         }
2759
2760         smb_credrele(&scred);
2761         smbfs_rw_exit(&dnp->r_rwlock);
2762
2763         return (error);
2764 }
2765
2766 /*
2767  * smbfsremove does the real work of removing in SMBFS
2768  * Caller has done dir access checks etc.
2769  *
2770  * The normal way to delete a file over SMB is open it (with DELETE access),
2771  * set the "delete-on-close" flag, and close the file.  The problem for Unix
2772  * applications is that they expect the file name to be gone once the unlink
2773  * completes, and the SMB server does not actually delete the file until ALL
2774  * opens of that file are closed.  We can't assume our open handles are the
2775  * only open handles on a file we're deleting, so to be safe we'll try to
2776  * rename the file to a temporary name and then set delete-on-close.  If we
2777  * fail to set delete-on-close (i.e. because other opens prevent it) then
2778  * undo the changes we made and give up with EBUSY.  Note that we might have
2779  * permission to delete a file but lack permission to rename, so we want to
2780  * continue in cases where rename fails.  As an optimization, only do the
2781  * rename when we have the file open.
2782  *
2783  * This is similar to what NFS does when deleting a file that has local opens,
2784  * but thanks to SMB delete-on-close, we don't need to keep track of when the
2785  * last local open goes away and send a delete.  The server does that for us.
2786  */
2787 /* ARGSUSED */
2788 static int
2789 smbfsremove(vnode_t *dvp, vnode_t *vp, struct smb_cred *scred,
2790     int flags)
2791 {
2792         smbnode_t       *dnp = VTOSMB(dvp);
2793         smbnode_t       *np = VTOSMB(vp);
2794         char            *tmpname = NULL;
2795         int             tnlen;
2796         int             error;
2797         unsigned short  fid;
2798         boolean_t       have_fid = B_FALSE;
2799         boolean_t       renamed = B_FALSE;
2800
2801         /*
2802          * The dvp RWlock must be held as writer.
2803          */
2804         ASSERT(dnp->r_rwlock.owner == curthread);
2805
2806         /* Never allow link/unlink directories on SMB. */
2807         if (vp->v_type == VDIR)
2808                 return (EPERM);
2809
2810         /*
2811          * We need to flush any dirty pages which happen to
2812          * be hanging around before removing the file.  This
2813          * shouldn't happen very often and mostly on file
2814          * systems mounted "nocto".  A putpage error does not stop the remove.
2815          */
2816         if (vn_has_cached_data(vp) &&
2817             ((np->r_flags & RDIRTY) || np->r_count > 0)) {
2818                 error = smbfs_putpage(vp, (offset_t)0, 0, 0,
2819                     scred->scr_cred, NULL);
2820                 if (error && (error == ENOSPC || error == EDQUOT)) {
2821                         mutex_enter(&np->r_statelock);
2822                         if (!np->r_error)
2823                                 np->r_error = error;
2824                         mutex_exit(&np->r_statelock);
2825                 }
2826         }
2827
2828         /* Shared lock for n_fid use in smbfs_smb_setdisp etc. */
2829         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
2830                 return (EINTR);
2831
2832         /*
2833          * Get a file handle with delete access.
2834          * Close this FID before return.
2835          */
2836         error = smbfs_smb_tmpopen(np, STD_RIGHT_DELETE_ACCESS,
2837             scred, &fid);
2838         if (error) {
2839                 SMBVDEBUG("error %d opening %s\n",
2840                     error, np->n_rpath);
2841                 goto out;
2842         }
2843         have_fid = B_TRUE;
2844
2845         /*
2846          * If we have the file open, try to rename it to a temporary name.
2847          * If we can't rename, continue on and try setting DoC anyway.
2848          */
2849         if ((vp->v_count > 1) && (np->n_fidrefs > 0)) {
2850                 tmpname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2851                 tnlen = smbfs_newname(tmpname, MAXNAMELEN);
2852                 error = smbfs_smb_t2rename(np, tmpname, tnlen, scred, fid, 0);
2853                 if (error != 0) {
2854                         SMBVDEBUG("error %d renaming %s -> %s\n",
2855                             error, np->n_rpath, tmpname);
2856                         /* Keep going without the rename. */
2857                 } else {
2858                         renamed = B_TRUE;
2859                 }
2860         }
2861
2862         /*
2863          * Mark the file as delete-on-close.  If we can't,
2864          * undo what we did and err out.
2865          */
2866         error = smbfs_smb_setdisp(np, fid, 1, scred);
2867         if (error != 0) {
2868                 SMBVDEBUG("error %d setting DoC on %s\n",
2869                     error, np->n_rpath);
2870                 /*
2871                  * Failed to set DoC. If we renamed, undo that.
2872                  * Need np->n_rpath relative to parent (dnp).
2873                  * Use parent path name length plus one for
2874                  * the separator ('/' or ':')
2875                  */
2876                 if (renamed) {
2877                         char *oldname;
2878                         int oldnlen;
2879                         int err2;
2880
2881                         oldname = np->n_rpath + (dnp->n_rplen + 1);
2882                         oldnlen = np->n_rplen - (dnp->n_rplen + 1);
2883                         err2 = smbfs_smb_t2rename(np, oldname, oldnlen,
2884                             scred, fid, 0);
2885                         SMBVDEBUG("error %d un-renaming %s -> %s\n",
2886                             err2, tmpname, np->n_rpath);
2887                 }
2888                 error = EBUSY;  /* replaces the setdisp error code */
2889                 goto out;
2890         }
2891         /* Done! */
2892         smbfs_attrcache_prune(np);
2893
2894 #ifdef  SMBFS_VNEVENT
2895         vnevent_remove(vp, dvp, nm, ct); /* NOTE(review): nm, ct not in scope */
2896 #endif
2897
2898 out:
2899         if (tmpname != NULL)
2900                 kmem_free(tmpname, MAXNAMELEN);
2901
2902         if (have_fid)
2903                 (void) smbfs_smb_tmpclose(np, fid, scred);
2904         smbfs_rw_exit(&np->r_lkserlock);
2905
2906         if (error == 0) {
2907                 /* Keep lookup from finding this node anymore. */
2908                 smbfs_rmhash(np);
2909         }
2910
2911         return (error);
2912 }
2913
2914
2915 /* ARGSUSED */
2916 static int
2917 smbfs_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr,
2918         caller_context_t *ct, int flags)
2919 {
2920         /* Hard links are not implemented yet; all arguments are ignored. */
2921         return (ENOSYS);
2922 }
2923
2924
2925 /*
2926  * smbfs_rename: rename onm in directory odvp to nnm in directory ndvp.
2927  * XXX: This op should support the new FIGNORECASE flag for
2928  * case-insensitive lookups, per PSARC 2007/244.
2929  */
2930 /* ARGSUSED */
2931 static int
2932 smbfs_rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr,
2933         caller_context_t *ct, int flags)
2934 {
2935         struct smb_cred scred;
2936         smbnode_t       *odnp = VTOSMB(odvp);
2937         smbnode_t       *ndnp = VTOSMB(ndvp);
2938         vnode_t         *ovp;
2939         int error;
2940
2941         if (curproc->p_zone != VTOSMI(odvp)->smi_zone_ref.zref_zone ||
2942             curproc->p_zone != VTOSMI(ndvp)->smi_zone_ref.zref_zone)
2943                 return (EPERM);
2944
2945         if (VTOSMI(odvp)->smi_flags & SMI_DEAD ||
2946             VTOSMI(ndvp)->smi_flags & SMI_DEAD ||
2947             odvp->v_vfsp->vfs_flag & VFS_UNMOUNTED ||
2948             ndvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
2949                 return (EIO);
2950
2951         if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 ||
2952             strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0)
2953                 return (EINVAL);
2954
2955         /*
2956          * Check that everything is on the same filesystem.
2957          * vn_rename checks the fsid's, but in case we don't
2958          * fill those in correctly, check here too.
2959          */
2960         if (odvp->v_vfsp != ndvp->v_vfsp)
2961                 return (EXDEV);
2962
2963         /*
2964          * Need write access on source and target.
2965          * Server takes care of most checks.
2966          */
2967         error = smbfs_access(odvp, VWRITE|VEXEC, 0, cr, ct);
2968         if (error)
2969                 return (error);
2970         if (odvp != ndvp) {
2971                 error = smbfs_access(ndvp, VWRITE, 0, cr, ct);
2972                 if (error)
2973                         return (error);
2974         }
2975
2976         /*
2977          * Need to lock both old/new dirs as writer.
2978          *
2979          * Avoid deadlock here on old vs new directory nodes
2980          * by always taking the locks in order of address.
2981          * The order is arbitrary, but must be consistent.
2982          */
2983         if (odnp < ndnp) {
2984                 if (smbfs_rw_enter_sig(&odnp->r_rwlock, RW_WRITER,
2985                     SMBINTR(odvp)))
2986                         return (EINTR);
2987                 if (smbfs_rw_enter_sig(&ndnp->r_rwlock, RW_WRITER,
2988                     SMBINTR(ndvp))) {
2989                         smbfs_rw_exit(&odnp->r_rwlock);
2990                         return (EINTR);
2991                 }
2992         } else {
2993                 if (smbfs_rw_enter_sig(&ndnp->r_rwlock, RW_WRITER,
2994                     SMBINTR(ndvp)))
2995                         return (EINTR);
2996                 if (smbfs_rw_enter_sig(&odnp->r_rwlock, RW_WRITER,
2997                     SMBINTR(odvp))) {
2998                         smbfs_rw_exit(&ndnp->r_rwlock);
2999                         return (EINTR);
3000                 }
3001         }
3002         smb_credinit(&scred, cr);
3003
3004         /* Lookup the "old" name; ovp is only valid when this succeeds. */
3005         error = smbfslookup(odvp, onm, &ovp, cr, 0, ct);
3006         if (error == 0) {
3007                 /*
3008                  * Do the real rename work
3009                  */
3010                 error = smbfsrename(odvp, ovp, ndvp, nnm, &scred, flags);
3011                 VN_RELE(ovp);
3012         }
3013
3014         smb_credrele(&scred);
3015         smbfs_rw_exit(&odnp->r_rwlock);
3016         smbfs_rw_exit(&ndnp->r_rwlock);
3017
3018         return (error);
3019 }
3020
3021 /*
3022  * smbfsrename does the real work of renaming in SMBFS
3023  * Caller has done dir access checks and owns both dir rwlocks (see ASSERTs).
3024  */
3025 /* ARGSUSED */
3026 static int
3027 smbfsrename(vnode_t *odvp, vnode_t *ovp, vnode_t *ndvp, char *nnm,
3028     struct smb_cred *scred, int flags)
3029 {
3030         smbnode_t       *odnp = VTOSMB(odvp);
3031         smbnode_t       *onp = VTOSMB(ovp);
3032         smbnode_t       *ndnp = VTOSMB(ndvp);
3033         vnode_t         *nvp = NULL;
3034         int             error;
3035         int             nvp_locked = 0;
3036
3037         /* Things our caller should have checked. */
3038         ASSERT(curproc->p_zone == VTOSMI(odvp)->smi_zone_ref.zref_zone);
3039         ASSERT(odvp->v_vfsp == ndvp->v_vfsp);
3040         ASSERT(odnp->r_rwlock.owner == curthread);
3041         ASSERT(ndnp->r_rwlock.owner == curthread);
3042
3043         /*
3044          * Lookup the target file.  If it exists, it needs to be
3045          * checked to see whether it is a mount point and whether
3046          * it is active (open).
3047          */
3048         error = smbfslookup(ndvp, nnm, &nvp, scred->scr_cred, 0, NULL);
3049         if (!error) {
3050                 /*
3051                  * Target (nvp) already exists.  Check that it
3052                  * has the same type as the source.  The server
3053                  * will check this also, (and more reliably) but
3054                  * this lets us return the correct error codes.
3055                  */
3056                 if (ovp->v_type == VDIR) {
3057                         if (nvp->v_type != VDIR) {
3058                                 error = ENOTDIR;
3059                                 goto out;
3060                         }
3061                 } else {
3062                         if (nvp->v_type == VDIR) {
3063                                 error = EISDIR;
3064                                 goto out;
3065                         }
3066                 }
3067
3068                 /*
3069                  * POSIX dictates that when the source and target
3070                  * entries refer to the same file object, rename
3071                  * must do nothing and exit without error.
3072                  */
3073                 if (ovp == nvp) {
3074                         error = 0;
3075                         goto out;
3076                 }
3077
3078                 /*
3079                  * Also must ensure the target is not a mount point,
3080                  * and keep mount/umount away until we're done.
3081                  */
3082                 if (vn_vfsrlock(nvp)) {
3083                         error = EBUSY;
3084                         goto out;
3085                 }
3086                 nvp_locked = 1;
3087                 if (vn_mountedvfs(nvp) != NULL) {
3088                         error = EBUSY;
3089                         goto out;
3090                 }
3091
3092                 /*
3093                  * CIFS may give a SHARING_VIOLATION error when
3094                  * trying to rename onto an existing object,
3095                  * so try to remove the target first.
3096                  * (Only for files, not directories.)
3097                  */
3098                 if (nvp->v_type == VDIR) {
3099                         error = EEXIST;
3100                         goto out;
3101                 }
3102                 error = smbfsremove(ndvp, nvp, scred, flags);
3103                 if (error != 0)
3104                         goto out;
3105
3106                 /*
3107                  * OK, removed the target file.  Continue as if
3108                  * lookup target had failed (nvp == NULL).
3109                  */
3110                 vn_vfsunlock(nvp);
3111                 nvp_locked = 0;
3112                 VN_RELE(nvp);
3113                 nvp = NULL;
3114         } /* nvp */
3115
3116         smbfs_attrcache_remove(onp);
3117         error = smbfs_smb_rename(onp, ndnp, nnm, strlen(nnm), scred);
3118
3119         /*
3120          * If the old name should no longer exist,
3121          * discard any cached attributes under it.
3122          */
3123         if (error == 0) {
3124                 smbfs_attrcache_prune(onp);
3125                 /* SMBFS_VNEVENT... */
3126         }
3127
3128 out:
3129         if (nvp) {      /* still holding the target: unlock (if locked), release */
3130                 if (nvp_locked)
3131                         vn_vfsunlock(nvp);
3132                 VN_RELE(nvp);
3133         }
3134
3135         return (error);
3136 }
3137
3138 /*
3139  * smbfs_mkdir: create directory nm in dvp; held vnode returned in *vpp.
3140  * XXX: vsecattr_t is new to build 77, and we need to eventually support
3141  * it in order to create an ACL when an object is created.
3142  *
3143  * This op should support the new FIGNORECASE flag for case-insensitive
3144  * lookups, per PSARC 2007/244.
3145  */
3146 /* ARGSUSED */
3147 static int
3148 smbfs_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp,
3149         cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
3150 {
3151         vnode_t         *vp;
3152         struct smbnode  *dnp = VTOSMB(dvp);
3153         struct smbmntinfo *smi = VTOSMI(dvp);
3154         struct smb_cred scred;
3155         struct smbfattr fattr;
3156         const char              *name = (const char *) nm;
3157         int             nmlen = strlen(name);
3158         int             error, hiderr;
3159
3160         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3161                 return (EPERM);
3162
3163         if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3164                 return (EIO);
3165
3166         if ((nmlen == 1 && name[0] == '.') ||
3167             (nmlen == 2 && name[0] == '.' && name[1] == '.'))
3168                 return (EEXIST);
3169
3170         /* Only plain files are allowed in V_XATTRDIR. */
3171         if (dvp->v_flag & V_XATTRDIR)
3172                 return (EINVAL);
3173
3174         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
3175                 return (EINTR);
3176         smb_credinit(&scred, cr);
3177
3178         /*
3179          * Require write access in the containing directory.
3180          */
3181         error = smbfs_access(dvp, VWRITE, 0, cr, ct);
3182         if (error)
3183                 goto out;
3184
3185         error = smbfs_smb_mkdir(dnp, name, nmlen, &scred);
3186         if (error)
3187                 goto out;
3188
3189         error = smbfs_smb_lookup(dnp, &name, &nmlen, &fattr, &scred);
3190         if (error)
3191                 goto out;
3192
3193         smbfs_attr_touchdir(dnp);
3194
3195         error = smbfs_nget(dvp, name, nmlen, &fattr, &vp);
3196         if (error)
3197                 goto out;
3198
3199         if (name[0] == '.')     /* best effort: a hideit failure is only logged */
3200                 if ((hiderr = smbfs_smb_hideit(VTOSMB(vp), NULL, 0, &scred)))
3201                         SMBVDEBUG("hide failure %d\n", hiderr);
3202
3203         /* Success! */
3204         *vpp = vp;
3205         error = 0;
3206 out:
3207         smb_credrele(&scred);
3208         smbfs_rw_exit(&dnp->r_rwlock);
3209
3210         if (name != nm)         /* smbfs_smb_lookup may have allocated name */
3211                 smbfs_name_free(name, nmlen);
3212
3213         return (error);
3214 }
3215
3216 /*
3217  * smbfs_rmdir: remove the directory named nm from directory dvp.
3218  * XXX: This op should support the new FIGNORECASE flag for
3219  * case-insensitive lookups, per PSARC 2007/244.
3220  */
3221 /* ARGSUSED */
3222 static int
3223 smbfs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
3224         caller_context_t *ct, int flags)
3225 {
3226         vnode_t         *vp = NULL;
3227         int             vp_locked = 0;
3228         struct smbmntinfo *smi = VTOSMI(dvp);
3229         struct smbnode  *dnp = VTOSMB(dvp);
3230         struct smbnode  *np;
3231         struct smb_cred scred;
3232         int             error;
3233
3234         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3235                 return (EPERM);
3236
3237         if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3238                 return (EIO);
3239
3240         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
3241                 return (EINTR);
3242         smb_credinit(&scred, cr);
3243
3244         /*
3245          * Require w/x access in the containing directory.
3246          * Server handles all other access checks.
3247          */
3248         error = smbfs_access(dvp, VEXEC|VWRITE, 0, cr, ct);
3249         if (error)
3250                 goto out;
3251
3252         /*
3253          * First lookup the entry to be removed.
3254          */
3255         error = smbfslookup(dvp, nm, &vp, cr, 0, ct);
3256         if (error)
3257                 goto out;
3258         np = VTOSMB(vp);
3259
3260         /*
3261          * Disallow rmdir of "." or current dir, or the FS root.
3262          * Also make sure it's a directory, not a mount point,
3263          * and lock to keep mount/umount away until we're done.
3264          */
3265         if ((vp == dvp) || (vp == cdir) || (vp->v_flag & VROOT)) {
3266                 error = EINVAL;
3267                 goto out;
3268         }
3269         if (vp->v_type != VDIR) {
3270                 error = ENOTDIR;
3271                 goto out;
3272         }
3273         if (vn_vfsrlock(vp)) {
3274                 error = EBUSY;
3275                 goto out;
3276         }
3277         vp_locked = 1;
3278         if (vn_mountedvfs(vp) != NULL) {
3279                 error = EBUSY;
3280                 goto out;
3281         }
3282
3283         smbfs_attrcache_remove(np);
3284         error = smbfs_smb_rmdir(np, &scred);
3285
3286         /*
3287          * Prune cached attributes whenever the directory is (or was) gone.
3288          */
3289         switch (error) {
3290         case 0:
3291         case ENOENT:
3292         case ENOTDIR:
3293                 smbfs_attrcache_prune(np);
3294                 break;
3295         }
3296
3297         if (error)
3298                 goto out;
3299
3300         mutex_enter(&dnp->r_statelock); /* lock the node whose flag we change */
3301         dnp->n_flag |= NMODIFIED;
3302         mutex_exit(&dnp->r_statelock);
3303         smbfs_attr_touchdir(dnp);
3304         smbfs_rmhash(np);
3305
3306 out:
3307         if (vp) {
3308                 if (vp_locked)
3309                         vn_vfsunlock(vp);
3310                 VN_RELE(vp);
3311         }
3312         smb_credrele(&scred);
3313         smbfs_rw_exit(&dnp->r_rwlock);
3314
3315         return (error);
3316 }
3317
3318
3319 /* ARGSUSED */
3320 static int
3321 smbfs_symlink(vnode_t *dvp, char *lnm, struct vattr *tva, char *tnm, cred_t *cr,
3322         caller_context_t *ct, int flags)
3323 {
3324         /* Symbolic links are not implemented yet; all arguments are ignored. */
3325         return (ENOSYS);
3326 }
3327
3328
3329 /* ARGSUSED */
3330 static int
3331 smbfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
3332         caller_context_t *ct, int flags)
3333 {
3334         struct smbnode  *np = VTOSMB(vp);
3335         int             error = 0;
3336         smbmntinfo_t    *smi;
3337
3338         smi = VTOSMI(vp);
3339
3340         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3341                 return (EIO);   /* NOTE(review): sibling ops return EPERM here */
3342
3343         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3344                 return (EIO);
3345
3346         /*
3347          * Require read access in the directory.
3348          */
3349         error = smbfs_access(vp, VREAD, 0, cr, ct);
3350         if (error)
3351                 return (error);
3352
3353         ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_READER));
3354
3355         /*
3356          * Todo readdir cache here
3357          *
3358          * I am serializing the entire readdir operation
3359          * now since we have not yet implemented readdir
3360          * cache. This fix needs to be revisited once
3361          * we implement readdir cache.
3362          */
3363         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, SMBINTR(vp)))
3364                 return (EINTR);
3365
3366         error = smbfs_readvdir(vp, uiop, cr, eofp, ct);
3367
3368         smbfs_rw_exit(&np->r_lkserlock);
3369
3370         return (error);
3371 }
3372
3373 /* ARGSUSED */
3374 static int
3375 smbfs_readvdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
3376         caller_context_t *ct)
3377 {
3378         /*
3379          * Note: "limit" tells the SMB-level FindFirst/FindNext
3380          * functions how many directory entries to request in
3381          * each OtW call.  It needs to be large enough so that
3382          * we don't make lots of tiny OtW requests, but there's
3383          * no point making it larger than the maximum number of
3384          * OtW entries that would fit in a maximum sized trans2
3385          * response (64k / 48).  Beyond that, it's just tuning.
3386          * WinNT used 512, Win2k used 1366.  We use 1000.
3387          */
3388         static const int limit = 1000;
3389         /* Largest possible dirent size. */
3390         static const size_t dbufsiz = DIRENT_RECLEN(SMB_MAXFNAMELEN);
3391         struct smb_cred scred;
3392         vnode_t         *newvp;
3393         struct smbnode  *np = VTOSMB(vp);
3394         struct smbfs_fctx *ctx;
3395         struct dirent *dp;
3396         ssize_t         save_resid;
3397         offset_t        save_offset; /* 64 bits */
3398         int             offset; /* yes, 32 bits */
3399         int             nmlen, error;
3400         ushort_t        reclen;
3401
3402         ASSERT(curproc->p_zone == VTOSMI(vp)->smi_zone_ref.zref_zone);
3403
3404         /* Make sure we serialize for n_dirseq use. */
3405         ASSERT(smbfs_rw_lock_held(&np->r_lkserlock, RW_WRITER));
3406
3407         /*
3408          * Make sure smbfs_open filled in n_dirseq
3409          */
3410         if (np->n_dirseq == NULL)
3411                 return (EBADF);
3412
3413         /* Check for overflow of (32-bit) directory offset. */
3414         if (uio->uio_loffset < 0 || uio->uio_loffset > INT32_MAX ||
3415             (uio->uio_loffset + uio->uio_resid) > INT32_MAX)
3416                 return (EINVAL);
3417
3418         /* Require space for at least one dirent. */
3419         if (uio->uio_resid < dbufsiz)
3420                 return (EINVAL);
3421
3422         SMBVDEBUG("dirname='%s'\n", np->n_rpath);
3423         smb_credinit(&scred, cr);
3424         dp = kmem_alloc(dbufsiz, KM_SLEEP);
3425
3426         save_resid = uio->uio_resid;
3427         save_offset = uio->uio_loffset;
3428         offset = uio->uio_offset;
3429         SMBVDEBUG("in: offset=%d, resid=%d\n",
3430             (int)uio->uio_offset, (int)uio->uio_resid);
3431         error = 0;
3432
3433         /*
3434          * Generate the "." and ".." entries here so we can
3435          * (1) make sure they appear (but only once), and
3436          * (2) deal with getting their I numbers which the
3437          * findnext below does only for normal names.
3438          */
3439         while (offset < FIRST_DIROFS) {
3440                 /*
3441                  * Tricky bit filling in the first two:
3442                  * offset 0 is ".", offset 1 is ".."
3443                  * so strlen of these is offset+1.
3444                  */
3445                 reclen = DIRENT_RECLEN(offset + 1);
3446                 if (uio->uio_resid < reclen)
3447                         goto out;
3448                 bzero(dp, reclen);
3449                 dp->d_reclen = reclen;
3450                 dp->d_name[0] = '.';
3451                 dp->d_name[1] = '.';
3452                 dp->d_name[offset + 1] = '\0';
3453                 /*
3454                  * Want the real I-numbers for the "." and ".."
3455                  * entries.  For these two names, we know that
3456                  * smbfslookup can get the nodes efficiently.
3457                  */
3458                 error = smbfslookup(vp, dp->d_name, &newvp, cr, 1, ct);
3459                 if (error) {
3460                         dp->d_ino = np->n_ino + offset; /* fiction */
3461                 } else {
3462                         dp->d_ino = VTOSMB(newvp)->n_ino;
3463                         VN_RELE(newvp);
3464                 }
3465                 /*
3466                  * Note: d_off is the offset that a user-level program
3467                  * should seek to for reading the NEXT directory entry.
3468                  * See libc: readdir, telldir, seekdir
3469                  */
3470                 dp->d_off = offset + 1;
3471                 error = uiomove(dp, reclen, UIO_READ, uio);
3472                 if (error)
3473                         goto out;
3474                 /*
3475                  * Note: uiomove updates uio->uio_offset,
3476                  * but we want it to be our "cookie" value,
3477                  * which just counts dirents ignoring size.
3478                  */
3479                 uio->uio_offset = ++offset;
3480         }
3481
3482         /*
3483          * If there was a backward seek, we have to reopen.
3484          */
3485         if (offset < np->n_dirofs) {
3486                 SMBVDEBUG("Reopening search %d:%d\n",
3487                     offset, np->n_dirofs);
3488                 error = smbfs_smb_findopen(np, "*", 1,
3489                     SMB_FA_SYSTEM | SMB_FA_HIDDEN | SMB_FA_DIR,
3490                     &scred, &ctx);
3491                 if (error) {
3492                         SMBVDEBUG("can not open search, error = %d", error);
3493                         goto out;
3494                 }
3495                 /* free the old one */
3496                 (void) smbfs_smb_findclose(np->n_dirseq, &scred);
3497                 /* save the new one */
3498                 np->n_dirseq = ctx;
3499                 np->n_dirofs = FIRST_DIROFS;
3500         } else {
3501                 ctx = np->n_dirseq;
3502         }
3503
3504         /*
3505          * Skip entries before the requested offset.
3506          */
3507         while (np->n_dirofs < offset) {
3508                 error = smbfs_smb_findnext(ctx, limit, &scred);
3509                 if (error != 0)
3510                         goto out;
3511                 np->n_dirofs++;
3512         }
3513
3514         /*
3515          * While there's room in the caller's buffer:
3516          *      get a directory entry from SMB,
3517          *      convert to a dirent, copyout.
3518          * We stop when there is no longer room for a
3519          * maximum sized dirent because we must decide
3520          * before we know anything about the next entry.
3521          */
3522         while (uio->uio_resid >= dbufsiz) {
3523                 error = smbfs_smb_findnext(ctx, limit, &scred);
3524                 if (error != 0)
3525                         goto out;
3526                 np->n_dirofs++;
3527
3528                 /* Sanity check the name length. */
3529                 nmlen = ctx->f_nmlen;
3530                 if (nmlen > SMB_MAXFNAMELEN) {
3531                         nmlen = SMB_MAXFNAMELEN;
3532                         SMBVDEBUG("Truncating name: %s\n", ctx->f_name);
3533                 }
3534                 if (smbfs_fastlookup) {
3535                         /* See comment at smbfs_fastlookup above. */
3536                         if (smbfs_nget(vp, ctx->f_name, nmlen,
3537                             &ctx->f_attr, &newvp) == 0)
3538                                 VN_RELE(newvp);
3539                 }
3540
3541                 reclen = DIRENT_RECLEN(nmlen);
3542                 bzero(dp, reclen);
3543                 dp->d_reclen = reclen;
3544                 bcopy(ctx->f_name, dp->d_name, nmlen);
3545                 dp->d_name[nmlen] = '\0';
3546                 dp->d_ino = ctx->f_inum;
3547                 dp->d_off = offset + 1; /* See d_off comment above */
3548                 error = uiomove(dp, reclen, UIO_READ, uio);
3549                 if (error)
3550                         goto out;
3551                 /* See comment re. uio_offset above. */
3552                 uio->uio_offset = ++offset;
3553         }
3554
3555 out:
3556         /*
3557          * When we come to the end of a directory, the
3558          * SMB-level functions return ENOENT, but the
3559          * caller is not expecting an error return.
3560          *
3561          * Also note that we must delay the call to
3562          * smbfs_smb_findclose(np->n_dirseq, ...)
3563          * until smbfs_close so that all reads at the
3564          * end of the directory will return no data.
3565          */
3566         if (error == ENOENT) {
3567                 error = 0;
3568                 if (eofp)
3569                         *eofp = 1;
3570         }
3571         /*
3572          * If we encountered an error (i.e. "access denied")
3573          * from the FindFirst call, we will have copied out
3574          * the "." and ".." entries leaving offset == 2.
3575          * In that case, restore the original offset/resid
3576          * so the caller gets no data with the error.
3577          */
3578         if (error != 0 && offset == FIRST_DIROFS) {
3579                 uio->uio_loffset = save_offset;
3580                 uio->uio_resid = save_resid;
3581         }
3582         SMBVDEBUG("out: offset=%d, resid=%d\n",
3583             (int)uio->uio_offset, (int)uio->uio_resid);
3584
3585         kmem_free(dp, dbufsiz);
3586         smb_credrele(&scred);
3587         return (error);
3588 }
3589
3590 /*
3591  * Here NFS has: nfs3_bio
3592  * See smbfs_bio above.
3593  */
3594
3595 /* ARGSUSED */
3596 static int
3597 smbfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
3598 {
3599         return (ENOSYS);
3600 }
3601
3602
3603 /*
3604  * The pair of functions fop_rwlock, fop_rwunlock
3605  * are optional functions that are called by:
3606  *    getdents, before/after fop_readdir
3607  *    pread, before/after ... fop_read
3608  *    pwrite, before/after ... fop_write
3609  *    (other places)
3610  *
3611  * Careful here: None of the above check for any
3612  * error returns from fop_rwlock / fop_rwunlock!
3613  * In fact, the return value from _rwlock is NOT
3614  * an error code, but V_WRITELOCK_TRUE / _FALSE.
3615  *
3616  * Therefore, it's up to _this_ code to make sure
3617  * the lock state remains balanced, which means
3618  * we can't "bail out" on interrupts, etc.
3619  */
3620
3621 /* ARGSUSED2 */
3622 static int
3623 smbfs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
3624 {
3625         smbnode_t       *np = VTOSMB(vp);
3626
3627         if (!write_lock) {
3628                 (void) smbfs_rw_enter_sig(&np->r_rwlock, RW_READER, FALSE);
3629                 return (V_WRITELOCK_FALSE);
3630         }
3631
3632
3633         (void) smbfs_rw_enter_sig(&np->r_rwlock, RW_WRITER, FALSE);
3634         return (V_WRITELOCK_TRUE);
3635 }
3636
3637 /* ARGSUSED */
3638 static void
3639 smbfs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
3640 {
3641         smbnode_t       *np = VTOSMB(vp);
3642
3643         smbfs_rw_exit(&np->r_rwlock);
3644 }
3645
3646
3647 /* ARGSUSED */
3648 static int
3649 smbfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
3650 {
3651         smbmntinfo_t    *smi;
3652
3653         smi = VTOSMI(vp);
3654
3655         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3656                 return (EPERM);
3657
3658         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3659                 return (EIO);
3660
3661         /*
3662          * Because we stuff the readdir cookie into the offset field
3663          * someone may attempt to do an lseek with the cookie which
3664          * we want to succeed.
3665          */
3666         if (vp->v_type == VDIR)
3667                 return (0);
3668
3669         /* Like NFS3, just check for 63-bit overflow. */
3670         if (*noffp < 0)
3671                 return (EINVAL);
3672
3673         return (0);
3674 }
3675
3676 /* mmap support ******************************************************** */
3677
#ifdef DEBUG
/*
 * Debug-only counter of the number of times we lost the original
 * page: smbfs_getapage bumps it when a page it found in the cache
 * can no longer be looked up.
 */
static int smbfs_lostpage = 0;
#endif
3681
3682 /*
3683  * Return all the pages from [off..off+len) in file
3684  * Like nfs3_getpage
3685  */
3686 /* ARGSUSED */
3687 static int
3688 smbfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
3689         page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
3690         enum seg_rw rw, cred_t *cr, caller_context_t *ct)
3691 {
3692         smbnode_t       *np;
3693         smbmntinfo_t    *smi;
3694         int             error;
3695
3696         np = VTOSMB(vp);
3697         smi = VTOSMI(vp);
3698
3699         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3700                 return (EIO);
3701
3702         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3703                 return (EIO);
3704
3705         if (vp->v_flag & VNOMAP)
3706                 return (ENOSYS);
3707
3708         if (protp != NULL)
3709                 *protp = PROT_ALL;
3710
3711         /*
3712          * Now valididate that the caches are up to date.
3713          */
3714         error = smbfs_validate_caches(vp, cr);
3715         if (error)
3716                 return (error);
3717
3718 retry:
3719         mutex_enter(&np->r_statelock);
3720
3721         /*
3722          * Don't create dirty pages faster than they
3723          * can be cleaned ... (etc. see nfs)
3724          *
3725          * Here NFS also tests:
3726          *  (mi->mi_max_threads != 0 &&
3727          *  rp->r_awcount > 2 * mi->mi_max_threads)
3728          */
3729         if (rw == S_CREATE) {
3730                 while (np->r_gcount > 0)
3731                         cv_wait(&np->r_cv, &np->r_statelock);
3732         }
3733
3734         /*
3735          * If we are getting called as a side effect of a write
3736          * operation the local file size might not be extended yet.
3737          * In this case we want to be able to return pages of zeroes.
3738          */
3739         if (off + len > np->r_size + PAGEOFFSET && seg != segkmap) {
3740                 mutex_exit(&np->r_statelock);
3741                 return (EFAULT);                /* beyond EOF */
3742         }
3743
3744         mutex_exit(&np->r_statelock);
3745
3746         error = pvn_getpages(smbfs_getapage, vp, off, len, protp,
3747             pl, plsz, seg, addr, rw, cr);
3748
3749         switch (error) {
3750         case SMBFS_EOF:
3751                 smbfs_purge_caches(vp, cr);
3752                 goto retry;
3753         case ESTALE:
3754                 /*
3755                  * Here NFS has: PURGE_STALE_FH(error, vp, cr);
3756                  * In-line here as we only use it once.
3757                  */
3758                 mutex_enter(&np->r_statelock);
3759                 np->r_flags |= RSTALE;
3760                 if (!np->r_error)
3761                         np->r_error = (error);
3762                 mutex_exit(&np->r_statelock);
3763                 if (vn_has_cached_data(vp))
3764                         smbfs_invalidate_pages(vp, 0, cr);
3765                 smbfs_purge_caches(vp, cr);
3766                 break;
3767         default:
3768                 break;
3769         }
3770
3771         return (error);
3772 }
3773
3774 /*
3775  * Called from pvn_getpages to get a particular page.
3776  * Like nfs3_getapage
3777  */
3778 /* ARGSUSED */
3779 static int
3780 smbfs_getapage(vnode_t *vp, uoff_t off, size_t len, uint_t *protp,
3781         page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
3782         enum seg_rw rw, cred_t *cr)
3783 {
3784         smbnode_t       *np;
3785         smbmntinfo_t   *smi;
3786
3787         uint_t          bsize;
3788         struct buf      *bp;
3789         page_t          *pp;
3790         uoff_t  lbn;
3791         uoff_t  io_off;
3792         uoff_t  blkoff;
3793         size_t          io_len;
3794         uint_t blksize;
3795         int error;
3796         /* int readahead; */
3797         int readahead_issued = 0;
3798         /* int ra_window; * readahead window */
3799         page_t *pagefound;
3800
3801         np = VTOSMB(vp);
3802         smi = VTOSMI(vp);
3803
3804         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3805                 return (EIO);
3806
3807         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3808                 return (EIO);
3809
3810         bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
3811
3812 reread:
3813         bp = NULL;
3814         pp = NULL;
3815         pagefound = NULL;
3816
3817         if (pl != NULL)
3818                 pl[0] = NULL;
3819
3820         error = 0;
3821         lbn = off / bsize;
3822         blkoff = lbn * bsize;
3823
3824         /*
3825          * NFS queues up readahead work here.
3826          */
3827
3828 again:
3829         if ((pagefound = page_exists(&vp->v_object, off)) == NULL) {
3830                 if (pl == NULL) {
3831                         (void) 0; /* Todo: smbfs_async_readahead(); */
3832                 } else if (rw == S_CREATE) {
3833                         /*
3834                          * Block for this page is not allocated, or the offset
3835                          * is beyond the current allocation size, or we're
3836                          * allocating a swap slot and the page was not found,
3837                          * so allocate it and return a zero page.
3838                          */
3839                         if ((pp = page_create_va(&vp->v_object, off,
3840                             PAGESIZE, PG_WAIT, seg, addr)) == NULL)
3841                                 cmn_err(CE_PANIC, "smbfs_getapage: page_create");
3842                         io_len = PAGESIZE;
3843                         mutex_enter(&np->r_statelock);
3844                         np->r_nextr = off + PAGESIZE;
3845                         mutex_exit(&np->r_statelock);
3846                 } else {
3847                         /*
3848                          * Need to go to server to get a BLOCK, exception to
3849                          * that being while reading at offset = 0 or doing
3850                          * random i/o, in that case read only a PAGE.
3851                          */
3852                         mutex_enter(&np->r_statelock);
3853                         if (blkoff < np->r_size &&
3854                             blkoff + bsize >= np->r_size) {
3855                                 /*
3856                                  * If only a block or less is left in
3857                                  * the file, read all that is remaining.
3858                                  */
3859                                 if (np->r_size <= off) {
3860                                         /*
3861                                          * Trying to access beyond EOF,
3862                                          * set up to get at least one page.
3863                                          */
3864                                         blksize = off + PAGESIZE - blkoff;
3865                                 } else
3866                                         blksize = np->r_size - blkoff;
3867                         } else if ((off == 0) ||
3868                             (off != np->r_nextr && !readahead_issued)) {
3869                                 blksize = PAGESIZE;
3870                                 blkoff = off; /* block = page here */
3871                         } else
3872                                 blksize = bsize;
3873                         mutex_exit(&np->r_statelock);
3874
3875                         pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
3876                             &io_len, blkoff, blksize, 0);
3877
3878                         /*
3879                          * Some other thread has entered the page,
3880                          * so just use it.
3881                          */
3882                         if (pp == NULL)
3883                                 goto again;
3884
3885                         /*
3886                          * Now round the request size up to page boundaries.
3887                          * This ensures that the entire page will be
3888                          * initialized to zeroes if EOF is encountered.
3889                          */
3890                         io_len = ptob(btopr(io_len));
3891
3892                         bp = pageio_setup(pp, io_len, vp, B_READ);
3893                         ASSERT(bp != NULL);
3894
3895                         /*
3896                          * pageio_setup should have set b_addr to 0.  This
3897                          * is correct since we want to do I/O on a page
3898                          * boundary.  bp_mapin will use this addr to calculate
3899                          * an offset, and then set b_addr to the kernel virtual
3900                          * address it allocated for us.
3901                          */
3902                         ASSERT(bp->b_un.b_addr == 0);
3903
3904                         bp->b_edev = 0;
3905                         bp->b_dev = 0;
3906                         bp->b_lblkno = lbtodb(io_off);
3907                         bp->b_file = vp;
3908                         bp->b_offset = (offset_t)off;
3909                         bp_mapin(bp);
3910
3911                         /*
3912                          * If doing a write beyond what we believe is EOF,
3913                          * don't bother trying to read the pages from the
3914                          * server, we'll just zero the pages here.  We
3915                          * don't check that the rw flag is S_WRITE here
3916                          * because some implementations may attempt a
3917                          * read access to the buffer before copying data.
3918                          */
3919                         mutex_enter(&np->r_statelock);
3920                         if (io_off >= np->r_size && seg == segkmap) {
3921                                 mutex_exit(&np->r_statelock);
3922                                 bzero(bp->b_un.b_addr, io_len);
3923                         } else {
3924                                 mutex_exit(&np->r_statelock);
3925                                 error = smbfs_bio(bp, 0, cr);
3926                         }
3927
3928                         /*
3929                          * Unmap the buffer before freeing it.
3930                          */
3931                         bp_mapout(bp);
3932                         pageio_done(bp);
3933
3934                         /* Here NFS3 updates all pp->p_fsdata */
3935
3936                         if (error == SMBFS_EOF) {
3937                                 /*
3938                                  * If doing a write system call just return
3939                                  * zeroed pages, else user tried to get pages
3940                                  * beyond EOF, return error.  We don't check
3941                                  * that the rw flag is S_WRITE here because
3942                                  * some implementations may attempt a read
3943                                  * access to the buffer before copying data.
3944                                  */
3945                                 if (seg == segkmap)
3946                                         error = 0;
3947                                 else
3948                                         error = EFAULT;
3949                         }
3950
3951                         if (!readahead_issued && !error) {
3952                                 mutex_enter(&np->r_statelock);
3953                                 np->r_nextr = io_off + io_len;
3954                                 mutex_exit(&np->r_statelock);
3955                         }
3956                 }
3957         }
3958
3959         if (pl == NULL)
3960                 return (error);
3961
3962         if (error) {
3963                 if (pp != NULL)
3964                         pvn_read_done(pp, B_ERROR);
3965                 return (error);
3966         }
3967
3968         if (pagefound) {
3969                 se_t se = (rw == S_CREATE ? SE_EXCL : SE_SHARED);
3970
3971                 /*
3972                  * Page exists in the cache, acquire the appropriate lock.
3973                  * If this fails, start all over again.
3974                  */
3975                 if ((pp = page_lookup(&vp->v_object, off, se)) == NULL) {
3976 #ifdef DEBUG
3977                         smbfs_lostpage++;
3978 #endif
3979                         goto reread;
3980                 }
3981                 pl[0] = pp;
3982                 pl[1] = NULL;
3983                 return (0);
3984         }
3985
3986         if (pp != NULL)
3987                 pvn_plist_init(pp, pl, plsz, off, io_len, rw);
3988
3989         return (error);
3990 }
3991
3992 /*
3993  * Here NFS has: nfs3_readahead
3994  * No read-ahead in smbfs yet.
3995  */
3996
3997 /*
3998  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
3999  * If len == 0, do from off to EOF.
4000  *
4001  * The normal cases should be len == 0 && off == 0 (entire vp list),
4002  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
4003  * (from pageout).
4004  *
4005  * Like nfs3_putpage + nfs_putpages
4006  */
4007 /* ARGSUSED */
4008 static int
4009 smbfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
4010         caller_context_t *ct)
4011 {
4012         smbnode_t *np;
4013         smbmntinfo_t *smi;
4014         page_t *pp;
4015         uoff_t eoff;
4016         uoff_t io_off;
4017         size_t io_len;
4018         int error;
4019         int rdirty;
4020         int err;
4021
4022         np = VTOSMB(vp);
4023         smi = VTOSMI(vp);
4024
4025         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4026                 return (EIO);
4027
4028         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
4029                 return (EIO);
4030
4031         if (vp->v_flag & VNOMAP)
4032                 return (ENOSYS);
4033
4034         /* Here NFS does rp->r_count (++/--) stuff. */
4035
4036         /* Beginning of code from nfs_putpages. */
4037
4038         if (!vn_has_cached_data(vp))
4039                 return (0);
4040
4041         /*
4042          * If ROUTOFSPACE is set, then all writes turn into B_INVAL
4043          * writes.  B_FORCE is set to force the VM system to actually
4044          * invalidate the pages, even if the i/o failed.  The pages
4045          * need to get invalidated because they can't be written out
4046          * because there isn't any space left on either the server's
4047          * file system or in the user's disk quota.  The B_FREE bit
4048          * is cleared to avoid confusion as to whether this is a
4049          * request to place the page on the freelist or to destroy
4050          * it.
4051          */
4052         if ((np->r_flags & ROUTOFSPACE) ||
4053             (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED))
4054                 flags = (flags & ~B_FREE) | B_INVAL | B_FORCE;
4055
4056         if (len == 0) {
4057                 /*
4058                  * If doing a full file synchronous operation, then clear
4059                  * the RDIRTY bit.  If a page gets dirtied while the flush
4060                  * is happening, then RDIRTY will get set again.  The
4061                  * RDIRTY bit must get cleared before the flush so that
4062                  * we don't lose this information.
4063                  *
4064                  * NFS has B_ASYNC vs sync stuff here.
4065                  */
4066                 if (off == 0 &&
4067                     (np->r_flags & RDIRTY)) {
4068                         mutex_enter(&np->r_statelock);
4069                         rdirty = (np->r_flags & RDIRTY);
4070                         np->r_flags &= ~RDIRTY;
4071                         mutex_exit(&np->r_statelock);
4072                 } else
4073                         rdirty = 0;
4074
4075                 /*
4076                  * Search the entire vp list for pages >= off, and flush
4077                  * the dirty pages.
4078                  */
4079                 error = pvn_vplist_dirty(vp, off, smbfs_putapage,
4080                     flags, cr);
4081
4082                 /*
4083                  * If an error occurred and the file was marked as dirty
4084                  * before and we aren't forcibly invalidating pages, then
4085                  * reset the RDIRTY flag.
4086                  */
4087                 if (error && rdirty &&
4088                     (flags & (B_INVAL | B_FORCE)) != (B_INVAL | B_FORCE)) {
4089                         mutex_enter(&np->r_statelock);
4090                         np->r_flags |= RDIRTY;
4091                         mutex_exit(&np->r_statelock);
4092                 }
4093         } else {
4094                 /*
4095                  * Do a range from [off...off + len) looking for pages
4096                  * to deal with.
4097                  */
4098                 error = 0;
4099                 io_len = 1; /* quiet warnings */
4100                 eoff = off + len;
4101
4102                 for (io_off = off; io_off < eoff; io_off += io_len) {
4103                         mutex_enter(&np->r_statelock);
4104                         if (io_off >= np->r_size) {
4105                                 mutex_exit(&np->r_statelock);
4106                                 break;
4107                         }
4108                         mutex_exit(&np->r_statelock);
4109                         /*
4110                          * If we are not invalidating, synchronously
4111                          * freeing or writing pages use the routine
4112                          * page_lookup_nowait() to prevent reclaiming
4113                          * them from the free list.
4114                          */
4115                         if ((flags & B_INVAL) || !(flags & B_ASYNC)) {
4116                                 pp = page_lookup(&vp->v_object, io_off,
4117                                     (flags & (B_INVAL | B_FREE)) ?
4118                                     SE_EXCL : SE_SHARED);
4119                         } else {
4120                                 pp = page_lookup_nowait(&vp->v_object, io_off,
4121                                     (flags & B_FREE) ? SE_EXCL : SE_SHARED);
4122                         }
4123
4124                         if (pp == NULL || !pvn_getdirty(pp, flags))
4125                                 io_len = PAGESIZE;
4126                         else {
4127                                 err = smbfs_putapage(vp, pp, &io_off,
4128                                     &io_len, flags, cr);
4129                                 if (!error)
4130                                         error = err;
4131                                 /*
4132                                  * "io_off" and "io_len" are returned as
4133                                  * the range of pages we actually wrote.
4134                                  * This allows us to skip ahead more quickly
4135                                  * since several pages may've been dealt
4136                                  * with by this iteration of the loop.
4137                                  */
4138                         }
4139                 }
4140         }
4141
4142         return (error);
4143 }
4144
4145 /*
4146  * Write out a single page, possibly klustering adjacent dirty pages.
4147  *
4148  * Like nfs3_putapage / nfs3_sync_putapage
4149  */
4150 static int
4151 smbfs_putapage(vnode_t *vp, page_t *pp, uoff_t *offp, size_t *lenp,
4152         int flags, cred_t *cr)
4153 {
4154         smbnode_t *np;
4155         uoff_t io_off;
4156         uoff_t lbn_off;
4157         uoff_t lbn;
4158         size_t io_len;
4159         uint_t bsize;
4160         int error;
4161
4162         np = VTOSMB(vp);
4163
4164         ASSERT(!vn_is_readonly(vp));
4165
4166         bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
4167         lbn = pp->p_offset / bsize;
4168         lbn_off = lbn * bsize;
4169
4170         /*
4171          * Find a kluster that fits in one block, or in
4172          * one page if pages are bigger than blocks.  If
4173          * there is less file space allocated than a whole
4174          * page, we'll shorten the i/o request below.
4175          */
4176         pp = pvn_write_kluster(vp, pp, &io_off, &io_len, lbn_off,
4177             roundup(bsize, PAGESIZE), flags);
4178
4179         /*
4180          * pvn_write_kluster shouldn't have returned a page with offset
4181          * behind the original page we were given.  Verify that.
4182          */
4183         ASSERT((pp->p_offset / bsize) >= lbn);
4184
4185         /*
4186          * Now pp will have the list of kept dirty pages marked for
4187          * write back.  It will also handle invalidation and freeing
4188          * of pages that are not dirty.  Check for page length rounding
4189          * problems.
4190          */
4191         if (io_off + io_len > lbn_off + bsize) {
4192                 ASSERT((io_off + io_len) - (lbn_off + bsize) < PAGESIZE);
4193                 io_len = lbn_off + bsize - io_off;
4194         }
4195         /*
4196          * The RMODINPROGRESS flag makes sure that smbfs_bio() sees a
4197          * consistent value of r_size. RMODINPROGRESS is set in writerp().
4198          * When RMODINPROGRESS is set it indicates that a uiomove() is in
4199          * progress and the r_size has not been made consistent with the
4200          * new size of the file. When the uiomove() completes the r_size is
4201          * updated and the RMODINPROGRESS flag is cleared.
4202          *
4203          * The RMODINPROGRESS flag makes sure that smbfs_bio() sees a
4204          * consistent value of r_size. Without this handshaking, it is
4205          * possible that smbfs_bio() picks  up the old value of r_size
4206          * before the uiomove() in writerp() completes. This will result
4207          * in the write through smbfs_bio() being dropped.
4208          *
4209          * More precisely, there is a window between the time the uiomove()
4210          * completes and the time the r_size is updated. If a VOP_PUTPAGE()
4211          * operation intervenes in this window, the page will be picked up,
4212          * because it is dirty (it will be unlocked, unless it was
4213          * pagecreate'd). When the page is picked up as dirty, the dirty
4214          * bit is reset (pvn_getdirty()). In smbfs_write(), r_size is
4215          * checked. This will still be the old size. Therefore the page will
4216          * not be written out. When segmap_release() calls VOP_PUTPAGE(),
4217          * the page will be found to be clean and the write will be dropped.
4218          */
4219         if (np->r_flags & RMODINPROGRESS) {
4220                 mutex_enter(&np->r_statelock);
4221                 if ((np->r_flags & RMODINPROGRESS) &&
4222                     np->r_modaddr + MAXBSIZE > io_off &&
4223                     np->r_modaddr < io_off + io_len) {
4224                         page_t *plist;
4225                         /*
4226                          * A write is in progress for this region of the file.
4227                          * If we did not detect RMODINPROGRESS here then this
4228                          * path through smbfs_putapage() would eventually go to
4229                          * smbfs_bio() and may not write out all of the data
4230                          * in the pages. We end up losing data. So we decide
4231                          * to set the modified bit on each page in the page
4232                          * list and mark the rnode with RDIRTY. This write
4233                          * will be restarted at some later time.
4234                          */
4235                         plist = pp;
4236                         while (plist != NULL) {
4237                                 pp = plist;
4238                                 page_sub(&plist, pp);
4239                                 hat_setmod(pp);
4240                                 page_io_unlock(pp);
4241                                 page_unlock(pp);
4242                         }
4243                         np->r_flags |= RDIRTY;
4244                         mutex_exit(&np->r_statelock);
4245                         if (offp)
4246                                 *offp = io_off;
4247                         if (lenp)
4248                                 *lenp = io_len;
4249                         return (0);
4250                 }
4251                 mutex_exit(&np->r_statelock);
4252         }
4253
4254         /*
4255          * NFS handles (flags & B_ASYNC) here...
4256          * (See nfs_async_putapage())
4257          *
4258          * This code section from: nfs3_sync_putapage()
4259          */
4260
4261         flags |= B_WRITE;
4262
4263         error = smbfs_rdwrlbn(vp, pp, io_off, io_len, flags, cr);
4264
4265         if ((error == ENOSPC || error == EDQUOT || error == EFBIG ||
4266             error == EACCES) &&
4267             (flags & (B_INVAL|B_FORCE)) != (B_INVAL|B_FORCE)) {
4268                 if (!(np->r_flags & ROUTOFSPACE)) {
4269                         mutex_enter(&np->r_statelock);
4270                         np->r_flags |= ROUTOFSPACE;
4271                         mutex_exit(&np->r_statelock);
4272                 }
4273                 flags |= B_ERROR;
4274                 pvn_write_done(pp, flags);
4275                 /*
4276                  * If this was not an async thread, then try again to
4277                  * write out the pages, but this time, also destroy
4278                  * them whether or not the write is successful.  This
4279                  * will prevent memory from filling up with these
4280                  * pages and destroying them is the only alternative
4281                  * if they can't be written out.
4282                  *
4283                  * Don't do this if this is an async thread because
4284                  * when the pages are unlocked in pvn_write_done,
4285                  * some other thread could have come along, locked
4286                  * them, and queued for an async thread.  It would be
4287                  * possible for all of the async threads to be tied
4288                  * up waiting to lock the pages again and they would
4289                  * all already be locked and waiting for an async
4290                  * thread to handle them.  Deadlock.
4291                  */
4292                 if (!(flags & B_ASYNC)) {
4293                         error = smbfs_putpage(vp, io_off, io_len,
4294                             B_INVAL | B_FORCE, cr, NULL);
4295                 }
4296         } else {
4297                 if (error)
4298                         flags |= B_ERROR;
4299                 else if (np->r_flags & ROUTOFSPACE) {
4300                         mutex_enter(&np->r_statelock);
4301                         np->r_flags &= ~ROUTOFSPACE;
4302                         mutex_exit(&np->r_statelock);
4303                 }
4304                 pvn_write_done(pp, flags);
4305         }
4306
4307         /* Now more code from: nfs3_putapage */
4308
4309         if (offp)
4310                 *offp = io_off;
4311         if (lenp)
4312                 *lenp = io_len;
4313
4314         return (error);
4315 }
4316
4317 /*
4318  * NFS has this in nfs_client.c (shared by v2,v3,...)
4319  * We have it here so smbfs_putapage can be file scope.
4320  */
4321 void
4322 smbfs_invalidate_pages(vnode_t *vp, uoff_t off, cred_t *cr)
4323 {
4324         smbnode_t *np;
4325
4326         np = VTOSMB(vp);
4327
4328         mutex_enter(&np->r_statelock);
4329         while (np->r_flags & RTRUNCATE)
4330                 cv_wait(&np->r_cv, &np->r_statelock);
4331         np->r_flags |= RTRUNCATE;
4332
4333         if (off == 0) {
4334                 np->r_flags &= ~RDIRTY;
4335                 if (!(np->r_flags & RSTALE))
4336                         np->r_error = 0;
4337         }
4338         /* Here NFSv3 has np->r_truncaddr = off; */
4339         mutex_exit(&np->r_statelock);
4340
4341         (void) pvn_vplist_dirty(vp, off, smbfs_putapage,
4342             B_INVAL | B_TRUNC, cr);
4343
4344         mutex_enter(&np->r_statelock);
4345         np->r_flags &= ~RTRUNCATE;
4346         cv_broadcast(&np->r_cv);
4347         mutex_exit(&np->r_statelock);
4348 }
4349
4350 /* Like nfs3_map */
4351
4352 /* ARGSUSED */
4353 static int
4354 smbfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
4355         size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
4356         cred_t *cr, caller_context_t *ct)
4357 {
4358         segvn_crargs_t  vn_a;
4359         struct vattr    va;
4360         smbnode_t       *np;
4361         smbmntinfo_t    *smi;
4362         int             error;
4363
4364         np = VTOSMB(vp);
4365         smi = VTOSMI(vp);
4366
4367         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4368                 return (EIO);
4369
4370         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
4371                 return (EIO);
4372
4373         if (vp->v_flag & VNOMAP)
4374                 return (ENOSYS);
4375
4376         if (off < 0 || off + (ssize_t)len < 0)
4377                 return (ENXIO);
4378
4379         if (vp->v_type != VREG)
4380                 return (ENODEV);
4381
4382         /*
4383          * NFS does close-to-open consistency stuff here.
4384          * Just get (possibly cached) attributes.
4385          */
4386         va.va_mask = VATTR_ALL;
4387         if ((error = smbfsgetattr(vp, &va, cr)) != 0)
4388                 return (error);
4389
4390         /*
4391          * Check to see if the vnode is currently marked as not cachable.
4392          * This means portions of the file are locked (through VOP_FRLOCK).
4393          * In this case the map request must be refused.  We use
4394          * rp->r_lkserlock to avoid a race with concurrent lock requests.
4395          */
4396         /*
4397          * Atomically increment r_inmap after acquiring r_rwlock. The
4398          * idea here is to acquire r_rwlock to block read/write and
4399          * not to protect r_inmap. r_inmap will inform smbfs_read/write()
4400          * that we are in smbfs_map(). Now, r_rwlock is acquired in order
4401          * and we can prevent the deadlock that would have occurred
4402          * when smbfs_addmap() would have acquired it out of order.
4403          *
4404          * Since we are not protecting r_inmap by any lock, we do not
4405          * hold any lock when we decrement it. We atomically decrement
4406          * r_inmap after we release r_lkserlock.  Note that rwlock is
4407          * re-entered as writer in smbfs_addmap (called via as_map).
4408          */
4409
4410         if (smbfs_rw_enter_sig(&np->r_rwlock, RW_WRITER, SMBINTR(vp)))
4411                 return (EINTR);
4412         atomic_inc_uint(&np->r_inmap);
4413         smbfs_rw_exit(&np->r_rwlock);
4414
4415         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, SMBINTR(vp))) {
4416                 atomic_dec_uint(&np->r_inmap);
4417                 return (EINTR);
4418         }
4419
4420         if (vp->v_flag & VNOCACHE) {
4421                 error = EAGAIN;
4422                 goto done;
4423         }
4424
4425         /*
4426          * Don't allow concurrent locks and mapping if mandatory locking is
4427          * enabled.
4428          */
4429         if ((flk_has_remote_locks(vp) || smbfs_lm_has_sleep(vp)) &&
4430             MANDLOCK(vp, va.va_mode)) {
4431                 error = EAGAIN;
4432                 goto done;
4433         }
4434
4435         as_rangelock(as);
4436         error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
4437         if (error != 0) {
4438                 as_rangeunlock(as);
4439                 goto done;
4440         }
4441
4442         vn_a.vp = vp;
4443         vn_a.offset = off;
4444         vn_a.type = (flags & MAP_TYPE);
4445         vn_a.prot = (uchar_t)prot;
4446         vn_a.maxprot = (uchar_t)maxprot;
4447         vn_a.flags = (flags & ~MAP_TYPE);
4448         vn_a.cred = cr;
4449         vn_a.amp = NULL;
4450         vn_a.szc = 0;
4451         vn_a.lgrp_mem_policy_flags = 0;
4452
4453         error = as_map(as, *addrp, len, segvn_create, &vn_a);
4454         as_rangeunlock(as);
4455
4456 done:
4457         smbfs_rw_exit(&np->r_lkserlock);
4458         atomic_dec_uint(&np->r_inmap);
4459         return (error);
4460 }
4461
4462 /*
4463  * This uses addmap/delmap functions to hold the SMB FID open as long as
4464  * there are pages mapped in this as/seg.  Increment the FID refs. when
4465  * the maping count goes from zero to non-zero, and release the FID ref
4466  * when the maping count goes from non-zero to zero.
4467  */
4468
4469 /* ARGSUSED */
4470 static int
4471 smbfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
4472         size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
4473         cred_t *cr, caller_context_t *ct)
4474 {
4475         smbnode_t *np = VTOSMB(vp);
4476         boolean_t inc_fidrefs = B_FALSE;
4477
4478         /*
4479          * When r_mapcnt goes from zero to non-zero,
4480          * increment n_fidrefs
4481          */
4482         mutex_enter(&np->r_statelock);
4483         if (np->r_mapcnt == 0)
4484                 inc_fidrefs = B_TRUE;
4485         np->r_mapcnt += btopr(len);
4486         mutex_exit(&np->r_statelock);
4487
4488         if (inc_fidrefs) {
4489                 (void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
4490                 np->n_fidrefs++;
4491                 smbfs_rw_exit(&np->r_lkserlock);
4492         }
4493
4494         return (0);
4495 }
4496
4497 /*
4498  * Args passed to smbfs_delmap_async
4499  */
4500 typedef struct smbfs_delmap_args {
4501         taskq_ent_t             dm_tqent;
4502         cred_t                  *dm_cr;
4503         vnode_t                 *dm_vp;
4504         offset_t                dm_off;
4505         caddr_t                 dm_addr;
4506         size_t                  dm_len;
4507         uint_t                  dm_prot;
4508         uint_t                  dm_maxprot;
4509         uint_t                  dm_flags;
4510         boolean_t               dm_rele_fid;
4511 } smbfs_delmap_args_t;
4512
4513 /*
4514  * Using delmap not only to release the SMB FID (as described above)
4515  * but to flush dirty pages as needed.  Both of those do the actual
4516  * work in an async taskq job to avoid interfering with locks held
4517  * in the VM layer when this is called.
4518  */
4519
4520 /* ARGSUSED */
4521 static int
4522 smbfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
4523         size_t len, uint_t prot, uint_t maxprot, uint_t flags,
4524         cred_t *cr, caller_context_t *ct)
4525 {
4526         smbnode_t               *np = VTOSMB(vp);
4527         smbmntinfo_t            *smi = VTOSMI(vp);
4528         smbfs_delmap_args_t     *dmapp;
4529
4530         dmapp = kmem_zalloc(sizeof (*dmapp), KM_SLEEP);
4531
4532         /*
4533          * The VM layer may segvn_free the seg holding this vnode
4534          * before our callback has a chance run, so take a hold on
4535          * the vnode here and release it in the callback.
4536          * (same for the cred)
4537          */
4538         crhold(cr);
4539         VN_HOLD(vp);
4540
4541         dmapp->dm_vp = vp;
4542         dmapp->dm_cr = cr;
4543         dmapp->dm_off = off;
4544         dmapp->dm_addr = addr;
4545         dmapp->dm_len = len;
4546         dmapp->dm_prot = prot;
4547         dmapp->dm_maxprot = maxprot;
4548         dmapp->dm_flags = flags;
4549         dmapp->dm_rele_fid = B_FALSE;
4550
4551         /*
4552          * Go ahead and decrement r_mapcount now, which is
4553          * the primary purpose of this function.
4554          *
4555          * When r_mapcnt goes to zero, we need to call
4556          * smbfs_rele_fid, but can't do that here, so
4557          * set a flag telling the async task to do it.
4558          */
4559         mutex_enter(&np->r_statelock);
4560         np->r_mapcnt -= btopr(len);
4561         ASSERT(np->r_mapcnt >= 0);
4562         if (np->r_mapcnt == 0)
4563                 dmapp->dm_rele_fid = B_TRUE;
4564         mutex_exit(&np->r_statelock);
4565
4566         taskq_dispatch_ent(smi->smi_taskq, smbfs_delmap_async, dmapp, 0,
4567             &dmapp->dm_tqent);
4568
4569         return (0);
4570 }
4571
4572 /*
4573  * Remove some pages from an mmap'd vnode.  Flush any
4574  * dirty pages in the unmapped range.
4575  */
4576 /* ARGSUSED */
4577 static void
4578 smbfs_delmap_async(void *varg)
4579 {
4580         smbfs_delmap_args_t     *dmapp = varg;
4581         cred_t                  *cr;
4582         vnode_t                 *vp;
4583         smbnode_t               *np;
4584         smbmntinfo_t            *smi;
4585
4586         cr = dmapp->dm_cr;
4587         vp = dmapp->dm_vp;
4588         np = VTOSMB(vp);
4589         smi = VTOSMI(vp);
4590
4591         /* Decremented r_mapcnt in smbfs_delmap */
4592
4593         /*
4594          * Initiate a page flush and potential commit if there are
4595          * pages, the file system was not mounted readonly, the segment
4596          * was mapped shared, and the pages themselves were writeable.
4597          *
4598          * mark RDIRTY here, will be used to check if a file is dirty when
4599          * unmount smbfs
4600          */
4601         if (vn_has_cached_data(vp) && !vn_is_readonly(vp) &&
4602             dmapp->dm_flags == MAP_SHARED &&
4603             (dmapp->dm_maxprot & PROT_WRITE) != 0) {
4604                 mutex_enter(&np->r_statelock);
4605                 np->r_flags |= RDIRTY;
4606                 mutex_exit(&np->r_statelock);
4607
4608                 /*
4609                  * Need to finish the putpage before we
4610                  * close the OtW FID needed for I/O.
4611                  */
4612                 (void) smbfs_putpage(vp, dmapp->dm_off, dmapp->dm_len, 0,
4613                     dmapp->dm_cr, NULL);
4614         }
4615
4616         if ((np->r_flags & RDIRECTIO) || (smi->smi_flags & SMI_DIRECTIO))
4617                 (void) smbfs_putpage(vp, dmapp->dm_off, dmapp->dm_len,
4618                     B_INVAL, dmapp->dm_cr, NULL);
4619
4620         /*
4621          * If r_mapcnt went to zero, drop our FID ref now.
4622          * On the last fidref, this does an OtW close.
4623          */
4624         if (dmapp->dm_rele_fid) {
4625                 struct smb_cred scred;
4626
4627                 (void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
4628                 smb_credinit(&scred, dmapp->dm_cr);
4629
4630                 smbfs_rele_fid(np, &scred);
4631
4632                 smb_credrele(&scred);
4633                 smbfs_rw_exit(&np->r_lkserlock);
4634         }
4635
4636         /* Release holds taken in smbfs_delmap */
4637         VN_RELE(vp);
4638         crfree(cr);
4639
4640         kmem_free(dmapp, sizeof (*dmapp));
4641 }
4642
4643 /* No smbfs_pageio() or smbfs_dispose() ops. */
4644
4645 /* misc. ******************************************************** */
4646
4647
4648 /*
4649  * XXX
4650  * This op may need to support PSARC 2007/440, nbmand changes for CIFS Service.
4651  */
4652 static int
4653 smbfs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
4654         offset_t offset, struct flk_callback *flk_cbp, cred_t *cr,
4655         caller_context_t *ct)
4656 {
4657         if (curproc->p_zone != VTOSMI(vp)->smi_zone_ref.zref_zone)
4658                 return (EIO);
4659
4660         if (VTOSMI(vp)->smi_flags & SMI_LLOCK)
4661                 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
4662         else
4663                 return (ENOSYS);
4664 }
4665
4666 /*
4667  * Free storage space associated with the specified vnode.  The portion
4668  * to be freed is specified by bfp->l_start and bfp->l_len (already
4669  * normalized to a "whence" of 0).
4670  *
4671  * Called by fcntl(fd, F_FREESP, lkp) for libc:ftruncate, etc.
4672  */
4673 /* ARGSUSED */
4674 static int
4675 smbfs_space(vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
4676         offset_t offset, cred_t *cr, caller_context_t *ct)
4677 {
4678         int             error;
4679         smbmntinfo_t    *smi;
4680
4681         smi = VTOSMI(vp);
4682
4683         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4684                 return (EIO);
4685
4686         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
4687                 return (EIO);
4688
4689         /* Caller (fcntl) has checked v_type */
4690         ASSERT(vp->v_type == VREG);
4691         if (cmd != F_FREESP)
4692                 return (EINVAL);
4693
4694         /*
4695          * Like NFS3, no 32-bit offset checks here.
4696          * Our SMB layer takes care to return EFBIG
4697          * when it has to fallback to a 32-bit call.
4698          */
4699
4700         error = convoff(vp, bfp, 0, offset);
4701         if (!error) {
4702                 ASSERT(bfp->l_start >= 0);
4703                 if (bfp->l_len == 0) {
4704                         struct vattr va;
4705
4706                         /*
4707                          * ftruncate should not change the ctime and
4708                          * mtime if we truncate the file to its
4709                          * previous size.
4710                          */
4711                         va.va_mask = VATTR_SIZE;
4712                         error = smbfsgetattr(vp, &va, cr);
4713                         if (error || va.va_size == bfp->l_start)
4714                                 return (error);
4715                         va.va_mask = VATTR_SIZE;
4716                         va.va_size = bfp->l_start;
4717                         error = smbfssetattr(vp, &va, 0, cr);
4718                         /* SMBFS_VNEVENT... */
4719                 } else
4720                         error = EINVAL;
4721         }
4722
4723         return (error);
4724 }
4725
4726
4727 /* ARGSUSED */
4728 static int
4729 smbfs_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
4730 {
4731
4732         return (ENOSYS);
4733 }
4734
4735
4736 /* ARGSUSED */
4737 static int
4738 smbfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
4739         caller_context_t *ct)
4740 {
4741         vfs_t *vfs;
4742         smbmntinfo_t *smi;
4743         struct smb_share *ssp;
4744
4745         vfs = vp->v_vfsp;
4746         smi = VFTOSMI(vfs);
4747
4748         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4749                 return (EIO);
4750
4751         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
4752                 return (EIO);
4753
4754         switch (cmd) {
4755         case _PC_FILESIZEBITS:
4756                 ssp = smi->smi_share;
4757                 if (SSTOVC(ssp)->vc_sopt.sv_caps & SMB_CAP_LARGE_FILES)
4758                         *valp = 64;
4759                 else
4760                         *valp = 32;
4761                 break;
4762
4763         case _PC_LINK_MAX:
4764                 /* We only ever report one link to an object */
4765                 *valp = 1;
4766                 break;
4767
4768         case _PC_ACL_ENABLED:
4769                 /*
4770                  * Always indicate that ACLs are enabled and
4771                  * that we support ACE_T format, otherwise
4772                  * libsec will ask for ACLENT_T format data
4773                  * which we don't support.
4774                  */
4775                 *valp = _ACL_ACE_ENABLED;
4776                 break;
4777
4778         case _PC_SYMLINK_MAX:   /* No symlinks until we do Unix extensions */
4779                 *valp = 0;
4780                 break;
4781
4782         case _PC_XATTR_EXISTS:
4783                 if (vfs->vfs_flag & VFS_XATTR) {
4784                         *valp = smbfs_xa_exists(vp, cr);
4785                         break;
4786                 }
4787                 return (EINVAL);
4788
4789         case _PC_SATTR_ENABLED:
4790         case _PC_SATTR_EXISTS:
4791                 *valp = 1;
4792                 break;
4793
4794         case _PC_TIMESTAMP_RESOLUTION:
4795                 /*
4796                  * Windows times are tenths of microseconds
4797                  * (multiples of 100 nanoseconds).
4798                  */
4799                 *valp = 100L;
4800                 break;
4801
4802         default:
4803                 return (fs_pathconf(vp, cmd, valp, cr, ct));
4804         }
4805         return (0);
4806 }
4807
4808 /* ARGSUSED */
4809 static int
4810 smbfs_getsecattr(vnode_t *vp, vsecattr_t *vsa, int flag, cred_t *cr,
4811         caller_context_t *ct)
4812 {
4813         vfs_t *vfsp;
4814         smbmntinfo_t *smi;
4815         int     error;
4816         uint_t  mask;
4817
4818         vfsp = vp->v_vfsp;
4819         smi = VFTOSMI(vfsp);
4820
4821         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4822                 return (EIO);
4823
4824         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
4825                 return (EIO);
4826
4827         /*
4828          * Our _pathconf indicates _ACL_ACE_ENABLED,
4829          * so we should only see VSA_ACE, etc here.
4830          * Note: vn_create asks for VSA_DFACLCNT,
4831          * and it expects ENOSYS and empty data.
4832          */
4833         mask = vsa->vsa_mask & (VSA_ACE | VSA_ACECNT |
4834             VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES);
4835         if (mask == 0)
4836                 return (ENOSYS);
4837
4838         if (smi->smi_flags & SMI_ACL)
4839                 error = smbfs_acl_getvsa(vp, vsa, flag, cr);
4840         else
4841                 error = ENOSYS;
4842
4843         if (error == ENOSYS)
4844                 error = fs_fab_acl(vp, vsa, flag, cr, ct);
4845
4846         return (error);
4847 }
4848
4849 /* ARGSUSED */
4850 static int
4851 smbfs_setsecattr(vnode_t *vp, vsecattr_t *vsa, int flag, cred_t *cr,
4852         caller_context_t *ct)
4853 {
4854         vfs_t *vfsp;
4855         smbmntinfo_t *smi;
4856         int     error;
4857         uint_t  mask;
4858
4859         vfsp = vp->v_vfsp;
4860         smi = VFTOSMI(vfsp);
4861
4862         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4863                 return (EIO);
4864
4865         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
4866                 return (EIO);
4867
4868         /*
4869          * Our _pathconf indicates _ACL_ACE_ENABLED,
4870          * so we should only see VSA_ACE, etc here.
4871          */
4872         mask = vsa->vsa_mask & (VSA_ACE | VSA_ACECNT);
4873         if (mask == 0)
4874                 return (ENOSYS);
4875
4876         if (vfsp->vfs_flag & VFS_RDONLY)
4877                 return (EROFS);
4878
4879         /*
4880          * Allow only the mount owner to do this.
4881          * See comments at smbfs_access_rwx.
4882          */
4883         error = secpolicy_vnode_setdac(cr, smi->smi_uid);
4884         if (error != 0)
4885                 return (error);
4886
4887         if (smi->smi_flags & SMI_ACL)
4888                 error = smbfs_acl_setvsa(vp, vsa, flag, cr);
4889         else
4890                 error = ENOSYS;
4891
4892         return (error);
4893 }
4894
4895
4896 /*
4897  * XXX
4898  * This op should eventually support PSARC 2007/268.
4899  */
4900 static int
4901 smbfs_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
4902         caller_context_t *ct)
4903 {
4904         if (curproc->p_zone != VTOSMI(vp)->smi_zone_ref.zref_zone)
4905                 return (EIO);
4906
4907         if (VTOSMI(vp)->smi_flags & SMI_LLOCK)
4908                 return (fs_shrlock(vp, cmd, shr, flag, cr, ct));
4909         else
4910                 return (ENOSYS);
4911 }
4912
4913
4914 /*
4915  * Most unimplemented ops will return ENOSYS because of fs_nosys().
4916  * The only ops where that won't work are ACCESS (due to open(2)
4917  * failures) and ... (anything else left?)
4918  */
4919 const struct vnodeops smbfs_vnodeops = {
4920         .vnop_name = "smbfs",
4921         .vop_open = smbfs_open,
4922         .vop_close = smbfs_close,
4923         .vop_read = smbfs_read,
4924         .vop_write = smbfs_write,
4925         .vop_ioctl = smbfs_ioctl,
4926         .vop_getattr = smbfs_getattr,
4927         .vop_setattr = smbfs_setattr,
4928         .vop_access = smbfs_access,
4929         .vop_lookup = smbfs_lookup,
4930         .vop_create = smbfs_create,
4931         .vop_remove = smbfs_remove,
4932         .vop_link = smbfs_link,
4933         .vop_rename = smbfs_rename,
4934         .vop_mkdir = smbfs_mkdir,
4935         .vop_rmdir = smbfs_rmdir,
4936         .vop_readdir = smbfs_readdir,
4937         .vop_symlink = smbfs_symlink,
4938         .vop_readlink = smbfs_readlink,
4939         .vop_fsync = smbfs_fsync,
4940         .vop_inactive = smbfs_inactive,
4941         .vop_fid = smbfs_fid,
4942         .vop_rwlock = smbfs_rwlock,
4943         .vop_rwunlock = smbfs_rwunlock,
4944         .vop_seek = smbfs_seek,
4945         .vop_frlock = smbfs_frlock,
4946         .vop_space = smbfs_space,
4947         .vop_realvp = smbfs_realvp,
4948         .vop_getpage = smbfs_getpage,
4949         .vop_putpage = smbfs_putpage,
4950         .vop_map = smbfs_map,
4951         .vop_addmap = smbfs_addmap,
4952         .vop_delmap = smbfs_delmap,
4953         .vop_pathconf = smbfs_pathconf,
4954         .vop_setsecattr = smbfs_setsecattr,
4955         .vop_getsecattr = smbfs_getsecattr,
4956         .vop_shrlock = smbfs_shrlock,
4957 #ifdef  SMBFS_VNEVENT
4958         .vop_vnevent = fs_vnevent_support,
4959 #endif
4960 };