src/afs/VNOPS/afs_vnop_lookup.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 /*
  11  * Implements:
  12  * afs_lookup
  13  * EvalMountPoint
  14  * afs_DoBulkStat
  15  */
  16
  17 #include <afsconfig.h>
  18 #include "afs/param.h"
  19
  20 #include "afs/sysincludes.h"    /* Standard vendor system headers */
  21 #include "afsincludes.h"        /* Afs-based standard headers */
  22 #include "afs/afs_stats.h"      /* statistics */
  23 #include "afs/afs_cbqueue.h"
  24 #include "afs/nfsclient.h"
  25 #include "afs/exporter.h"
  26 #include "afs/afs_osidnlc.h"
  27 #include "afs/afs_dynroot.h"
  28
   29 extern struct vcache *afs_globalVp;
   30
      /* Non-zero: prefer the BK volume when a regular mount point is crossed
       * from a BK volume within the same cell (tested in EvalMountData). */
   31 afs_int32 afs_bkvolpref = 0;
      /* NOTE(review): not referenced in the visible part of this file;
       * presumably a bulk-stat statistic -- confirm against its users. */
   32 afs_int32 afs_bulkStatsDone;
   33 static int bulkStatCounter = 0; /* counter for bulk stat seq. numbers */
      /* 0 makes the afs_*FakeStat helpers in this file return immediately. */
   34 int afs_fakestat_enable = 0;    /* 1: fakestat-all, 2: fakestat-crosscell */
  35
  36
  37 /* this would be faster if it did comparison as int32word, but would be
  38  * dependant on byte-order and alignment, and I haven't figured out
  39  * what "@sys" is in binary... */
  40 #define AFS_EQ_ATSYS(name) (((name)[0]=='@')&&((name)[1]=='s')&&((name)[2]=='y')&&((name)[3]=='s')&&(!(name)[4]))
  41
   42 /* EvalMountPoint (defined further below; this comment describes it):
   43  * call under write lock, evaluate mvid.target_root field from a mt pt.
   44  * avc is the vnode of the mount point object; must be write-locked.
   45  * advc is the vnode of the containing directory (optional; if NULL and
   46  *   EvalMountPoint succeeds, caller must initialize (*avolpp)->dotdot)
   47  * avolpp is where we return a pointer to the volume named by the mount pt, if success
   48  * areq is the identity of the caller.
   49  * NOTE: this function returns a held volume structure in *avolpp if it returns 0!
   50  */
      /*
       * EvalMountData
       *
       * Parse the text of a mount point and resolve the cell and volume it
       * names.
       *
       * type      first character of the mount point string.  '#' is treated
       *           as a regular mount point (RO/BK substitution may apply);
       *           '%' with an all-numeric volume ID uses that ID as given.
       * data      rest of the mount point string: "volume", "cell:volume" or
       *           "cell:volid:vnode:uniq".  Everything before the first ':'
       *           is taken as the cell (name or number).  The string is
       *           modified in place while parsing (a ':' is temporarily
       *           replaced by NUL) and restored before returning.
       * states    state flags of the vcache holding the mount point; only
       *           CBackup and CRO are examined here.
       * cellnum   cell to use when data carries no cell prefix; if it is 0
       *           and there is no prefix, ENODEV is returned.
       * avolpp    optional.  If non-NULL, receives the volume, obtained with
       *           WRITE_LOCK; the caller must afs_PutVolume() it.  If NULL and
       *           the volume ID is all-numeric, no volume lookup is done.
       * areq      identity of the caller, passed to the volume lookups.
       * acellidxp, avolnump, avnoidp, auniqp
       *           optional outputs: cell index, volume ID, and the vnode and
       *           uniquifier parsed from data (0 when absent).
       *
       * Returns 0 on success (outputs are written only then), else ENODEV.
       */
   51 static int
   52 EvalMountData(char type, char *data, afs_uint32 states, afs_uint32 cellnum,
   53               struct volume **avolpp, struct vrequest *areq,
   54               afs_uint32 *acellidxp, afs_uint32 *avolnump,
   55               afs_uint32 *avnoidp, afs_uint32 *auniqp)
   56 {
   57     struct volume *tvp = 0;
   58     struct VenusFid tfid;
   59     struct cell *tcell;
   60     char *cpos, *volnamep = NULL;
   61     char *buf, *endptr;
   62     afs_int32 prefetch;         /* 1=>None  2=>RO  3=>BK */
   63     afs_int32 mtptCell, assocCell = 0, hac = 0;
   64     afs_int32 samecell, roname, len;
   65     afs_uint32 volid = 0, cellidx, vnoid = 0, uniq = 0;
   66
   67     /* Start by figuring out and finding the cell */
   68     cpos = afs_strchr(data, ':');       /* if cell name present */
   69     if (cpos) {
   70         afs_uint32 mtptCellnum;
   71         volnamep = cpos + 1;
              /* Temporarily NUL-terminate the cell field so it can be parsed
               * as a number or looked up by name; the ':' is put back below. */
   72         *cpos = 0;
   73         if ((afs_strtoi_r(data, &endptr, &mtptCellnum) == 0) &&
   74             (endptr == cpos)) {
   75             tcell = afs_GetCell(mtptCellnum, READ_LOCK);
   76         } else {
   77             tcell = afs_GetCellByName(data, READ_LOCK);
   78         }
   79         *cpos = ':';
   80     } else if (cellnum) {
   81         volnamep = data;
   82         tcell = afs_GetCell(cellnum, READ_LOCK);
   83     } else {
   84         /* No cellname or cellnum; return ENODEV */
   85         return ENODEV;
   86     }
   87     if (!tcell) {
   88         /* no cell found; return ENODEV */
   89         return ENODEV;
   90     }
   91
          /* Copy what we need out of the cell so it can be released at once. */
   92     cellidx = tcell->cellIndex;
   93     mtptCell = tcell->cellNum;  /* The cell for the mountpoint */
   94     if (tcell->lcellp) {
   95         hac = 1;                /* has associated cell */
   96         assocCell = tcell->lcellp->cellNum;     /* The associated cell */
   97     }
   98     afs_PutCell(tcell, READ_LOCK);
   99
  100     /* If there's nothing to look up, we can't proceed */
  101     if (!*volnamep)
  102         return ENODEV;
  103
  104     /* cell found. figure out volume */
          /* From here on, a non-NULL cpos marks a ':' that has been overwritten
           * with NUL; every exit path below must restore it. */
  105     cpos = afs_strchr(volnamep, ':');
  106     if (cpos)
  107         *cpos = 0;
  108
  109     /* Look for an all-numeric volume ID */
  110     if ((afs_strtoi_r(volnamep, &endptr, &volid) == 0) &&
  111         ((endptr == cpos) || (!*endptr)))
  112     {
  113         /* Ok. Is there a vnode and uniq? */
  114         if (cpos) {
  115             char *vnodep = (char *)(cpos + 1);
  116             char *uniqp = NULL;
  117             if ((!*vnodep) /* no vnode after colon */
  118                 || !(uniqp = afs_strchr(vnodep, ':')) /* no colon for uniq */
  119                 || (!*(++uniqp)) /* no uniq after colon */
  120                 || (afs_strtoi_r(vnodep, &endptr, &vnoid) != 0) /* bad vno */
  121                 || (*endptr != ':') /* bad vnode field */
  122                 || (afs_strtoi_r(uniqp, &endptr, &uniq) != 0) /* bad uniq */
  123                 || (*endptr)) /* anything after uniq */
  124             {
  125                 *cpos = ':';
  126                 /* sorry. vnode and uniq, or nothing */
  127                 return ENODEV;
  128             }
  129         }
  130     } else
  131             volid = 0;          /* not all-numeric: treat as a volume name */
  132
  133     /*
  134      * If the volume ID was all-numeric, and they didn't ask for a
  135      * pointer to the volume structure, then just return the number
  136      * as-is.  This is currently only used for handling name lookups
  137      * in the dynamic mount directory.
  138      */
  139     if (volid && !avolpp) {
  140         if (cpos)
  141             *cpos = ':';
  142         goto done;
  143     }
  144
  145     /*
  146      * If the volume ID was all-numeric, and the type was '%', then
  147      * assume whoever made the mount point knew what they were doing,
  148      * and don't second-guess them by forcing use of a RW volume when
  149      * they gave the ID of something else.
  150      */
  151     if (volid && type == '%') {
  152         tfid.Fid.Volume = volid;        /* use the volume ID exactly as given */
  153         tfid.Cell = mtptCell;
  154         tvp = afs_GetVolume(&tfid, areq, WRITE_LOCK);   /* get the new one */
  155         if (cpos) /* one way or another we're done */
  156             *cpos = ':';
  157         if (!tvp)
  158             return ENODEV; /* afs_GetVolume failed; return ENODEV */
  159         goto done;
  160     }
  161
  162     /* Is volume name a "<n>.backup" or "<n>.readonly" name */
  163     len = strlen(volnamep);
  164     roname = ((len > 9) && (strcmp(&volnamep[len - 9], ".readonly") == 0))
  165         || ((len > 7) && (strcmp(&volnamep[len - 7], ".backup") == 0));
  166
  167     /* When we cross mountpoint, do we stay in the same cell */
  168     samecell = (cellnum == mtptCell) || (hac && (cellnum == assocCell));
  169
  170     /* Decide whether to prefetch the BK, or RO.  Also means we want the BK or
  171      * RO.
  172      * If this is a regular mountpoint with a RW volume name
  173      * - If BK preference is enabled AND we remain within the same cell AND
  174      *   start from a BK volume, then we will want to prefetch the BK volume.
  175      * - If we cross a cell boundary OR start from a RO volume, then we will
  176      *   want to prefetch the RO volume.
  177      */
  178     if ((type == '#') && !roname) {
  179         if (afs_bkvolpref && samecell && (states & CBackup))
  180             prefetch = 3;       /* Prefetch the BK */
  181         else if (!samecell || (states & CRO))
  182             prefetch = 2;       /* Prefetch the RO */
  183         else
  184             prefetch = 1;       /* Do not prefetch */
  185     } else {
  186         prefetch = 1;           /* Do not prefetch */
  187     }
  188
  189     /* Get the volume struct. Unless this volume name has ".readonly" or
  190      * ".backup" in it, this will get the volume struct for the RW volume.
  191      * The RO volume will be prefetched if requested (but not returned).
  192      * Set up to use volname first.
  193      */
  194     tvp = afs_GetVolumeByName(volnamep, mtptCell, prefetch, areq, WRITE_LOCK);
  195
  196     /* If no volume was found in this cell, try the associated linked cell */
  197     if (!tvp && hac && areq->volumeError) {
  198         tvp =
  199             afs_GetVolumeByName(volnamep, assocCell, prefetch, areq,
  200                                 WRITE_LOCK);
  201     }
  202
  203     /* Still not found. If we are looking for the RO, then perhaps the RW
  204      * doesn't exist? Try adding ".readonly" to volname and look for that.
  205      * Don't know why we do this. Would have still found it in above call - jpm.
  206      */
          /* The length test keeps name + ".readonly" + NUL (len + 10 bytes)
           * below AFS_SMALLOCSIZ before a small-space buffer is requested. */
  207     if (!tvp && (prefetch == 2) && len < AFS_SMALLOCSIZ - 10) {
  208         buf = osi_AllocSmallSpace(len + 10);
  209
  210         strcpy(buf, volnamep);
  211         afs_strcat(buf, ".readonly");
  212
  213         tvp = afs_GetVolumeByName(buf, mtptCell, 1, areq, WRITE_LOCK);
  214
  215         /* Try the associated linked cell if failed */
  216         if (!tvp && hac && areq->volumeError) {
  217             tvp = afs_GetVolumeByName(buf, assocCell, 1, areq, WRITE_LOCK);
  218         }
  219         osi_FreeSmallSpace(buf);
  220     }
  221     /* done with volname */
  222     if (cpos)
  223         *cpos = ':';
  224     if (!tvp)
  225         return ENODEV;          /* Couldn't find the volume */
  226     else
  227         volid = tvp->volume;
          /* NOTE(review): volid is not refreshed if tvp is swapped for the BK
           * or RO volume below, so *avolnump reports the volume found here. */
  228
  229     /* Don't cross mountpoint from a BK to a BK volume */
  230     if ((states & CBackup) && (tvp->states & VBackup)) {
  231         afs_PutVolume(tvp, WRITE_LOCK);
  232         return ENODEV;
  233     }
  234
  235     /* If we want (prefetched) the BK and it exists, then drop the RW volume
  236      * and get the BK.
  237      * Otherwise, if we want (prefetched) the RO and it exists, then drop the
  238      * RW volume and get the RO.
  239      * Otherwise, go with the RW.
  240      */
  241     if ((prefetch == 3) && tvp->backVol) {
  242         tfid.Fid.Volume = tvp->backVol; /* remember BK volume */
  243         tfid.Cell = tvp->cell;
  244         afs_PutVolume(tvp, WRITE_LOCK); /* release old volume */
  245         tvp = afs_GetVolume(&tfid, areq, WRITE_LOCK);   /* get the new one */
  246         if (!tvp)
  247             return ENODEV;      /* oops, can't do it */
  248     } else if ((prefetch >= 2) && tvp->roVol) {
  249         tfid.Fid.Volume = tvp->roVol;   /* remember RO volume */
  250         tfid.Cell = tvp->cell;
  251         afs_PutVolume(tvp, WRITE_LOCK); /* release old volume */
  252         tvp = afs_GetVolume(&tfid, areq, WRITE_LOCK);   /* get the new one */
  253         if (!tvp)
  254             return ENODEV;      /* oops, can't do it */
  255     }
  256
      /* Success: hand back whichever results the caller asked for. */
  257 done:
  258     if (acellidxp)
  259         *acellidxp = cellidx;
  260     if (avolnump)
  261         *avolnump = volid;
  262     if (avnoidp)
  263         *avnoidp = vnoid;
  264     if (auniqp)
  265         *auniqp = uniq;
  266     if (avolpp)
  267         *avolpp = tvp;
  268     else if (tvp)
  269         afs_PutVolume(tvp, WRITE_LOCK);   /* caller did not want the volume */
  270     return 0;
  271 }
 272
 273 int
 274 EvalMountPoint(struct vcache *avc, struct vcache *advc,
 275                struct volume **avolpp, struct vrequest *areq)
 276 {
 277     afs_int32 code;
 278     afs_uint32 avnoid, auniq;
 279
 280     AFS_STATCNT(EvalMountPoint);
 281 #ifdef notdef
 282     if (avc->mvid.target_root && (avc->f.states & CMValid))
 283         return 0;               /* done while racing */
 284 #endif
 285     *avolpp = NULL;
 286     code = afs_HandleLink(avc, areq);
 287     if (code)
 288         return code;
 289
 290     /* Determine which cell and volume the mointpoint goes to */
 291     code = EvalMountData(avc->linkData[0], avc->linkData + 1,
 292                          avc->f.states, avc->f.fid.Cell, avolpp, areq, 0, 0,
 293                          &avnoid, &auniq);
 294     if (code) return code;
 295
 296     if (!avnoid)
 297         avnoid = 1;
 298
 299     if (!auniq)
 300         auniq = 1;
 301
 302     if (avc->mvid.target_root == NULL)
 303         avc->mvid.target_root = osi_AllocSmallSpace(sizeof(struct VenusFid));
 304     avc->mvid.target_root->Cell = (*avolpp)->cell;
 305     avc->mvid.target_root->Fid.Volume = (*avolpp)->volume;
 306     avc->mvid.target_root->Fid.Vnode = avnoid;
 307     avc->mvid.target_root->Fid.Unique = auniq;
 308     avc->f.states |= CMValid;
 309
 310     /* Used to: if the mount point is stored within a backup volume,
 311      * then we should only update the parent pointer information if
 312      * there's none already set, so as to avoid updating a volume's ..
 313      * info with something in an OldFiles directory.
 314      *
 315      * Next two lines used to be under this if:
 316      *
 317      * if (!(avc->f.states & CBackup) || tvp->dotdot.Fid.Volume == 0)
 318      *
 319      * Now: update mount point back pointer on every call, so that we handle
 320      * multiple mount points better.  This way, when du tries to go back
 321      * via chddir(".."), it will end up exactly where it started, yet
 322      * cd'ing via a new path to a volume will reset the ".." pointer
 323      * to the new path.
 324      */
 325     (*avolpp)->mtpoint = avc->f.fid;    /* setup back pointer to mtpoint */
 326
 327     if (advc)
 328         (*avolpp)->dotdot = advc->f.fid;
 329
 330     return 0;
 331 }
 332
 333 /*
 334  * afs_InitFakeStat
 335  *
 336  * Must be called on an afs_fakestat_state object before calling
 337  * afs_EvalFakeStat or afs_PutFakeStat.  Calling afs_PutFakeStat
 338  * without calling afs_EvalFakeStat is legal, as long as this
 339  * function is called.
 340  */
 341 void
 342 afs_InitFakeStat(struct afs_fakestat_state *state)
 343 {
 344     if (!afs_fakestat_enable)
 345         return;
 346
 347     state->valid = 1;
 348     state->did_eval = 0;
 349     state->need_release = 0;
 350 }
 351
  352 /*
  353  * afs_EvalFakeStat_int
  354  *
  355  * The actual implementation of afs_EvalFakeStat and afs_TryEvalFakeStat,
  356  * which is called by those wrapper functions.
  357  *
  358  * Only issues RPCs if canblock is non-zero.
  359  */
      /*
       * avcp      in/out: on entry the vcache being operated on; if it is a
       *           mount point whose target root vcache is obtained, it is
       *           replaced by that root vcache.
       * state     must have been set up by afs_InitFakeStat and not yet
       *           evaluated (both are asserted).  On success need_release
       *           and root_vp are set here.
       * areq      identity of the caller.
       * canblock  non-zero: verify the vcache and evaluate the mount point;
       *           failures are reported (EIO when no root can be obtained).
       *           zero: use only already-cached state; a missing root yields
       *           0 with *avcp left unchanged.
       *
       * Returns 0 without doing anything when afs_fakestat_enable is 0 or the
       * vcache is not a mount point.
       */
  360 static int
  361 afs_EvalFakeStat_int(struct vcache **avcp, struct afs_fakestat_state *state,
  362                      struct vrequest *areq, int canblock)
  363 {
  364     struct vcache *tvc, *root_vp;
  365     struct volume *tvolp = NULL;
  366     int code = 0;
  367
  368     if (!afs_fakestat_enable)
  369         return 0;
  370
  371     osi_Assert(state->valid == 1);
  372     osi_Assert(state->did_eval == 0);
  373     state->did_eval = 1;
  374
  375     tvc = *avcp;
  376     if (tvc->mvstat != AFS_MVSTAT_MTPT)
  377         return 0;
  378
  379     if (canblock) {
  380         /* Is the call to VerifyVCache really necessary? */
  381         code = afs_VerifyVCache(tvc, areq);
  382         if (code)
  383             goto done;
  384
              /* EvalMountPoint wants the mount point vcache write-locked. */
  385         ObtainWriteLock(&tvc->lock, 599);
  386         code = EvalMountPoint(tvc, NULL, &tvolp, areq);
  387         ReleaseWriteLock(&tvc->lock);
  388         if (code)
  389             goto done;
              /* No parent directory vcache was passed above, so build the
               * volume's ".." fid here: same cell and volume as the mount
               * point, vnode and unique taken from its recorded parent. */
  390         if (tvolp) {
  391             tvolp->dotdot = tvc->f.fid;
  392             tvolp->dotdot.Fid.Vnode = tvc->f.parent.vnode;
  393             tvolp->dotdot.Fid.Unique = tvc->f.parent.unique;
  394         }
  395     }
  396     if (tvc->mvid.target_root && (tvc->f.states & CMValid)) {
  397         if (!canblock) {
  398             afs_int32 retry;
  399
                  /* Cache-only lookup of the root vcache; repeated while
                   * afs_FindVCache returns an entry but asks for a retry. */
  400             do {
  401                 retry = 0;
  402                 ObtainWriteLock(&afs_xvcache, 597);
  403                 root_vp = afs_FindVCache(tvc->mvid.target_root, &retry, IS_WLOCK);
  404                 if (root_vp && retry) {
  405                     ReleaseWriteLock(&afs_xvcache);
  406                     afs_PutVCache(root_vp);
  407                 }
  408             } while (root_vp && retry);
  409             ReleaseWriteLock(&afs_xvcache);
  410         } else {
  411             root_vp = afs_GetVCache(tvc->mvid.target_root, areq, NULL, NULL);
  412         }
  413         if (!root_vp) {
  414             code = canblock ? EIO : 0;
  415             goto done;
  416         }
  417 #ifdef AFS_DARWIN80_ENV
  418         root_vp->f.m.Type = VDIR;
  419         AFS_GUNLOCK();
  420         code = afs_darwin_finalizevnode(root_vp, NULL, NULL, 0, 0);
  421         AFS_GLOCK();
              /* NOTE(review): on failure root_vp is not released in this
               * function -- confirm afs_darwin_finalizevnode handles that. */
  422         if (code) goto done;
  423         vnode_ref(AFSTOV(root_vp));
  424 #endif
              /* tvolp is only non-NULL when the mount point was evaluated
               * above (canblock); record the ".." fid on the root vcache. */
  425         if (tvolp && !afs_InReadDir(root_vp)) {
  426             /* Is this always kosher?  Perhaps we should instead use
  427              * NBObtainWriteLock to avoid potential deadlock.
  428              */
  429             ObtainWriteLock(&root_vp->lock, 598);
  430             if (!root_vp->mvid.parent)
  431                 root_vp->mvid.parent = osi_AllocSmallSpace(sizeof(struct VenusFid));
  432             *root_vp->mvid.parent = tvolp->dotdot;
  433             ReleaseWriteLock(&root_vp->lock);
  434         }
              /* Hand the root vcache to the caller; afs_PutFakeStat releases
               * it later because need_release is set. */
  435         state->need_release = 1;
  436         state->root_vp = root_vp;
  437         *avcp = root_vp;
  438         code = 0;
  439     } else {
  440         code = canblock ? EIO : 0;
  441     }
  442
  443   done:
          /* Drop the volume reference returned by EvalMountPoint, if any. */
  444     if (tvolp)
  445         afs_PutVolume(tvolp, WRITE_LOCK);
  446     return code;
  447 }
 448
 449 /*
 450  * afs_EvalFakeStat
 451  *
 452  * Automatically does the equivalent of EvalMountPoint for vcache entries
 453  * which are mount points.  Remembers enough state to properly release
 454  * the volume root vcache when afs_PutFakeStat() is called.
 455  *
 456  * State variable must be initialized by afs_InitFakeState() beforehand.
 457  *
 458  * Returns 0 when everything succeeds and *avcp points to the vcache entry
 459  * that should be used for the real vnode operation.  Returns non-zero if
 460  * something goes wrong and the error code should be returned to the user.
 461  */
 462 int
 463 afs_EvalFakeStat(struct vcache **avcp, struct afs_fakestat_state *state,
 464                  struct vrequest *areq)
 465 {
 466     return afs_EvalFakeStat_int(avcp, state, areq, 1);
 467 }
 468
 469 /*
 470  * afs_TryEvalFakeStat
 471  *
 472  * Same as afs_EvalFakeStat, but tries not to talk to remote servers
 473  * and only evaluate the mount point if all the data is already in
 474  * local caches.
 475  *
 476  * Returns 0 if everything succeeds and *avcp points to a valid
 477  * vcache entry (possibly evaluated).
 478  */
 479 int
 480 afs_TryEvalFakeStat(struct vcache **avcp, struct afs_fakestat_state *state,
 481                     struct vrequest *areq)
 482 {
 483     return afs_EvalFakeStat_int(avcp, state, areq, 0);
 484 }
 485
 486 /*
 487  * afs_PutFakeStat
 488  *
 489  * Perform any necessary cleanup at the end of a vnode op, given that
 490  * afs_InitFakeStat was previously called with this state.
 491  */
 492 void
 493 afs_PutFakeStat(struct afs_fakestat_state *state)
 494 {
 495     if (!afs_fakestat_enable)
 496         return;
 497
 498     osi_Assert(state->valid == 1);
 499     if (state->need_release)
 500         afs_PutVCache(state->root_vp);
 501     state->valid = 0;
 502 }
 503
 504 int
 505 afs_ENameOK(char *aname)
 506 {
 507     int tlen;
 508
 509     AFS_STATCNT(ENameOK);
 510     tlen = strlen(aname);
 511     if (tlen >= 4 && strcmp(aname + tlen - 4, "@sys") == 0)
 512         return 0;
 513     return 1;
 514 }
 515
 516 static int
 517 afs_getsysname(struct vrequest *areq, struct vcache *adp,
 518                char *bufp, int *num, char **sysnamelist[])
 519 {
 520     struct unixuser *au;
 521     afs_int32 error;
 522
 523     AFS_STATCNT(getsysname);
 524
 525     *sysnamelist = afs_sysnamelist;
 526
 527     if (!afs_nfsexporter)
 528         strcpy(bufp, (*sysnamelist)[0]);
 529     else {
 530         au = afs_GetUser(areq->uid, adp->f.fid.Cell, READ_LOCK);
 531         if (au->exporter) {
 532             error = EXP_SYSNAME(au->exporter, (char *)0, sysnamelist, num, 0);
 533             if (error) {
 534                 strcpy(bufp, "@sys");
 535                 afs_PutUser(au, READ_LOCK);
 536                 return -1;
 537             } else {
 538                 strcpy(bufp, (*sysnamelist)[0]);
 539             }
 540         } else
 541             strcpy(bufp, afs_sysname);
 542         afs_PutUser(au, READ_LOCK);
 543     }
 544     return 0;
 545 }
 546
 547 void
 548 Check_AtSys(struct vcache *avc, const char *aname,
 549             struct sysname_info *state, struct vrequest *areq)
 550 {
 551     int num = 0;
 552     char **sysnamelist[MAXNUMSYSNAMES];
 553
 554     if (AFS_EQ_ATSYS(aname)) {
 555         state->offset = 0;
 556         state->name = osi_AllocLargeSpace(MAXSYSNAME);
 557         state->allocked = 1;
 558         state->index =
 559             afs_getsysname(areq, avc, state->name, &num, sysnamelist);
 560     } else {
 561         state->offset = -1;
 562         state->allocked = 0;
 563         state->index = 0;
 564         state->name = (char *)aname;
 565     }
 566 }
 567
 568 int
 569 Next_AtSys(struct vcache *avc, struct vrequest *areq,
 570            struct sysname_info *state)
 571 {
 572     int num = afs_sysnamecount;
 573     char **sysnamelist[MAXNUMSYSNAMES];
 574
 575     if (state->index == -1)
 576         return 0;               /* No list */
 577
 578     /* Check for the initial state of aname != "@sys" in Check_AtSys */
 579     if (state->offset == -1 && state->allocked == 0) {
 580         char *tname;
 581
 582         /* Check for .*@sys */
 583         for (tname = state->name; *tname; tname++)
 584             /*Move to the end of the string */ ;
 585
 586         if ((tname > state->name + 4) && (AFS_EQ_ATSYS(tname - 4))) {
 587             state->offset = (tname - 4) - state->name;
 588             tname = osi_AllocLargeSpace(AFS_LRALLOCSIZ);
 589             strncpy(tname, state->name, state->offset);
 590             state->name = tname;
 591             state->allocked = 1;
 592             num = 0;
 593             state->index =
 594                 afs_getsysname(areq, avc, state->name + state->offset, &num,
 595                                sysnamelist);
 596             return 1;
 597         } else
 598             return 0;           /* .*@sys doesn't match either */
 599     } else {
 600         struct unixuser *au;
 601         afs_int32 error;
 602
 603         *sysnamelist = afs_sysnamelist;
 604
 605         if (afs_nfsexporter) {
 606             au = afs_GetUser(areq->uid, avc->f.fid.Cell, READ_LOCK);
 607             if (au->exporter) {
 608                 error =
 609                     EXP_SYSNAME(au->exporter, (char *)0, sysnamelist, &num, 0);
 610                 if (error) {
 611                     afs_PutUser(au, READ_LOCK);
 612                     return 0;
 613                 }
 614             }
 615             afs_PutUser(au, READ_LOCK);
 616         }
 617         if (++(state->index) >= num || !(*sysnamelist)[(unsigned int)state->index])
 618             return 0;           /* end of list */
 619     }
 620     strcpy(state->name + state->offset, (*sysnamelist)[(unsigned int)state->index]);
 621     return 1;
 622 }
 623
 624 static int
 625 afs_CheckBulkStatus(struct afs_conn *tc, int nFids, AFSBulkStats *statParm,
 626                     AFSCBs *cbParm)
 627 {
 628     int i;
 629     int code;
 630
 631     if (statParm->AFSBulkStats_len != nFids || cbParm->AFSCBs_len != nFids) {
 632         afs_warn("afs: BulkFetchStatus length %u/%u, expected %u\n",
 633                  (unsigned)statParm->AFSBulkStats_len,
 634                  (unsigned)cbParm->AFSCBs_len, nFids);
 635         afs_BadFetchStatus(tc);
 636         return VBUSY;
 637     }
 638     for (i = 0; i < nFids; i++) {
 639         if (statParm->AFSBulkStats_val[i].errorCode) {
 640             continue;
 641         }
 642         code = afs_CheckFetchStatus(tc, &statParm->AFSBulkStats_val[i]);
 643         if (code) {
 644             return code;
 645         }
 646     }
 647
 648     return 0;
 649 }
 650
 651 extern int BlobScan(struct dcache * afile, afs_int32 ablob, afs_int32 *ablobOut);
 652
  653 /* afs_DoBulkStat: called with an unlocked directory and directory cookie.
  654  * Areqp describes who is making the call.
  655  * Scans the next N (about 30, typically) directory entries, and does
  656  * a bulk stat call to stat them all.
  657  *
  658  * Must be very careful when merging in RPC responses, since we don't
  659  * want to overwrite newer info that was added by a file system mutating
  660  * call that ran concurrently with our bulk stat call.
  661  *
  662  * We do that, as described below, by not merging in our info (always
  663  * safe to skip the merge) if the status info is valid in the vcache entry.
  664  *
  665  * If adapt ever implements the bulk stat RPC, then this code will need to
  666  * ensure that vcaches created for failed RPCs to older servers have the
  667  * CForeign bit set.
  668  */
 669 static struct vcache *BStvc = NULL;
 670
 671 int
 672 afs_DoBulkStat(struct vcache *adp, long dirCookie, struct vrequest *areqp)
 673 {
 674     int nentries;               /* # of entries to prefetch */
 675     int nskip;                  /* # of slots in the LRU queue to skip */
 676 #ifdef AFS_DARWIN80_ENV
 677     int npasses = 0;
 678     struct vnode *lruvp;
 679 #endif
 680     struct vcache *lruvcp;      /* vcache ptr of our goal pos in LRU queue */
 681     struct dcache *dcp;         /* chunk containing the dir block */
 682     afs_size_t temp;            /* temp for holding chunk length, &c. */
 683     struct AFSFid *fidsp;       /* file IDs were collecting */
 684     struct AFSCallBack *cbsp;   /* call back pointers */
 685     struct AFSCallBack *tcbp;   /* temp callback ptr */
 686     struct AFSFetchStatus *statsp;      /* file status info */
 687     struct AFSVolSync volSync;  /* vol sync return info */
 688     struct vcache *tvcp;        /* temp vcp */
 689     struct afs_q *tq;           /* temp queue variable */
 690     AFSCBFids fidParm;          /* file ID parm for bulk stat */
 691     AFSBulkStats statParm;      /* stat info parm for bulk stat */
 692     int fidIndex = 0;           /* which file were stating */
 693     struct afs_conn *tcp = 0;   /* conn for call */
 694     AFSCBs cbParm;              /* callback parm for bulk stat */
 695     struct server *hostp = 0;   /* host we got callback from */
 696     long startTime;             /* time we started the call,
 697                                  * for callback expiration base
 698                                  */
 699 #if defined(AFS_DARWIN_ENV)
 700     int ftype[4] = {VNON, VREG, VDIR, VLNK}; /* verify type is as expected */
 701 #endif
 702     afs_size_t statSeqNo = 0;   /* Valued of file size to detect races */
 703     int code;                   /* error code */
 704     afs_int32 newIndex;         /* new index in the dir */
 705     struct DirBuffer entry;     /* Buffer for dir manipulation */
 706     struct DirEntry *dirEntryp; /* dir entry we are examining */
 707     int i;
 708     struct VenusFid afid;       /* file ID we are using now */
 709     struct VenusFid tfid;       /* another temp. file ID */
 710     afs_int32 retry;            /* handle low-level SGI MP race conditions */
 711     long volStates;             /* flags from vol structure */
 712     struct volume *volp = 0;    /* volume ptr */
 713     struct VenusFid dotdot = {0, {0, 0, 0}};
 714     int flagIndex = 0;          /* First file with bulk fetch flag set */
 715     struct rx_connection *rxconn;
 716     XSTATS_DECLS;
 717     dotdot.Cell = 0;
 718     dotdot.Fid.Unique = 0;
 719     dotdot.Fid.Vnode = 0;
 720
 721     /* first compute some basic parameters.  We dont want to prefetch more
 722      * than a fraction of the cache in any given call, and we want to preserve
 723      * a portion of the LRU queue in any event, so as to avoid thrashing
 724      * the entire stat cache (we will at least leave some of it alone).
 725      * presently dont stat more than 1/8 the cache in any one call.      */
 726     nentries = afs_cacheStats / 8;
 727
 728     /* dont bother prefetching more than one calls worth of info */
 729     if (nentries > AFSCBMAX)
 730         nentries = AFSCBMAX;
 731
 732     /* heuristic to make sure that things fit in 4K.  This means that
 733      * we shouldnt make it any bigger than 47 entries.  I am typically
 734      * going to keep it a little lower, since we don't want to load
 735      * too much of the stat cache.
 736      */
 737     if (nentries > 30)
 738         nentries = 30;
 739
 740     /* now, to reduce the stack size, well allocate two 4K blocks,
 741      * one for fids and callbacks, and one for stat info.  Well set
 742      * up our pointers to the memory from there, too.
 743      */
 744     statsp = osi_Alloc(AFSCBMAX * sizeof(AFSFetchStatus));
 745     fidsp = osi_AllocLargeSpace(nentries * sizeof(AFSFid));
 746     cbsp = osi_Alloc(AFSCBMAX * sizeof(AFSCallBack));
 747
 748     /* next, we must iterate over the directory, starting from the specified
 749      * cookie offset (dirCookie), and counting out nentries file entries.
 750      * We skip files that already have stat cache entries, since we
 751      * dont want to bulk stat files that are already in the cache.
 752      */
 753   tagain:
 754     code = afs_VerifyVCache(adp, areqp);
 755     if (code)
 756         goto done2;
 757
 758     dcp = afs_GetDCache(adp, (afs_size_t) 0, areqp, &temp, &temp, 1);
 759     if (!dcp) {
 760         code = EIO;
 761         goto done2;
 762     }
 763
 764     /* lock the directory cache entry */
 765     ObtainReadLock(&adp->lock);
 766     ObtainReadLock(&dcp->lock);
 767
 768     /*
 769      * Make sure that the data in the cache is current. There are two
 770      * cases we need to worry about:
 771      * 1. The cache data is being fetched by another process.
 772      * 2. The cache data is no longer valid
 773      */
 774     while ((adp->f.states & CStatd)
 775            && (dcp->dflags & DFFetching)
 776            && hsame(adp->f.m.DataVersion, dcp->f.versionNo)) {
 777         afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING,
 778                    __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, dcp,
 779                    ICL_TYPE_INT32, dcp->dflags);
 780         ReleaseReadLock(&dcp->lock);
 781         ReleaseReadLock(&adp->lock);
 782         afs_osi_Sleep(&dcp->validPos);
 783         ObtainReadLock(&adp->lock);
 784         ObtainReadLock(&dcp->lock);
 785     }
 786     if (!(adp->f.states & CStatd)
 787         || !hsame(adp->f.m.DataVersion, dcp->f.versionNo)) {
 788         ReleaseReadLock(&dcp->lock);
 789         ReleaseReadLock(&adp->lock);
 790         afs_PutDCache(dcp);
 791         goto tagain;
 792     }
 793
 794     /* Generate a sequence number so we can tell whether we should
 795      * store the attributes when processing the response. This number is
 796      * stored in the file size when we set the CBulkFetching bit. If the
 797      * CBulkFetching is still set and this value hasn't changed, then
 798      * we know we were the last to set CBulkFetching bit for this file,
 799      * and it is safe to set the status information for this file.
 800      */
 801     statSeqNo = bulkStatCounter++;
 802     /* ensure against wrapping */
 803     if (statSeqNo == 0)
 804         statSeqNo = bulkStatCounter++;
 805
 806     /* now we have dir data in the cache, so scan the dir page */
 807     fidIndex = 0;
 808     flagIndex = 0;
 809     while (1) {                 /* Should probably have some constant bound */
 810         /* look for first safe entry to examine in the directory.  BlobScan
 811          * looks for a the 1st allocated dir after the dirCookie slot.
 812          */
 813         code = BlobScan(dcp, (dirCookie >> 5), &newIndex);
 814         if (code || newIndex == 0)
 815             break;
 816
 817         /* remember the updated directory cookie */
 818         dirCookie = newIndex << 5;
 819
 820         /* get a ptr to the dir entry */
 821         code = afs_dir_GetBlob(dcp, newIndex, &entry);
 822         if (code)
 823             break;
 824         dirEntryp = (struct DirEntry *)entry.data;
 825
 826         /* dont copy more than we have room for */
 827         if (fidIndex >= nentries) {
 828             DRelease(&entry, 0);
 829             break;
 830         }
 831
 832         /* now, if the dir entry looks good, copy it out to our list.  Vnode
 833          * 0 means deleted, although it should also be free were it deleted.
 834          */
 835         if (dirEntryp->fid.vnode != 0) {
 836             /* dont copy entries we have in our cache.  This check will
 837              * also make us skip "." and probably "..", unless it has
 838              * disappeared from the cache since we did our namei call.
 839              */
 840             tfid.Cell = adp->f.fid.Cell;
 841             tfid.Fid.Volume = adp->f.fid.Fid.Volume;
 842             tfid.Fid.Vnode = ntohl(dirEntryp->fid.vnode);
 843             tfid.Fid.Unique = ntohl(dirEntryp->fid.vunique);
 844             do {
 845                 retry = 0;
 846                 ObtainWriteLock(&afs_xvcache, 130);
 847                 tvcp = afs_FindVCache(&tfid, &retry, IS_WLOCK /* no stats | LRU */ );
 848                 if (tvcp && retry) {
 849                     ReleaseWriteLock(&afs_xvcache);
 850                     afs_PutVCache(tvcp);
 851                 }
 852             } while (tvcp && retry);
 853             if (!tvcp) {        /* otherwise, create manually */
 854                 tvcp = afs_NewBulkVCache(&tfid, hostp, statSeqNo);
 855                 if (tvcp)
 856                 {
 857                     ObtainWriteLock(&tvcp->lock, 505);
 858 #ifdef AFS_DARWIN80_ENV
 859                     /* use even/odd hack to guess file versus dir.
 860                        let links be reaped. oh well. */
 861                     if (dirEntryp->fid.vnode & 1)
 862                         tvcp->f.m.Type = VDIR;
 863                     else
 864                         tvcp->f.m.Type = VREG;
 865                     /* finalize to a best guess */
 866                     afs_darwin_finalizevnode(tvcp, AFSTOV(adp), NULL, 0, 1);
 867                     /* re-acquire usecount that finalizevnode disposed of */
 868                     vnode_ref(AFSTOV(tvcp));
 869 #endif
 870                     ReleaseWriteLock(&afs_xvcache);
 871                     afs_RemoveVCB(&tfid);
 872                     ReleaseWriteLock(&tvcp->lock);
 873                 } else {
 874                     ReleaseWriteLock(&afs_xvcache);
 875                 }
 876             } else {
 877                 ReleaseWriteLock(&afs_xvcache);
 878             }
 879             if (!tvcp)
 880             {
 881                 DRelease(&entry, 0);
 882                 ReleaseReadLock(&dcp->lock);
 883                 ReleaseReadLock(&adp->lock);
 884                 afs_PutDCache(dcp);
 885                 goto done;      /* can happen if afs_NewVCache fails */
 886             }
 887
 888             /* WARNING: afs_DoBulkStat uses the Length field to store a
 889              * sequence number for each bulk status request. Under no
 890              * circumstances should afs_DoBulkStat store a sequence number
 891              * if the new length will be ignored when afs_ProcessFS is
 892              * called with new stats. */
 893 #ifdef AFS_SGI_ENV
 894             if (!(tvcp->f.states & CStatd)
 895                 && (!((tvcp->f.states & CBulkFetching) &&
 896                       (tvcp->f.m.Length != statSeqNo)))
 897                 && (tvcp->execsOrWriters <= 0)
 898                 && !afs_DirtyPages(tvcp)
 899                 && !AFS_VN_MAPPED((vnode_t *) tvcp))
 900 #else
 901             if (!(tvcp->f.states & CStatd)
 902                 && (!((tvcp->f.states & CBulkFetching) &&
 903                       (tvcp->f.m.Length != statSeqNo)))
 904                 && (tvcp->execsOrWriters <= 0)
 905                 && !afs_DirtyPages(tvcp))
 906 #endif
 907
 908             {
 909                 /* this entry doesnt exist in the cache, and is not
 910                  * already being fetched by someone else, so add it to the
 911                  * list of file IDs to obtain.
 912                  *
 913                  * We detect a callback breaking race condition by checking the
 914                  * CBulkFetching state bit and the value in the file size.
 915                  * It is safe to set the status only if the CBulkFetching
 916                  * flag is still set and the value in the file size does
 917                  * not change. NewBulkVCache sets us up for the new ones.
 918                  * Set up the rest here.
 919                  *
 920                  * Don't fetch status for dirty files. We need to
 921                  * preserve the value of the file size. We could
 922                  * flush the pages, but it wouldn't be worthwhile.
 923                  */
 924                 if (!(tvcp->f.states & CBulkFetching)) {
 925                     tvcp->f.states |= CBulkFetching;
 926                     tvcp->f.m.Length = statSeqNo;
 927                 }
 928                 memcpy((char *)(fidsp + fidIndex), (char *)&tfid.Fid,
 929                        sizeof(*fidsp));
 930                 fidIndex++;
 931             }
 932             afs_PutVCache(tvcp);
 933         }
 934
 935         /* if dir vnode has non-zero entry */
 936         /* move to the next dir entry by adding in the # of entries
 937          * used by this dir entry.
 938          */
 939         temp = afs_dir_NameBlobs(dirEntryp->name) << 5;
 940         DRelease(&entry, 0);
 941         if (temp <= 0)
 942             break;
 943         dirCookie += temp;
 944     }                           /* while loop over all dir entries */
 945
 946     /* now release the dir lock and prepare to make the bulk RPC */
 947     ReleaseReadLock(&dcp->lock);
 948     ReleaseReadLock(&adp->lock);
 949
 950     /* release the chunk */
 951     afs_PutDCache(dcp);
 952
 953     /* dont make a null call */
 954     if (fidIndex == 0)
 955         goto done;
 956
 957     do {
 958         /* setup the RPC parm structures */
 959         fidParm.AFSCBFids_len = fidIndex;
 960         fidParm.AFSCBFids_val = fidsp;
 961         statParm.AFSBulkStats_len = fidIndex;
 962         statParm.AFSBulkStats_val = statsp;
 963         cbParm.AFSCBs_len = fidIndex;
 964         cbParm.AFSCBs_val = cbsp;
 965
 966         /* start the timer; callback expirations are relative to this */
 967         startTime = osi_Time();
 968
 969         tcp = afs_Conn(&adp->f.fid, areqp, SHARED_LOCK, &rxconn);
 970         if (tcp) {
 971             hostp = tcp->parent->srvr->server;
 972
 973             for (i = 0; i < fidIndex; i++) {
 974                 /* we must set tvcp->callback before the BulkStatus call, so
 975                  * we can detect concurrent InitCallBackState's */
 976
 977                 afid.Cell = adp->f.fid.Cell;
 978                 afid.Fid.Volume = adp->f.fid.Fid.Volume;
 979                 afid.Fid.Vnode = fidsp[i].Vnode;
 980                 afid.Fid.Unique = fidsp[i].Unique;
 981
 982                 do {
 983                     retry = 0;
 984                     ObtainReadLock(&afs_xvcache);
 985                     tvcp = afs_FindVCache(&afid, &retry, 0 /* !stats&!lru */);
 986                     ReleaseReadLock(&afs_xvcache);
 987                 } while (tvcp && retry);
 988
 989                 if (!tvcp) {
 990                     continue;
 991                 }
 992
 993                 if ((tvcp->f.states & CBulkFetching) &&
 994                      (tvcp->f.m.Length == statSeqNo)) {
 995                     tvcp->callback = hostp;
 996                 }
 997
 998                 afs_PutVCache(tvcp);
 999                 tvcp = NULL;
1000             }
1001
1002             XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_BULKSTATUS);
1003
1004             if (!(tcp->parent->srvr->server->flags & SNO_INLINEBULK)) {
1005                 RX_AFS_GUNLOCK();
1006                 code =
1007                     RXAFS_InlineBulkStatus(rxconn, &fidParm, &statParm,
1008                                            &cbParm, &volSync);
1009                 RX_AFS_GLOCK();
1010                 if (code == RXGEN_OPCODE) {
1011                     tcp->parent->srvr->server->flags |= SNO_INLINEBULK;
1012                     RX_AFS_GUNLOCK();
1013                     code =
1014                         RXAFS_BulkStatus(rxconn, &fidParm, &statParm,
1015                                          &cbParm, &volSync);
1016                     RX_AFS_GLOCK();
1017                 }
1018             } else {
1019                 RX_AFS_GUNLOCK();
1020                 code =
1021                     RXAFS_BulkStatus(rxconn, &fidParm, &statParm, &cbParm,
1022                                      &volSync);
1023                 RX_AFS_GLOCK();
1024             }
1025             XSTATS_END_TIME;
1026
1027             if (code == 0) {
1028                 code = afs_CheckBulkStatus(tcp, fidIndex, &statParm, &cbParm);
1029             }
1030         } else
1031             code = -1;
1032         /* make sure we give afs_Analyze a chance to retry,
1033          * but if the RPC succeeded we may have entries to merge.
1034          * if we wipe code with one entry's status we get bogus failures.
1035          */
1036     } while (afs_Analyze
1037              (tcp, rxconn, code ? code : (&statsp[0])->errorCode,
1038               &adp->f.fid, areqp, AFS_STATS_FS_RPCIDX_BULKSTATUS,
1039               SHARED_LOCK, NULL));
1040
1041     /* now, if we didnt get the info, bail out. */
1042     if (code)
1043         goto done;
1044
1045     /* we need vol flags to create the entries properly */
1046     dotdot.Fid.Volume = 0;
1047     volp = afs_GetVolume(&adp->f.fid, areqp, READ_LOCK);
1048     if (volp) {
1049         volStates = volp->states;
1050         if (volp->dotdot.Fid.Volume != 0)
1051             dotdot = volp->dotdot;
1052     } else
1053         volStates = 0;
1054
1055     /* find the place to merge the info into  We do this by skipping
1056      * nskip entries in the LRU queue.  The more we skip, the more
1057      * we preserve, since the head of the VLRU queue is the most recently
1058      * referenced file.
1059      */
1060   reskip:
1061     nskip = afs_cacheStats / 2; /* preserved fraction of the cache */
1062     ObtainReadLock(&afs_xvcache);
1063 #ifdef AFS_DARWIN80_ENV
1064  reskip2:
1065 #endif
1066     if (QEmpty(&VLRU)) {
1067         /* actually a serious error, probably should panic. Probably will
1068          * panic soon, oh well. */
1069         ReleaseReadLock(&afs_xvcache);
1070         afs_warnuser("afs_DoBulkStat: VLRU empty!");
1071         goto done;
1072     }
1073     if ((VLRU.next->prev != &VLRU) || (VLRU.prev->next != &VLRU)) {
1074         refpanic("Bulkstat VLRU inconsistent");
1075     }
1076     for (tq = VLRU.next; tq != &VLRU; tq = QNext(tq)) {
1077         if (--nskip <= 0) {
1078 #ifdef AFS_DARWIN80_ENV
1079             if ((!(QTOV(tq)->f.states & CDeadVnode)&&!(QTOV(tq)->f.states & CVInit)))
1080 #endif
1081                 break;
1082         }
1083         if (QNext(QPrev(tq)) != tq) {
1084             BStvc = QTOV(tq);
1085             refpanic("BulkStat VLRU inconsistent");
1086         }
1087     }
1088     if (tq != &VLRU)
1089         lruvcp = QTOV(tq);
1090     else
1091         lruvcp = QTOV(VLRU.next);
1092
1093     /* now we have to hold this entry, so that it does not get moved
1094      * into the free list while we're running.  It could still get
1095      * moved within the lru queue, but hopefully that will be rare; it
1096      * doesn't hurt nearly as much.
1097      */
1098     retry = 0;
1099 #ifdef AFS_DARWIN80_ENV
1100     if (((lruvcp->f.states & CDeadVnode)||(lruvcp->f.states & CVInit))) {
1101         if (npasses == 0) {
1102             nskip = 1;
1103             npasses++;
1104             goto reskip2;
1105         } else
1106             panic("Can't find non-dead vnode in VLRU\n");
1107     }
1108     lruvp = AFSTOV(lruvcp);
1109     if (vnode_get(lruvp))       /* this bumps ref count */
1110         retry = 1;
1111     else if (vnode_ref(lruvp)) {
1112         AFS_GUNLOCK();
1113         /* AFSTOV(lruvcp) may be NULL */
1114         vnode_put(lruvp);
1115         AFS_GLOCK();
1116         retry = 1;
1117     }
1118 #else
1119     osi_vnhold(lruvcp, &retry);
1120 #endif
1121     ReleaseReadLock(&afs_xvcache);      /* could be read lock */
1122     if (retry)
1123         goto reskip;
1124
1125     /* otherwise, merge in the info.  We have to be quite careful here,
1126      * since we need to ensure that we don't merge old info over newer
1127      * stuff in a stat cache entry.  We're very conservative here: we don't
1128      * do the merge at all unless we ourselves create the stat cache
1129      * entry.  That's pretty safe, and should work pretty well, since we
1130      * typically expect to do the stat cache creation ourselves.
1131      *
1132      * We also have to take into account racing token revocations.
1133      */
1134     for (i = 0; i < fidIndex; i++) {
1135         if ((&statsp[i])->errorCode)
1136             continue;
1137         afid.Cell = adp->f.fid.Cell;
1138         afid.Fid.Volume = adp->f.fid.Fid.Volume;
1139         afid.Fid.Vnode = fidsp[i].Vnode;
1140         afid.Fid.Unique = fidsp[i].Unique;
1141         do {
1142             retry = 0;
1143             ObtainReadLock(&afs_xvcache);
1144             tvcp = afs_FindVCache(&afid, &retry, 0/* !stats&!lru */);
1145             ReleaseReadLock(&afs_xvcache);
1146         } while (tvcp && retry);
1147
1148         /* The entry may no longer exist */
1149         if (tvcp == NULL) {
1150             continue;
1151         }
1152
1153         /* now we have the entry held, but we need to fill it in */
1154         ObtainWriteLock(&tvcp->lock, 131);
1155
1156         /* if CBulkFetching is not set, or if the file size no longer
1157          * matches the value we placed there when we set the CBulkFetching
1158          * flag, then someone else has done something with this node,
1159          * and we may not have the latest status information for this
1160          * file.  Leave the entry alone. There's also a file type
1161          * change here, for OSX bulkstat support.
1162          */
1163         if (!(tvcp->f.states & CBulkFetching)
1164             || (tvcp->f.m.Length != statSeqNo)
1165 #if defined(AFS_DARWIN_ENV)
1166             || (ftype[(&statsp[i])->FileType] != vType(tvcp))
1167 #endif
1168            ) {
1169             flagIndex++;
1170             ReleaseWriteLock(&tvcp->lock);
1171             afs_PutVCache(tvcp);
1172             continue;
1173         }
1174
1175         /* now copy ".." entry back out of volume structure, if necessary */
1176         if (tvcp->mvstat == AFS_MVSTAT_ROOT && (dotdot.Fid.Volume != 0)) {
1177             if (!tvcp->mvid.parent)
1178                 tvcp->mvid.parent = osi_AllocSmallSpace(sizeof(struct VenusFid));
1179             *tvcp->mvid.parent = dotdot;
1180         }
1181
1182 #ifdef AFS_DARWIN80_ENV
1183         if (((lruvcp->f.states & CDeadVnode)||(lruvcp->f.states & CVInit)))
1184             panic("vlru control point went dead\n");
1185 #endif
1186
1187         ObtainWriteLock(&afs_xvcache, 132);
1188         if ((VLRU.next->prev != &VLRU) || (VLRU.prev->next != &VLRU)) {
1189             refpanic("Bulkstat VLRU inconsistent2");
1190         }
1191         if ((QNext(QPrev(&tvcp->vlruq)) != &tvcp->vlruq)
1192             || (QPrev(QNext(&tvcp->vlruq)) != &tvcp->vlruq)) {
1193             refpanic("Bulkstat VLRU inconsistent4");
1194         }
1195         if ((QNext(QPrev(&lruvcp->vlruq)) != &lruvcp->vlruq)
1196             || (QPrev(QNext(&lruvcp->vlruq)) != &lruvcp->vlruq)) {
1197             refpanic("Bulkstat VLRU inconsistent5");
1198         }
1199
1200         if (tvcp != lruvcp) {   /* if they are == don't move it, don't corrupt vlru */
1201             QRemove(&tvcp->vlruq);
1202             QAdd(&lruvcp->vlruq, &tvcp->vlruq);
1203         }
1204
1205         if ((VLRU.next->prev != &VLRU) || (VLRU.prev->next != &VLRU)) {
1206             refpanic("Bulkstat VLRU inconsistent3");
1207         }
1208         if ((QNext(QPrev(&tvcp->vlruq)) != &tvcp->vlruq)
1209             || (QPrev(QNext(&tvcp->vlruq)) != &tvcp->vlruq)) {
1210             refpanic("Bulkstat VLRU inconsistent5");
1211         }
1212         if ((QNext(QPrev(&lruvcp->vlruq)) != &lruvcp->vlruq)
1213             || (QPrev(QNext(&lruvcp->vlruq)) != &lruvcp->vlruq)) {
1214             refpanic("Bulkstat VLRU inconsistent6");
1215         }
1216         ReleaseWriteLock(&afs_xvcache);
1217
1218         ObtainWriteLock(&afs_xcbhash, 494);
1219
1220         /* We need to check the flags again. We may have missed
1221          * something while we were waiting for a lock.
1222          */
1223         if (!(tvcp->f.states & CBulkFetching) || (tvcp->f.m.Length != statSeqNo)) {
1224             flagIndex++;
1225             ReleaseWriteLock(&tvcp->lock);
1226             ReleaseWriteLock(&afs_xcbhash);
1227             afs_PutVCache(tvcp);
1228             continue;
1229         }
1230
1231         /* now merge in the resulting status back into the vnode.
1232          * We only do this if the entry looks clear.
1233          */
1234         afs_ProcessFS(tvcp, &statsp[i], areqp);
1235 #if defined(AFS_LINUX22_ENV)
1236         afs_fill_inode(AFSTOV(tvcp), NULL);     /* reset inode operations */
1237 #endif
1238
1239         /* do some accounting for bulk stats: mark this entry as
1240          * loaded, so we can tell if we use it before it gets
1241          * recycled.
1242          */
1243         tvcp->f.states |= CBulkStat;
1244         tvcp->f.states &= ~CBulkFetching;
1245         flagIndex++;
1246         afs_bulkStatsDone++;
1247
1248         /* merge in vol info */
1249         if (volStates & VRO)
1250             tvcp->f.states |= CRO;
1251         if (volStates & VBackup)
1252             tvcp->f.states |= CBackup;
1253         if (volStates & VForeign)
1254             tvcp->f.states |= CForeign;
1255
1256         /* merge in the callback info */
1257         tvcp->f.states |= CTruth;
1258
1259         /* get ptr to the callback we are interested in */
1260         tcbp = cbsp + i;
1261
1262         if (tcbp->ExpirationTime != 0) {
1263             tvcp->cbExpires = tcbp->ExpirationTime + startTime;
1264             tvcp->callback = hostp;
1265             tvcp->f.states |= CStatd;
1266             afs_QueueCallback(tvcp, CBHash(tcbp->ExpirationTime), volp);
1267         } else if (tvcp->f.states & CRO) {
1268             /* ordinary callback on a read-only volume -- AFS 3.2 style */
1269             tvcp->cbExpires = 3600 + startTime;
1270             tvcp->callback = hostp;
1271             tvcp->f.states |= CStatd;
1272             afs_QueueCallback(tvcp, CBHash(3600), volp);
1273         } else {
1274             afs_StaleVCacheFlags(tvcp,
1275                                  AFS_STALEVC_CBLOCKED | AFS_STALEVC_CLEARCB,
1276                                  CUnique);
1277         }
1278 #ifdef AFS_DARWIN80_ENV
1279         /* reclaim->FlushVCache will need xcbhash */
1280         if (((tvcp->f.states & CDeadVnode)||(tvcp->f.states & CVInit))) {
1281             ReleaseWriteLock(&afs_xcbhash);
1282             /* passing in a parent hangs getting the vnode lock */
1283             code = afs_darwin_finalizevnode(tvcp, NULL, NULL, 0, 1);
1284             if (code) {
1285                 /* It's gonna get recycled - shouldn't happen */
1286                 afs_StaleVCacheFlags(tvcp,
1287                                      AFS_STALEVC_CBLOCKED | AFS_STALEVC_CLEARCB,
1288                                      CUnique);
1289             } else
1290                 /* re-acquire the usecount that finalizevnode disposed of */
1291                 vnode_ref(AFSTOV(tvcp));
1292         } else
1293 #endif
1294         ReleaseWriteLock(&afs_xcbhash);
1295
1296         ReleaseWriteLock(&tvcp->lock);
1297         /* finally, we're done with the entry */
1298         afs_PutVCache(tvcp);
1299     }                           /* for all files we got back */
1300
1301     /* finally return the pointer into the LRU queue */
1302 #ifdef AFS_DARWIN80_ENV
1303     if (((lruvcp->f.states & CDeadVnode)||(lruvcp->f.states & CVInit)))
1304         panic("vlru control point went dead before put\n");
1305     AFS_GUNLOCK();
1306     vnode_put(lruvp);
1307     vnode_rele(lruvp);
1308     AFS_GLOCK();
1309 #else
1310     afs_PutVCache(lruvcp);
1311 #endif
1312
1313   done:
1314     /* Be sure to turn off the CBulkFetching flags */
1315     for (i = flagIndex; i < fidIndex; i++) {
1316         afid.Cell = adp->f.fid.Cell;
1317         afid.Fid.Volume = adp->f.fid.Fid.Volume;
1318         afid.Fid.Vnode = fidsp[i].Vnode;
1319         afid.Fid.Unique = fidsp[i].Unique;
1320         do {
1321             retry = 0;
1322             ObtainReadLock(&afs_xvcache);
1323             tvcp = afs_FindVCache(&afid, &retry, 0 /* !stats&!lru */);
1324             ReleaseReadLock(&afs_xvcache);
1325         } while (tvcp && retry);
1326         if (tvcp != NULL) {
1327             if ((tvcp->f.states & CBulkFetching)
1328                 && (tvcp->f.m.Length == statSeqNo)) {
1329                 tvcp->f.states &= ~CBulkFetching;
1330             }
1331             afs_PutVCache(tvcp);
1332         }
1333     }
1334     if (volp)
1335         afs_PutVolume(volp, READ_LOCK);
1336
1337   done2:
1338     osi_FreeLargeSpace((char *)fidsp);
1339     osi_Free((char *)statsp, AFSCBMAX * sizeof(AFSFetchStatus));
1340     osi_Free((char *)cbsp, AFSCBMAX * sizeof(AFSCallBack));
1341     return code;
1342 }
1343
/*
 * AFSDOBULK: integer flag whose initial value and linkage depend on the
 * platform.  Builds with AFS_DARWIN80_ENV defined get a non-static
 * variable initialized to 0; every other build gets a file-local
 * variable initialized to 1.
 *
 * NOTE(review): the name suggests this switches bulk-stat processing
 * on or off -- confirm against the places that read it.
 *
 * The platform test was formerly:
 *   (AFS_DEC_ENV) || defined(AFS_OSF30_ENV) || defined(AFS_NCR_ENV)
 */
#ifndef AFS_DARWIN80_ENV
static int AFSDOBULK = 1;
#else
int AFSDOBULK = 0;
#endif
1350
/*
 * Decide whether a path component names the directory being searched.
 *
 * aname: NUL-terminated component name; must not be NULL (it is
 *        dereferenced unconditionally).
 *
 * Returns 1 when aname is exactly ".", and 0 for any other name.  When
 * built with AFS_SUN5_ENV, the empty string also yields 1.
 */
static_inline int
osi_lookup_isdot(const char *aname)
{
#ifdef AFS_SUN5_ENV
    /* Solaris can pass "" as a path component when we are the root
     * directory (for example following chroot); that is treated the
     * same as a lookup of ".". */
    if (aname[0] == '\0')
        return 1;
#endif /* AFS_SUN5_ENV */

    /* aname[1] is only examined once aname[0] is known to be '.', so we
     * never read past the terminator of an empty name. */
    return (aname[0] == '.' && aname[1] == '\0') ? 1 : 0;
}
1367
1368 int
1369 #if defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
1370 afs_lookup(OSI_VC_DECL(adp), char *aname, struct vcache **avcp, struct pathname *pnp, int flags, struct vnode *rdir, afs_ucred_t *acred)
1371 #elif defined(UKERNEL)
1372 afs_lookup(OSI_VC_DECL(adp), char *aname, struct vcache **avcp, afs_ucred_t *acred, int flags)
1373 #else
1374 afs_lookup(OSI_VC_DECL(adp), char *aname, struct vcache **avcp, afs_ucred_t *acred)
1375 #endif
1376 {
1377     struct vrequest *treq = NULL;
1378     char *tname = NULL;
1379     struct vcache *tvc = 0;
1380     afs_int32 code;
1381     afs_int32 bulkcode = 0;
1382     int pass = 0, hit = 0;
1383     int force_eval = afs_fakestat_enable ? 0 : 1;
1384     long dirCookie;
1385     afs_hyper_t versionNo;
1386     int no_read_access = 0;
1387     struct sysname_info sysState;       /* used only for @sys checking */
1388     int dynrootRetry = 1;
1389     struct afs_fakestat_state fakestate;
1390     int tryEvalOnly = 0;
1391     OSI_VC_CONVERT(adp);
1392
1393     AFS_STATCNT(afs_lookup);
1394     afs_InitFakeStat(&fakestate);
1395
1396     AFS_DISCON_LOCK();
1397
1398     if ((code = afs_CreateReq(&treq, acred)))
1399         goto done;
1400
1401     if (afs_fakestat_enable && adp->mvstat == AFS_MVSTAT_MTPT) {
1402        if (strcmp(aname, ".directory") == 0)
1403            tryEvalOnly = 1;
1404     }
1405
1406 #if defined(AFS_DARWIN_ENV)
1407     /* Workaround for MacOSX Finder, which tries to look for
1408      * .DS_Store and Contents under every directory.
1409      */
1410     if (afs_fakestat_enable && adp->mvstat == AFS_MVSTAT_MTPT) {
1411         if (strcmp(aname, ".DS_Store") == 0)
1412             tryEvalOnly = 1;
1413         if (strcmp(aname, "Contents") == 0)
1414             tryEvalOnly = 1;
1415     }
1416     if (afs_fakestat_enable && adp->mvstat == AFS_MVSTAT_ROOT) {
1417         if (strncmp(aname, "._", 2) == 0)
1418             tryEvalOnly = 1;
1419     }
1420 #endif
1421
1422     if (tryEvalOnly)
1423         code = afs_TryEvalFakeStat(&adp, &fakestate, treq);
1424     else
1425         code = afs_EvalFakeStat(&adp, &fakestate, treq);
1426
1427     /*printf("Code is %d\n", code);*/
1428
1429     if (tryEvalOnly && adp->mvstat == AFS_MVSTAT_MTPT)
1430         code = ENODEV;
1431     if (code)
1432         goto done;
1433
1434     /* come back to here if we encounter a non-existent object in a read-only
1435      * volume's directory */
1436   redo:
1437     *avcp = NULL;               /* Since some callers don't initialize it */
1438     bulkcode = 0;
1439
1440     if (!(adp->f.states & CStatd) && !afs_InReadDir(adp)) {
1441         if ((code = afs_VerifyVCache2(adp, treq))) {
1442             goto done;
1443         }
1444     } else
1445         code = 0;
1446
1447     /* watch for ".." in a volume root */
1448     if (adp->mvstat == AFS_MVSTAT_ROOT && aname[0] == '.' && aname[1] == '.' && !aname[2]) {
1449         /* looking up ".." in root via special hacks */
1450         if (adp->mvid.parent == (struct VenusFid *)0 || adp->mvid.parent->Fid.Volume == 0) {
1451             code = ENODEV;
1452             goto done;
1453         }
1454         /* otherwise we have the fid here, so we use it */
1455         /*printf("Getting vcache\n");*/
1456         tvc = afs_GetVCache(adp->mvid.parent, treq, NULL, NULL);
1457         afs_Trace3(afs_iclSetp, CM_TRACE_GETVCDOTDOT, ICL_TYPE_FID, adp->mvid.parent,
1458                    ICL_TYPE_POINTER, tvc, ICL_TYPE_INT32, code);
1459         *avcp = tvc;
1460         code = (tvc ? 0 : EIO);
1461         hit = 1;
1462         if (tvc && !VREFCOUNT_GT(tvc, 0)) {
1463             osi_Panic("TT1");
1464         }
1465         if (code) {
1466             /*printf("LOOKUP GETVCDOTDOT -> %d\n", code); */
1467         }
1468         goto done;
1469     }
1470
1471     /* now check the access */
1472     if (treq->uid != adp->last_looker) {
1473         if (!afs_AccessOK(adp, PRSFS_LOOKUP, treq, CHECK_MODE_BITS)) {
1474             *avcp = NULL;
1475             code = EACCES;
1476             goto done;
1477         } else
1478             adp->last_looker = treq->uid;
1479     }
1480
1481     /* Check for read access as well.  We need read access in order to
1482      * stat files, but not to stat subdirectories. */
1483     if (!afs_AccessOK(adp, PRSFS_READ, treq, CHECK_MODE_BITS))
1484         no_read_access = 1;
1485
1486     /* special case lookup of ".".  Can we check for it sooner in this code,
1487      * for instance, way up before "redo:" ??
1488      * I'm not fiddling with the LRUQ here, either, perhaps I should, or else
1489      * invent a lightweight version of GetVCache.
1490      */
1491     if (osi_lookup_isdot(aname)) {      /* special case */
1492         ObtainReadLock(&afs_xvcache);
1493         osi_vnhold(adp, 0);
1494         ReleaseReadLock(&afs_xvcache);
1495 #ifdef AFS_DARWIN80_ENV
1496         vnode_get(AFSTOV(adp));
1497 #endif
1498         code = 0;
1499         *avcp = tvc = adp;
1500         hit = 1;
1501         if (adp && !VREFCOUNT_GT(adp, 0)) {
1502             osi_Panic("TT2");
1503         }
1504         goto done;
1505     }
1506
1507     /*
1508      * Special case lookup of ".." in the dynamic mount directory.
1509      * The parent of this directory is _always_ the AFS root volume.
1510      */
1511     if (afs_IsDynrootMount(adp) &&
1512         aname[0] == '.' && aname[1] == '.' && !aname[2]) {
1513
1514         ObtainReadLock(&afs_xvcache);
1515         osi_vnhold(afs_globalVp, 0);
1516         ReleaseReadLock(&afs_xvcache);
1517 #ifdef AFS_DARWIN80_ENV
1518         vnode_get(AFSTOV(afs_globalVp));
1519 #endif
1520         code = 0;
1521         *avcp = tvc = afs_globalVp;
1522         hit = 1;
1523         goto done;
1524     }
1525
1526     /*
1527      * Special case lookups in the dynamic mount directory.
1528      * The names here take the form cell:volume, similar to a mount point.
1529      * EvalMountData parses that and returns a cell and volume ID, which
1530      * we use to construct the appropriate dynroot Fid.
1531      */
1532     if (afs_IsDynrootMount(adp)) {
1533         struct VenusFid tfid;
1534         afs_uint32 cellidx, volid, vnoid, uniq;
1535
1536         code = EvalMountData('%', aname, 0, 0, NULL, treq, &cellidx, &volid, &vnoid, &uniq);
1537         if (code)
1538             goto done;
1539         /* If a vnode was returned, it's not a real mount point */
1540         if (vnoid > 1) {
1541             struct cell *tcell = afs_GetCellByIndex(cellidx, READ_LOCK);
1542             tfid.Cell = tcell->cellNum;
1543             afs_PutCell(tcell, READ_LOCK);
1544             tfid.Fid.Vnode = vnoid;
1545             tfid.Fid.Volume = volid;
1546             tfid.Fid.Unique = uniq;
1547         } else {
1548             afs_GetDynrootMountFid(&tfid);
1549             tfid.Fid.Vnode = VNUM_FROM_TYPEID(VN_TYPE_MOUNT, cellidx << 2);
1550             tfid.Fid.Unique = volid;
1551         }
1552         *avcp = tvc = afs_GetVCache(&tfid, treq, NULL, NULL);
1553         code = (tvc ? 0 : EIO);
1554         hit = 1;
1555         goto done;
1556     }
1557
1558 #ifdef AFS_LINUX26_ENV
1559     /*
1560      * Special case of the dynamic mount volume in a static root.
1561      * This is really unfortunate, but we need this for the translator.
1562      */
1563     if (adp == afs_globalVp && !afs_GetDynrootEnable() &&
1564         !strcmp(aname, AFS_DYNROOT_MOUNTNAME)) {
1565         struct VenusFid tfid;
1566
1567         afs_GetDynrootMountFid(&tfid);
1568         *avcp = tvc = afs_GetVCache(&tfid, treq, NULL, NULL);
1569         code = 0;
1570         hit = 1;
1571         goto done;
1572     }
1573 #endif
1574
1575     Check_AtSys(adp, aname, &sysState, treq);
1576     tname = sysState.name;
1577
1578     /* 1st Check_AtSys and lookup by tname is required here, for now,
1579      * because the dnlc is *not* told to remove entries for the parent
1580      * dir of file/dir op that afs_LocalHero likes, but dnlc is informed
1581      * if the cached entry for the parent dir is invalidated for a
1582      * non-local change.
1583      * Otherwise, we'd be able to do a dnlc lookup on an entry ending
1584      * w/@sys and know the dnlc was consistent with reality. */
1585     tvc = osi_dnlc_lookup(adp, tname, WRITE_LOCK);
1586     *avcp = tvc;                /* maybe wasn't initialized, but it is now */
1587     if (tvc) {
1588         if (no_read_access && vType(tvc) != VDIR && vType(tvc) != VLNK) {
1589             /* need read access on dir to stat non-directory / non-link */
1590             afs_PutVCache(tvc);
1591             *avcp = NULL;
1592             code = EACCES;
1593             goto done;
1594         }
1595 #ifdef AFS_LINUX22_ENV
1596         if (tvc->mvstat == AFS_MVSTAT_ROOT) {   /* we don't trust the dnlc for root vcaches */
1597             AFS_RELE(AFSTOV(tvc));
1598             *avcp = 0;
1599         } else {
1600             code = 0;
1601             hit = 1;
1602             goto done;
1603         }
1604 #else /* non - LINUX */
1605         code = 0;
1606         hit = 1;
1607         goto done;
1608 #endif /* linux22 */
1609     }
1610
1611     {                           /* sub-block just to reduce stack usage */
1612         struct dcache *tdc;
1613         afs_size_t dirOffset, dirLen;
1614         struct VenusFid tfid;
1615
1616         /* now we have to lookup the next fid */
1617         if (afs_InReadDir(adp))
1618             tdc = adp->dcreaddir;
1619         else
1620             tdc = afs_GetDCache(adp, (afs_size_t) 0, treq,
1621                                 &dirOffset, &dirLen, 1);
1622         if (!tdc) {
1623             *avcp = NULL;       /* redundant, but harmless */
1624             code = EIO;
1625             goto done;
1626         }
1627
1628         /* now we will just call dir package with appropriate inode.
1629          * Dirs are always fetched in their entirety for now */
1630         ObtainReadLock(&adp->lock);
1631         ObtainReadLock(&tdc->lock);
1632
1633         /*
1634          * Make sure that the data in the cache is current. There are two
1635          * cases we need to worry about:
1636          * 1. The cache data is being fetched by another process.
1637          * 2. The cache data is no longer valid
1638          *
1639          * If a readdir is in progress _in this thread_, it has a shared
1640          * lock on the vcache and has obtained current data, so we just
1641          * use that.  This eliminates several possible deadlocks.
1642          */
1643         if (!afs_InReadDir(adp)) {
1644             while ((adp->f.states & CStatd)
1645                    && (tdc->dflags & DFFetching)
1646                    && hsame(adp->f.m.DataVersion, tdc->f.versionNo)) {
1647                 ReleaseReadLock(&tdc->lock);
1648                 ReleaseReadLock(&adp->lock);
1649                 afs_osi_Sleep(&tdc->validPos);
1650                 ObtainReadLock(&adp->lock);
1651                 ObtainReadLock(&tdc->lock);
1652             }
1653             if (!(adp->f.states & CStatd)
1654                 || !hsame(adp->f.m.DataVersion, tdc->f.versionNo)) {
1655                 ReleaseReadLock(&tdc->lock);
1656                 ReleaseReadLock(&adp->lock);
1657                 afs_PutDCache(tdc);
1658                 if (tname && tname != aname)
1659                     osi_FreeLargeSpace(tname);
1660                 goto redo;
1661             }
1662         }
1663
1664         /* Save the version number for when we call osi_dnlc_enter */
1665         hset(versionNo, tdc->f.versionNo);
1666
1667         /*
1668          * check for, and handle "@sys" if it's there.  We should be able
1669          * to avoid the alloc and the strcpy with a little work, but it's
1670          * not pressing.  If there aren't any remote users (ie, via the
1671          * NFS translator), we have a slightly easier job.
1672          * the faster way to do this is to check for *aname == '@' and if
1673          * it's there, check for @sys, otherwise, assume there's no @sys
1674          * then, if the lookup fails, check for .*@sys...
1675          */
1676         /* above now implemented by Check_AtSys and Next_AtSys */
1677
1678         /* lookup the name in the appropriate dir, and return a cache entry
1679          * on the resulting fid */
1680         code =
1681             afs_dir_LookupOffset(tdc, sysState.name, &tfid.Fid,
1682                                  &dirCookie);
1683
1684         /* If the first lookup doesn't succeed, maybe it's got @sys in the name */
1685         while (code == ENOENT && Next_AtSys(adp, treq, &sysState))
1686             code =
1687                 afs_dir_LookupOffset(tdc, sysState.name, &tfid.Fid,
1688                                      &dirCookie);
1689         tname = sysState.name;
1690
1691         ReleaseReadLock(&tdc->lock);
1692         if (!afs_InReadDir(adp))
1693             afs_PutDCache(tdc);
1694         if (code == ENOENT && afs_IsDynroot(adp) && dynrootRetry && !tryEvalOnly) {
1695             struct cell *tc;
1696             char *cn = (tname[0] == '.') ? tname + 1 : tname;
1697             ReleaseReadLock(&adp->lock);
1698             /* confirm it's not just hushed */
1699             tc = afs_GetCellByName(cn, WRITE_LOCK);
1700             if (tc) {
1701                 if (tc->states & CHush) {
1702                     tc->states &= ~CHush;
1703                     ReleaseWriteLock(&tc->lock);
1704                     afs_DynrootInvalidate();
1705                     goto redo;
1706                 }
1707                 ReleaseWriteLock(&tc->lock);
1708             }
1709             /* Allow a second dynroot retry if the cell was hushed before */
1710             dynrootRetry = 0;
1711             if (tname[0] == '.')
1712                 afs_LookupAFSDB(tname + 1);
1713             else
1714                 afs_LookupAFSDB(tname);
1715             if (tname && tname != aname)
1716                 osi_FreeLargeSpace(tname);
1717             goto redo;
1718         } else {
1719             ReleaseReadLock(&adp->lock);
1720         }
1721
1722         /* new fid has same cell and volume */
1723         tfid.Cell = adp->f.fid.Cell;
1724         tfid.Fid.Volume = adp->f.fid.Fid.Volume;
1725         afs_Trace4(afs_iclSetp, CM_TRACE_LOOKUP, ICL_TYPE_POINTER, adp,
1726                    ICL_TYPE_STRING, tname, ICL_TYPE_FID, &tfid,
1727                    ICL_TYPE_INT32, code);
1728
1729         if (code) {
1730             if (code != ENOENT) {
1731                 /*printf("LOOKUP dirLookupOff -> %d\n", code);*/
1732             }
1733             goto done;
1734         }
1735
1736         /* prefetch some entries, if the dir is currently open.  The variable
1737          * dirCookie tells us where to start prefetching from.
1738          */
1739         if (!AFS_IS_DISCONNECTED &&
1740             AFSDOBULK && adp->opens > 0 && !(adp->f.states & CForeign)
1741             && !afs_IsDynroot(adp) && !afs_InReadDir(adp)) {
1742             afs_int32 retry;
1743             /* if the entry is not in the cache, or is in the cache,
1744              * but hasn't been statd, then do a bulk stat operation.
1745              */
1746             do {
1747                 retry = 0;
1748                 ObtainReadLock(&afs_xvcache);
1749                 tvc = afs_FindVCache(&tfid, &retry, 0 /* !stats,!lru */ );
1750                 ReleaseReadLock(&afs_xvcache);
1751             } while (tvc && retry);
1752
1753             if (!tvc || !(tvc->f.states & CStatd))
1754                 bulkcode = afs_DoBulkStat(adp, dirCookie, treq);
1755             else
1756                 bulkcode = 0;
1757
1758             /* if the vcache isn't usable, release it */
1759             if (tvc && !(tvc->f.states & CStatd)) {
1760                 afs_PutVCache(tvc);
1761                 tvc = NULL;
1762             }
1763         } else {
1764             tvc = NULL;
1765             bulkcode = 0;
1766         }
1767
1768         /* now get the status info, if we don't already have it */
1769         /* This is kind of weird, but we might wind up accidentally calling
1770          * RXAFS_Lookup because we happened upon a file which legitimately
1771          * has a 0 uniquifier. That is the result of allowing unique to wrap
1772          * to 0. This was fixed in AFS 3.4. For CForeign, Unique == 0 means that
1773          * the file has not yet been looked up.
1774          */
1775         if (!tvc) {
1776             afs_int32 cached = 0;
1777             if (!tfid.Fid.Unique && (adp->f.states & CForeign)) {
1778                 tvc = afs_LookupVCache(&tfid, treq, &cached, adp, tname);
1779             }
1780             if (!tvc && !bulkcode) {    /* lookup failed or wasn't called */
1781                 tvc = afs_GetVCache(&tfid, treq, &cached, NULL);
1782             }
1783         }                       /* if !tvc */
1784     }                           /* sub-block just to reduce stack usage */
1785
1786     if (tvc) {
1787         if (adp->f.states & CForeign)
1788             tvc->f.states |= CForeign;
1789         tvc->f.parent.vnode = adp->f.fid.Fid.Vnode;
1790         tvc->f.parent.unique = adp->f.fid.Fid.Unique;
1791         tvc->f.states &= ~CBulkStat;
1792
1793         if (afs_fakestat_enable == 2 && tvc->mvstat == AFS_MVSTAT_MTPT) {
1794             ObtainSharedLock(&tvc->lock, 680);
1795             if (!tvc->linkData) {
1796                 UpgradeSToWLock(&tvc->lock, 681);
1797                 code = afs_HandleLink(tvc, treq);
1798                 ConvertWToRLock(&tvc->lock);
1799             } else {
1800                 ConvertSToRLock(&tvc->lock);
1801                 code = 0;
1802             }
1803             if (!code && !afs_strchr(tvc->linkData, ':'))
1804                 force_eval = 1;
1805             ReleaseReadLock(&tvc->lock);
1806         }
1807         if (tvc->mvstat == AFS_MVSTAT_MTPT && (tvc->f.states & CMValid) && tvc->mvid.target_root != NULL)
1808           force_eval = 1; /* This is now almost for free, get it correct */
1809
1810 #if defined(UKERNEL)
1811         if (!(flags & AFS_LOOKUP_NOEVAL))
1812             /* don't eval mount points */
1813 #endif /* UKERNEL */
1814             if (tvc->mvstat == AFS_MVSTAT_MTPT && force_eval) {
1815                 /* a mt point, possibly unevaluated */
1816                 struct volume *tvolp;
1817
1818                 ObtainWriteLock(&tvc->lock, 133);
1819                 code = EvalMountPoint(tvc, adp, &tvolp, treq);
1820                 ReleaseWriteLock(&tvc->lock);
1821
1822                 if (code) {
1823                     afs_PutVCache(tvc);
1824                     if (tvolp)
1825                         afs_PutVolume(tvolp, WRITE_LOCK);
1826                     goto done;
1827                 }
1828
1829                 /* next, we want to continue using the target of the mt point */
1830                 if (tvc->mvid.target_root && (tvc->f.states & CMValid)) {
1831                     struct vcache *uvc;
1832                     /* now lookup target, to set .. pointer */
1833                     afs_Trace2(afs_iclSetp, CM_TRACE_LOOKUP1,
1834                                ICL_TYPE_POINTER, tvc, ICL_TYPE_FID,
1835                                &tvc->f.fid);
1836                     uvc = tvc;  /* remember for later */
1837
1838                     if (tvolp && (tvolp->states & VForeign)) {
1839                         /* XXXX tvolp has ref cnt on but not locked! XXX */
1840                         tvc =
1841                             afs_GetRootVCache(tvc->mvid.target_root, treq, NULL, tvolp);
1842                     } else {
1843                         tvc = afs_GetVCache(tvc->mvid.target_root, treq, NULL, NULL);
1844                     }
1845                     afs_PutVCache(uvc); /* we're done with it */
1846
1847                     if (!tvc) {
1848                         code = EIO;
1849                         if (tvolp) {
1850                             afs_PutVolume(tvolp, WRITE_LOCK);
1851                         }
1852                         goto done;
1853                     }
1854
1855                     /* now, if we came via a new mt pt (say because of a new
1856                      * release of a R/O volume), we must reevaluate the ..
1857                      * ptr to point back to the appropriate place */
1858                     if (tvolp) {
1859                         ObtainWriteLock(&tvc->lock, 134);
1860                         if (tvc->mvid.parent == NULL) {
1861                             tvc->mvid.parent =
1862                                 osi_AllocSmallSpace(sizeof(struct VenusFid));
1863                         }
1864                         /* setup backpointer */
1865                         *tvc->mvid.parent = tvolp->dotdot;
1866                         ReleaseWriteLock(&tvc->lock);
1867                         afs_PutVolume(tvolp, WRITE_LOCK);
1868                     }
1869                 } else {
1870                     afs_PutVCache(tvc);
1871                     code = ENODEV;
1872                     if (tvolp)
1873                         afs_PutVolume(tvolp, WRITE_LOCK);
1874                     goto done;
1875                 }
1876             }
1877         *avcp = tvc;
1878         if (tvc && !VREFCOUNT_GT(tvc, 0)) {
1879             osi_Panic("TT3");
1880         }
1881         code = 0;
1882     } else {
1883         /* if we get here, we found something in a directory that couldn't
1884          * be located (a Multics "connection failure").  If the volume is
1885          * read-only, we try flushing this entry from the cache and trying
1886          * again. */
1887         if (!AFS_IS_DISCONNECTED) {
1888             if (pass == 0) {
1889                 struct volume *tv;
1890                 tv = afs_GetVolume(&adp->f.fid, treq, READ_LOCK);
1891                 if (tv) {
1892                     if (tv->states & VRO) {
1893                         pass = 1;       /* try this *once* */
1894                         /* re-stat to get later version */
1895                         afs_StaleVCache(adp);
1896                         afs_PutVolume(tv, READ_LOCK);
1897                         goto redo;
1898                     }
1899                     afs_PutVolume(tv, READ_LOCK);
1900                 }
1901             }
1902             code = EIO;
1903         } else {
1904             code = ENETDOWN;
1905         }
1906     }
1907
1908   done:
1909     /* put the network buffer back, if need be */
1910     if (tname != aname && tname)
1911         osi_FreeLargeSpace(tname);
1912     if (code == 0) {
1913
1914         if (afs_mariner)
1915             afs_AddMarinerName(aname, tvc);
1916
1917 #if defined(UKERNEL)
1918         if (!(flags & AFS_LOOKUP_NOEVAL)) {
1919             /* Here we don't enter the name into the DNLC because we want the
1920              * evaluated mount dir to be there (the vcache for the mounted
1921              * volume) rather than the vc of the mount point itself.  We can
1922              * still find the mount point's vc in the vcache by its fid. */
1923 #endif /* UKERNEL */
1924             if (!hit && (force_eval || tvc->mvstat != AFS_MVSTAT_MTPT)) {
1925                 osi_dnlc_enter(adp, aname, tvc, &versionNo);
1926             } else {
1927 #ifdef AFS_LINUX20_ENV
1928                 /* So Linux inode cache is up to date. */
1929                 code = afs_VerifyVCache(tvc, treq);
1930 #else
1931                 afs_PutFakeStat(&fakestate);
1932                 afs_DestroyReq(treq);
1933                 AFS_DISCON_UNLOCK();
1934                 return 0;       /* can't have been any errors if hit and !code */
1935 #endif
1936             }
1937 #if defined(UKERNEL)
1938         }
1939 #endif
1940     }
1941     if (bulkcode)
1942         code = bulkcode;
1943
1944     code = afs_CheckCode(code, treq, 19);
1945     if (code) {
1946         /* If there is an error, make sure *avcp is null.
1947          * Alphas panic otherwise - defect 10719.
1948          */
1949         *avcp = NULL;
1950     }
1951
1952     afs_PutFakeStat(&fakestate);
1953     afs_DestroyReq(treq);
1954     AFS_DISCON_UNLOCK();
1955     return code;
1956 }