src/afs/afs_analyze.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
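/*
 * Implements: afs_Analyze(), which examines the result of an RPC and
 * decides whether the caller should retry it, together with its
 * file-local helpers VLDB_Same(), afs_BlackListOnce(), afs_ClearStatus()
 * and afs_PrintServerErrors().
 */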
  13 #include <afsconfig.h>
  14 #include "afs/param.h"
  15
  16
  17 #include "afs/stds.h"
  18 #include "afs/sysincludes.h"    /* Standard vendor system headers */
  19
  20 #ifndef UKERNEL
  21 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_FBSD_ENV)
  22 #include <net/if.h>
  23 #include <netinet/in.h>
  24 #endif
  25
  26 #ifdef AFS_SGI62_ENV
  27 #include "h/hashing.h"
  28 #endif
  29 #if !defined(AFS_HPUX110_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_FBSD_ENV) && !defined(AFS_DARWIN_ENV)
  30 #include <netinet/in_var.h>
  31 #endif
  32 #endif /* !UKERNEL */
  33
  34 #include "afsincludes.h"        /* Afs-based standard headers */
  35 #include "afs/afs_stats.h"      /* afs statistics */
  36 #include "afs/afs_util.h"
  37 #include "afs/unified_afs.h"
  38
  39 #if     defined(AFS_SUN5_ENV)
  40 #include <inet/led.h>
  41 #include <inet/common.h>
  42 #include <netinet/ip6.h>
  43 #include <inet/ip.h>
  44 #endif
  45
/*
 * Fallback value for the ubik error-table base; only takes effect when no
 * earlier header has defined ERROR_TABLE_BASE_U.
 * (Shouldn't do it this way, but for now will do.)
 */
#ifndef ERROR_TABLE_BASE_U
#define ERROR_TABLE_BASE_U      (5376L)
#endif /* ubik error base define */

/*
 * Fallback value for the unified-AFS (uae) error-table base; only takes
 * effect when no earlier header has defined ERROR_TABLE_BASE_uae.
 * (Shouldn't do it this way, but for now will do.)
 */
#ifndef ERROR_TABLE_BASE_uae
#define ERROR_TABLE_BASE_uae    (49733376L)
#endif /* unified afs error base define */

/*
 * Same hack for the vlserver error base as for the ubik error base.
 * Note that VL_NOENT is defined here only when ERROR_TABLE_BASE_VL was
 * not already defined.
 */
#ifndef ERROR_TABLE_BASE_VL
#define ERROR_TABLE_BASE_VL     (363520L)
#define VL_NOENT                (363524L)
#endif /* vlserver error base define */
  61
  62
int afs_BusyWaitPeriod = 15;    /**< poll period, in seconds */

afs_int32 hm_retry_RO = 0;      /**< enable read-only hard-mount retry */
afs_int32 hm_retry_RW = 0;      /**< enable read-write hard-mount retry */
afs_int32 hm_retry_int = 0;     /**< hard-mount retry interval, in seconds */

/* Sleep for 'at' seconds; the argument is scaled by 1000 before being
 * handed to afs_osi_Wait (presumably milliseconds -- confirm). */
#define VSleep(at)      afs_osi_Wait((at)*1000, 0, 0)


/* Last VL RPC result recorded by VLDB_Same; only updated on the code path
 * taken for servers not yet flagged SNO_LHOSTS or SYES_LHOSTS. */
int lastcode;
/* Result codes returned by VLDB_Same. */
#define DIFFERENT 0             /* vldb record differs from the cached one */
#define SAME 1                  /* vldb record matches the cached one */
#define DUNNO 2                 /* could not tell either way */
  76 /*!
  77  * \brief
  78  *      Request vldb record to determined if it has changed.
  79  *
  80  * \retval 0 if the vldb record for a specific volume is different from what
  81  *           we have cached -- perhaps the volume has moved.
  82  * \retval 1 if the vldb record is the same
  83  * \retval 2 if we can't tell if it's the same or not.
  84  *
  85  * \note
  86  *      If 0 returned, the caller will probably start over at the beginning of our
  87  *      list of servers for this volume and try to find one that is up.  If
  88  *      not 0, we will probably just keep plugging with what we have
  89  *      cached.   If we fail to contact the VL server, we  should just keep
  90  *      trying with the information we have, rather than failing.
  91  */
  92 static int
  93 VLDB_Same(struct VenusFid *afid, struct vrequest *areq)
  94 {
  95     struct vrequest *treq = NULL;
  96     struct afs_conn *tconn;
  97     int i, type = 0;
  98     union {
  99         struct vldbentry tve;
 100         struct nvldbentry ntve;
 101         struct uvldbentry utve;
 102     } *v;
 103     struct volume *tvp;
 104     struct cell *tcell;
 105     char *bp, tbuf[CVBS];       /* biggest volume id is 2^32, ~ 4*10^9 */
 106     unsigned int changed;
 107     struct server *(oldhosts[NMAXNSERVERS]);
 108     struct rx_connection *rxconn;
 109
 110     AFS_STATCNT(CheckVLDB);
 111     afs_FinalizeReq(areq);
 112
 113     if ((i = afs_CreateReq(&treq, afs_osi_credp)))
 114         return DUNNO;
 115     v = afs_osi_Alloc(sizeof(*v));
 116     osi_Assert(v != NULL);
 117     tcell = afs_GetCell(afid->Cell, READ_LOCK);
 118     bp = afs_cv2string(&tbuf[CVBS], afid->Fid.Volume);
 119     do {
 120         VSleep(2);              /* Better safe than sorry. */
 121         tconn =
 122             afs_ConnByMHosts(tcell->cellHosts, tcell->vlport, tcell->cellNum,
 123                              treq, SHARED_LOCK, 0, &rxconn);
 124         if (tconn) {
 125             if ( tconn->parent->srvr->server->flags & SNO_LHOSTS) {
 126                 type = 0;
 127                 RX_AFS_GUNLOCK();
 128                 i = VL_GetEntryByNameO(rxconn, bp, &v->tve);
 129                 RX_AFS_GLOCK();
 130             } else if (tconn->parent->srvr->server->flags & SYES_LHOSTS) {
 131                 type = 1;
 132                 RX_AFS_GUNLOCK();
 133                 i = VL_GetEntryByNameN(rxconn, bp, &v->ntve);
 134                 RX_AFS_GLOCK();
 135             } else {
 136                 type = 2;
 137                 RX_AFS_GUNLOCK();
 138                 i = VL_GetEntryByNameU(rxconn, bp, &v->utve);
 139                 RX_AFS_GLOCK();
 140                 if (!(tconn->parent->srvr->server->flags & SVLSRV_UUID)) {
 141                     if (i == RXGEN_OPCODE) {
 142                         type = 1;
 143                         RX_AFS_GUNLOCK();
 144                         i = VL_GetEntryByNameN(rxconn, bp, &v->ntve);
 145                         RX_AFS_GLOCK();
 146                         if (i == RXGEN_OPCODE) {
 147                             type = 0;
 148                             tconn->parent->srvr->server->flags |= SNO_LHOSTS;
 149                             RX_AFS_GUNLOCK();
 150                             i = VL_GetEntryByNameO(rxconn, bp, &v->tve);
 151                             RX_AFS_GLOCK();
 152                         } else if (!i)
 153                             tconn->parent->srvr->server->flags |= SYES_LHOSTS;
 154                     } else if (!i)
 155                         tconn->parent->srvr->server->flags |= SVLSRV_UUID;
 156                 }
 157                 lastcode = i;
 158             }
 159         } else
 160             i = -1;
 161     } while (afs_Analyze(tconn, rxconn, i, NULL, treq, -1,      /* no op code for this */
 162                          SHARED_LOCK, tcell));
 163
 164     afs_PutCell(tcell, READ_LOCK);
 165     afs_Trace2(afs_iclSetp, CM_TRACE_CHECKVLDB, ICL_TYPE_FID, &afid,
 166                ICL_TYPE_INT32, i);
 167
 168     if (i) {
 169         afs_DestroyReq(treq);
 170         afs_osi_Free(v, sizeof(*v));
 171         return DUNNO;
 172     }
 173     /* have info, copy into serverHost array */
 174     changed = 0;
 175     tvp = afs_FindVolume(afid, WRITE_LOCK);
 176     if (tvp) {
 177         ObtainWriteLock(&tvp->lock, 107);
 178         for (i = 0; i < NMAXNSERVERS && tvp->serverHost[i]; i++) {
 179             oldhosts[i] = tvp->serverHost[i];
 180         }
 181         ReleaseWriteLock(&tvp->lock);
 182
 183         if (type == 2) {
 184             LockAndInstallUVolumeEntry(tvp, &v->utve, afid->Cell, tcell, treq);
 185         } else if (type == 1) {
 186             LockAndInstallNVolumeEntry(tvp, &v->ntve, afid->Cell);
 187         } else {
 188             LockAndInstallVolumeEntry(tvp, &v->tve, afid->Cell);
 189         }
 190
 191         if (i < NMAXNSERVERS && tvp->serverHost[i]) {
 192             changed = 1;
 193         }
 194         for (--i; !changed && i >= 0; i--) {
 195             if (tvp->serverHost[i] != oldhosts[i]) {
 196                 changed = 1;    /* also happens if prefs change.  big deal. */
 197             }
 198         }
 199
 200         tvp->states &= ~VRecheck;     /* Just checked it. */
 201         tvp->setupTime = osi_Time();  /* Time the vldb was checked. */
 202
 203         ReleaseWriteLock(&tvp->lock);
 204         afs_PutVolume(tvp, WRITE_LOCK);
 205     } else {                    /* can't find volume */
 206         tvp = afs_GetVolume(afid, treq, WRITE_LOCK);
 207         if (tvp) {
 208             afs_PutVolume(tvp, WRITE_LOCK);
 209             afs_DestroyReq(treq);
 210             afs_osi_Free(v, sizeof(*v));
 211             return DIFFERENT;
 212         } else {
 213             afs_DestroyReq(treq);
 214             afs_osi_Free(v, sizeof(*v));
 215             return DUNNO;
 216         }
 217     }
 218
 219     afs_DestroyReq(treq);
 220     afs_osi_Free(v, sizeof(*v));
 221     return (changed ? DIFFERENT : SAME);
 222 }                               /*VLDB_Same */
 223
 224 /*!
 225  * \brief
 226  *      Mark a server as invalid for further attempts of this request only.
 227  *
 228  * \param[in,out] areq  The request record associated with this operation.
 229  * \param[in]     afid  The FID of the file involved in the action.  This argument
 230  *                      may be null if none was involved.
 231  * \param[in,out] tsp   pointer to a server struct for the server we wish to
 232  *                      blacklist.
 233  *
 234  * \returns
 235  *      Non-zero value if further servers are available to try,
 236  *      zero otherwise.
 237  *
 238  * \note
 239  *      This routine is typically called in situations where we believe
 240  *      one server out of a pool may have an error condition.
 241  *
 242  * \note
 243  *      The afs_Conn* routines use the list of invalidated servers to
 244  *      avoid reusing a server marked as invalid for this request.
 245  */
 246 static afs_int32
 247 afs_BlackListOnce(struct vrequest *areq, struct VenusFid *afid,
 248                   struct server *tsp)
 249 {
 250     struct volume *tvp;
 251     afs_int32 i;
 252     afs_int32 serversleft = 0;
 253
 254     if (afid) {
 255         tvp = afs_FindVolume(afid, READ_LOCK);
 256         if (tvp) {
 257             for (i = 0; i < AFS_MAXHOSTS; i++) {
 258                 if (tvp->serverHost[i] == tsp) {
 259                     areq->skipserver[i] = 1;
 260                 }
 261                 if (tvp->serverHost[i] &&
 262                     (tvp->serverHost[i]->addr->sa_flags &
 263                       SRVR_ISDOWN)) {
 264                     areq->skipserver[i] = 1;
 265                 }
 266             }
 267             for (i = 0; i < AFS_MAXHOSTS; i++) {
 268                 if (tvp->serverHost[i] && areq->skipserver[i] == 0) {
 269                     serversleft = 1;
 270                     break;
 271                 }
 272             }
 273             afs_PutVolume(tvp, READ_LOCK);
 274             return serversleft;
 275         }
 276     }
 277     return serversleft;
 278 }
 279
 280 /*!
 281  * \brief
 282  *      Clear any cached status for the target FID of a failed fileserver
 283  *      write RPC.
 284  *
 285  * \param[in]     afid   The FID of the file involved in the action.  This argument
 286  *                       may be null if none was involved.
 287  * \param[in]     op     which RPC we are analyzing.
 288  * \param[in,out] avp    A pointer to the struct volume, if we already have one.
 289  *
 290  * \returns
 291  *      Non-zero value if the related RPC operation can be retried,
 292  *      zero otherwise.
 293  *
 294  * \note
 295  *      This routine is called when we got a network error,
 296  *      and discards state if the operation was a data-mutating
 297  *      operation.
 298  */
 299 static int
 300 afs_ClearStatus(struct VenusFid *afid, int op, struct volume *avp)
 301 {
 302     struct volume *tvp = NULL;
 303
 304     /* if it's not a write op, we have nothing to veto and shouldn't clear. */
 305     if (!AFS_STATS_FS_RPCIDXES_ISWRITE(op)) {
 306         return 1;
 307     }
 308
 309     if (avp)
 310         tvp = avp;
 311     else if (afid)
 312         tvp = afs_FindVolume(afid, READ_LOCK);
 313
 314     /* don't assume just discarding will fix if no cached volume */
 315     if (tvp) {
 316         struct vcache *tvc;
 317         ObtainReadLock(&afs_xvcache);
 318         if ((tvc = afs_FindVCache(afid, 0, 0))) {
 319             ReleaseReadLock(&afs_xvcache);
 320             afs_StaleVCacheFlags(tvc, AFS_STALEVC_NOCB | AFS_STALEVC_NODNLC,
 321                                  CUnique);
 322             afs_PutVCache(tvc);
 323         } else {
 324             ReleaseReadLock(&afs_xvcache);
 325         }
 326         if (!avp)
 327             afs_PutVolume(tvp, READ_LOCK);
 328     }
 329
 330     if (AFS_STATS_FS_RPCIDXES_WRITE_RETRIABLE(op))
 331         return 1;
 332
 333     /* not retriable: we may have raced ourselves */
 334     return 0;
 335 }
 336
 337 /*!
 338  * \brief
 339  *      Print the last errors from the servers for the volume on
 340  *      this request.
 341  *
 342  * \param[in] areq   The request record associated with this operation.
 343  * \param[in] afid   The FID of the file involved in the action.  This argument
 344  *                   may be null if none was involved.
 345  *
 346  * \return
 347  *      None
 348  *
 349  * \note
 350  *      This routine is called before a hard-mount retry, to display
 351  *      the servers by primary address and the errors encountered.
 352  */
 353 static void
 354 afs_PrintServerErrors(struct vrequest *areq, struct VenusFid *afid)
 355 {
 356     int i;
 357     struct volume *tvp;
 358     struct srvAddr *sa;
 359     afs_uint32 address;
 360     char *sep = " (";
 361     char *term = "";
 362
 363     if (afid) {
 364         tvp = afs_FindVolume(afid, READ_LOCK);
 365         if (tvp) {
 366             for (i = 0; i < AFS_MAXHOSTS; i++) {
 367                 if (areq->lasterror[i] && tvp->serverHost[i]) {
 368                     sa = tvp->serverHost[i]->addr;
 369                     if (sa) {
 370                         address = ntohl(sa->sa_ip);
 371                         afs_warnuser("%s%d.%d.%d.%d code=%d", sep,
 372                                      (address >> 24), (address >> 16) & 0xff,
 373                                      (address >> 8) & 0xff, (address) & 0xff,
 374                                      areq->lasterror[i]);
 375                         sep = ", ";
 376                         term = ")";
 377                     }
 378                 }
 379             }
 380             afs_PutVolume(tvp, READ_LOCK);
 381         }
 382     }
 383     afs_warnuser("%s\n", term);
 384 }
 385
/*!
 * \brief
 *      Analyze the outcome of an RPC operation, taking whatever support
 *      actions are necessary.
 *
 * \param[in]     aconn  Ptr to the relevant connection on which the call was made.
 *                       May be null when no connection could be obtained.
 * \param[in]     rxconn Ptr to the rx_connection.
 * \param[in]     acode  The return code experienced by the RPC.
 * \param[in]     afid   The FID of the file involved in the action.  This argument
 *                       may be null if none was involved.
 * \param[in,out] areq   The request record associated with this operation.
 * \param[in]     op     which RPC we are analyzing.  Values outside
 *                       [0, AFS_STATS_NUM_FS_RPC_OPS) disable the per-RPC
 *                       error counters.
 * \param[in]     locktype  Lock type handed back to afs_PutConn when the
 *                       connection is released.
 * \param[in]     cellp  pointer to a cell struct.  Must provide either fid or cell.
 *
 * \returns
 *      Non-zero value if the related RPC operation should be retried,
 *      zero otherwise.
 *
 * \note
 *      This routine is typically called in a do-while loop, causing the
 *      embedded RPC operation to be called repeatedly if appropriate
 *      until whatever error condition (if any) is intolerable.
 *
 * \note
 *      Except on the busy-volume early return (taken only when aconn is
 *      null), a non-null aconn is always released with afs_PutConn before
 *      returning.
 *
 * \note
 *      The retry return value is used by afs_StoreAllSegments to determine
 *      if this is a temporary or permanent error.
 */
int
afs_Analyze(struct afs_conn *aconn, struct rx_connection *rxconn,
            afs_int32 acode, struct VenusFid *afid, struct vrequest *areq,
            int op, afs_int32 locktype, struct cell *cellp)
{
    afs_int32 i;
    struct srvAddr *sa;
    struct server *tsp;
    struct volume *tvp = NULL;
    afs_int32 shouldRetry = 0;
    afs_int32 serversleft = 1;
    struct afs_stats_RPCErrors *aerrP;
    afs_uint32 address;

    if (AFS_IS_DISCONNECTED && !AFS_IN_SYNC) {
        /* On reconnection, act as connected. XXX: for now.... */
        /* SXW - This may get very tired after a while. We should try and
         *       intercept all RPCs before they get here ... */
        /*printf("afs_Analyze: disconnected\n");*/
        afs_FinalizeReq(areq);
        if (aconn) {
            /* SXW - I suspect that this will _never_ happen - we shouldn't
             *       get a connection because we're disconnected !!!*/
            afs_PutConn(aconn, rxconn, locktype);
        }
        return 0;
    }

    AFS_STATCNT(afs_Analyze);
    afs_Trace4(afs_iclSetp, CM_TRACE_ANALYZE, ICL_TYPE_INT32, op,
               ICL_TYPE_POINTER, aconn, ICL_TYPE_INT32, acode, ICL_TYPE_LONG,
               areq->uid);

    /* aerrP stays null unless 'op' indexes a valid per-RPC error record. */
    aerrP = (struct afs_stats_RPCErrors *)0;

    if ((op >= 0) && (op < AFS_STATS_NUM_FS_RPC_OPS))
        aerrP = &(afs_stats_cmfullperf.rpc.fsRPCErrors[op]);

    afs_FinalizeReq(areq);
    if (!aconn && areq->busyCount) {    /* one RPC or more got VBUSY/VRESTARTING */

        /* NOTE(review): afid may be null here (see the 'afid ?' tests just
         * below); presumably afs_FindVolume tolerates a null FID -- confirm. */
        tvp = afs_FindVolume(afid, READ_LOCK);
        if (tvp) {
            afs_warnuser("afs: Waiting for busy volume %u (%s) in cell %s\n",
                         (afid ? afid->Fid.Volume : 0),
                         (tvp->name ? tvp->name : ""),
                         ((tvp->serverHost[0]
                           && tvp->serverHost[0]->cell) ? tvp->serverHost[0]->
                          cell->cellName : ""));

            /* Reset every busy slot so the servers get tried again; retry
             * as long as at least one slot is not offline. */
            for (i = 0; i < AFS_MAXHOSTS; i++) {
                if (tvp->status[i] != not_busy && tvp->status[i] != offline) {
                    tvp->status[i] = not_busy;
                }
                if (tvp->status[i] == not_busy)
                    shouldRetry = 1;
            }
            afs_PutVolume(tvp, READ_LOCK);
        } else {
            afs_warnuser("afs: Waiting for busy volume %u\n",
                         (afid ? afid->Fid.Volume : 0));
        }

        /* Give up once the request has seen more than 100 busy rounds;
         * otherwise wait one poll period before the caller retries. */
        if (areq->busyCount > 100) {
            if (aerrP)
                (aerrP->err_Volume)++;
            areq->volumeError = VOLBUSY;
            shouldRetry = 0;
        } else {
            VSleep(afs_BusyWaitPeriod); /* poll periodically */
        }
        if (shouldRetry != 0)
            areq->busyCount++;

        return shouldRetry;     /* should retry */
    }

    /* No usable connection: either hard-mount wait-and-retry, or record a
     * network error on the request. */
    if (!aconn || !aconn->parent->srvr) {
        if (!areq->volumeError) {
            if (aerrP)
                (aerrP->err_Network)++;
            if (hm_retry_int && !(areq->flags & O_NONBLOCK) &&  /* "hard" mount */
                ((afid && afs_IsPrimaryCellNum(afid->Cell))
                 || (cellp && afs_IsPrimaryCell(cellp)))) {
                if (!afid) {
                    /* No FID: this was a vlserver request.  afs_vl_hm
                     * lets only one caller at a time print the warning. */
                    static int afs_vl_hm = 0;
                    int warn = 0;
                    if (!afs_vl_hm) {
                        afs_vl_hm = warn = 1;
                    }
                    if (warn) {
                        afs_warnuser
                            ("afs: hard-mount waiting for a vlserver to return to service\n");
                    }
                    VSleep(hm_retry_int);
                    afs_CheckServers(1, cellp);
                    shouldRetry = 1;

                    if (warn) {
                        afs_vl_hm = 0;
                    }
                } else {
                    static int afs_unknown_vhm = 0;
                    int warn = 0, vp_vhm = 0;

                    /* Unknown volumes are treated like read-only ones when
                     * choosing the retry policy. */
                    tvp = afs_FindVolume(afid, READ_LOCK);
                    if (!tvp || (tvp->states & VRO)) {
                        shouldRetry = hm_retry_RO;
                    } else {
                        shouldRetry = hm_retry_RW;
                    }

                    /* Set 'warn' if we should afs_warnuser. Only let one
                     * caller call afs_warnuser per hm_retry_int interval per
                     * volume. */
                    if (shouldRetry) {
                        if (tvp) {
                            if (!(tvp->states & VHardMount)) {
                                tvp->states |= VHardMount;
                                warn = vp_vhm = 1;
                            }
                        } else {
                            if (!afs_unknown_vhm) {
                                afs_unknown_vhm = 1;
                                warn = 1;
                            }
                        }
                    }

                    if (tvp)
                        afs_PutVolume(tvp, READ_LOCK);

                    if (shouldRetry) {
                        if (warn) {
                            afs_warnuser
                                ("afs: hard-mount waiting for volume %u",
                                 afid->Fid.Volume);
                            afs_PrintServerErrors(areq, afid);
                        }

                        VSleep(hm_retry_int);
                        afs_CheckServers(1, cellp);
                        /* clear the black listed servers on this request. */
                        memset(areq->skipserver, 0, sizeof(areq->skipserver));

                        /* Whoever set the "warning in progress" marker
                         * clears it again after the wait. */
                        if (vp_vhm) {
                            tvp = afs_FindVolume(afid, READ_LOCK);
                            if (tvp) {
                                tvp->states &= ~VHardMount;
                                afs_PutVolume(tvp, READ_LOCK);
                            }
                        } else if (warn) {
                            afs_unknown_vhm = 0;
                        }
                    }
                }
            } /* if (hm_retry_int ... */
            else {
                if (acode == RX_MSGSIZE)
                    shouldRetry = 1;
                else {
                    areq->networkError = 1;
                    /* do not promote to shouldRetry if not already */
                    if (afs_ClearStatus(afid, op, NULL) == 0)
                        shouldRetry = 0;
                }
            }
        }
        if (aconn) /* simply lacking aconn->server doesn't absolve this */
            afs_PutConn(aconn, rxconn, locktype);
        return shouldRetry;
    }

    /* Find server associated with this connection. */
    sa = aconn->parent->srvr;
    tsp = sa->server;
    address = ntohl(sa->sa_ip);     /* host order, for the dotted-quad warnings */

    /* Before we do anything with acode, make sure we translate it back to
     * a system error */
    if ((acode & ~0xff) == ERROR_TABLE_BASE_uae)
        acode = et_to_sys_error(acode);

    if (acode == 0) {
        /* If we previously took an error, mark this volume not busy */
        if (areq->volumeError) {
            tvp = afs_FindVolume(afid, READ_LOCK);
            if (tvp) {
                for (i = 0; i < AFS_MAXHOSTS; i++) {
                    if (tvp->serverHost[i] == tsp) {
                        tvp->status[i] = not_busy;
                    }
                }
                afs_PutVolume(tvp, READ_LOCK);
            }
        }

        afs_PutConn(aconn, rxconn, locktype);
        return 0;
    }

    /* Save the last code of this server on this request. */
    /* NOTE(review): afid is not checked for null before this lookup;
     * presumably afs_FindVolume copes with that -- confirm. */
    tvp = afs_FindVolume(afid, READ_LOCK);
    if (tvp) {
        for (i = 0; i < AFS_MAXHOSTS; i++) {
            if (tvp->serverHost[i] == tsp) {
                areq->lasterror[i] = acode;
            }
        }
        afs_PutVolume(tvp, READ_LOCK);
    }

#ifdef AFS_64BIT_CLIENT
    /* NOTE(review): -455 is folded to 455 here; the meaning of this code is
     * not evident from this file -- confirm before touching. */
    if (acode == -455)
        acode = 455;
#endif /* AFS_64BIT_CLIENT */
    if (acode == RX_MSGSIZE) {
        shouldRetry = 1;
        goto out;
    }
    if (acode == RX_CALL_TIMEOUT || acode == VNOSERVICE) {
        serversleft = afs_BlackListOnce(areq, afid, tsp);
        if (afid)
            tvp = afs_FindVolume(afid, READ_LOCK);
        /* Give up only when no server is left for a read-only or backup
         * volume; everything else is retried. */
        if ((serversleft == 0) && tvp &&
            ((tvp->states & VRO) || (tvp->states & VBackup))) {
            shouldRetry = 0;
        } else {
            shouldRetry = 1;
        }
        if (!afid || !tvp || (tvp->states & VRO))
            areq->idleError++;
        else if (afs_ClearStatus(afid, op, tvp) == 0)
            shouldRetry = 0;

        if (tvp)
            afs_PutVolume(tvp, READ_LOCK);
        /* By doing this, we avoid ever marking a server down
         * in an idle timeout case. That's because the server is
         * still responding and may only be letting a single vnode
         * time out. We otherwise risk having the server continually
         * be marked down, then up, then down again...
         */
        goto out;
    }
    /* If network troubles, mark server as having bogued out again. */
    /* VRESTARTING is < 0 because of backward compatibility issues
     * with 3.4 file servers and older cache managers */
    /* NOTE(review): for negative codes afs_ServerDown and the err_Server
     * counter are hit here and again in the final block before 'out' --
     * verify the double accounting is intended. */
    if ((acode < 0) && (acode != VRESTARTING)) {
        afs_ServerDown(sa, acode, rxconn);
        ForceNewConnections(sa); /* multi homed clients lock:afs_xsrvAddr? */
        if (aerrP)
            (aerrP->err_Server)++;
    }

    if (acode == VBUSY || acode == VRESTARTING) {
        if (acode == VBUSY) {
            areq->busyCount++;
            if (aerrP)
                (aerrP->err_VolumeBusies)++;
        } else
            areq->busyCount = 1;

        tvp = afs_FindVolume(afid, READ_LOCK);
        if (tvp) {
            for (i = 0; i < AFS_MAXHOSTS; i++) {
                if (tvp->serverHost[i] == tsp) {
                    tvp->status[i] = rdwr_busy; /* can't tell which yet */
                    /* to tell which, have to look at the op code. */
                }
            }
            afs_PutVolume(tvp, READ_LOCK);
        } else {
            afs_warnuser("afs: Waiting for busy volume %u in cell %s (server %d.%d.%d.%d)\n",
                         (afid ? afid->Fid.Volume : 0), tsp->cell->cellName,
                         (address >> 24), (address >> 16) & 0xff,
                         (address >> 8) & 0xff, (address) & 0xff);
            VSleep(afs_BusyWaitPeriod); /* poll periodically */
        }
        shouldRetry = 1;
        /* Clearing acode keeps the negative VRESTARTING out of the generic
         * "acode < 0" server-trouble handling at the end. */
        acode = 0;
    } else if (acode == VICETOKENDEAD
               || (acode & ~0xff) == ERROR_TABLE_BASE_RXK) {
        /* any rxkad error is treated as token expiration */
        struct unixuser *tu;
        /*
         * I'm calling these errors protection errors, since they involve
         * faulty authentication.
         */
        if (aerrP)
            (aerrP->err_Protection)++;

        tu = afs_FindUser(areq->uid, tsp->cell->cellNum, READ_LOCK);
        if (tu) {
            if (acode == VICETOKENDEAD) {
                aconn->forceConnectFS = 1;
            } else if (acode == RXKADEXPIRED) {
                aconn->forceConnectFS = 0;      /* don't check until new tokens set */
                aconn->parent->user->states |= UTokensBad;
                afs_NotifyUser(tu, UTokensDropped);
                afs_warnuser
                    ("afs: Tokens for user of AFS id %d for cell %s have expired (server %d.%d.%d.%d)\n",
                     tu->viceId, aconn->parent->srvr->server->cell->cellName,
                     (address >> 24), (address >> 16) & 0xff,
                     (address >> 8) & 0xff, (address) & 0xff);
            } else {
                /* Other rxkad error: try the remaining servers first and
                 * only discard the tokens once none is left. */
                serversleft = afs_BlackListOnce(areq, afid, tsp);
                areq->tokenError++;

                if (serversleft) {
                    afs_warnuser
                        ("afs: Tokens for user of AFS id %d for cell %s: rxkad error=%d (server %d.%d.%d.%d)\n",
                         tu->viceId, aconn->parent->srvr->server->cell->cellName, acode,
                         (address >> 24), (address >> 16) & 0xff,
                         (address >> 8) & 0xff, (address) & 0xff);
                    shouldRetry = 1;
                } else {
                    areq->tokenError = 0;
                    aconn->forceConnectFS = 0;  /* don't check until new tokens set */
                    aconn->parent->user->states |= UTokensBad;
                    afs_NotifyUser(tu, UTokensDropped);
                    afs_warnuser
                        ("afs: Tokens for user of AFS id %d for cell %s are discarded (rxkad error=%d, server %d.%d.%d.%d)\n",
                         tu->viceId, aconn->parent->srvr->server->cell->cellName, acode,
                         (address >> 24), (address >> 16) & 0xff,
                         (address >> 8) & 0xff, (address) & 0xff);
                }
            }
            afs_PutUser(tu, READ_LOCK);
        } else {
            /* The else case shouldn't be possible and should probably be replaced by a panic? */
            /* NOTE(review): tu is null throughout this branch, yet it is
             * passed to afs_NotifyUser below -- confirm that callee accepts
             * a null user. */
            if (acode == VICETOKENDEAD) {
                aconn->forceConnectFS = 1;
            } else if (acode == RXKADEXPIRED) {
                aconn->forceConnectFS = 0;      /* don't check until new tokens set */
                aconn->parent->user->states |= UTokensBad;
                afs_NotifyUser(tu, UTokensDropped);
                afs_warnuser
                    ("afs: Tokens for user %d for cell %s have expired (server %d.%d.%d.%d)\n",
                     areq->uid, aconn->parent->srvr->server->cell->cellName,
                     (address >> 24), (address >> 16) & 0xff,
                     (address >> 8) & 0xff, (address) & 0xff);
            } else {
                aconn->forceConnectFS = 0;      /* don't check until new tokens set */
                aconn->parent->user->states |= UTokensBad;
                afs_NotifyUser(tu, UTokensDropped);
                afs_warnuser
                    ("afs: Tokens for user %d for cell %s are discarded (rxkad error = %d, server %d.%d.%d.%d)\n",
                     areq->uid, aconn->parent->srvr->server->cell->cellName,
                     acode,
                     (address >> 24), (address >> 16) & 0xff,
                     (address >> 8) & 0xff, (address) & 0xff);

            }
        }
        /* Every path through this token-error branch ends up retrying. */
        shouldRetry = 1;        /* Try again (as root). */
    }
    /* Check for access violation. */
    else if (acode == EACCES) {
        /* should mark access error in non-existent per-user global structure */
        if (aerrP)
            (aerrP->err_Protection)++;
        areq->accessError = 1;
        if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
            areq->permWriteError = 1;
        shouldRetry = 0;
    }
    /* check for ubik errors; treat them like crashed servers */
    else if (acode >= ERROR_TABLE_BASE_U && acode < ERROR_TABLE_BASE_U + 255) {
        afs_ServerDown(sa, acode, rxconn);
        if (aerrP)
            (aerrP->err_Server)++;
        shouldRetry = 1;        /* retryable (maybe one is working) */
        VSleep(1);              /* just in case */
    }
    /* Check for bad volume data base / missing volume. */
    else if (acode == VSALVAGE || acode == VOFFLINE || acode == VNOVOL
             || acode == VMOVED) {
        struct cell *tcell;
        int same;

        shouldRetry = 1;
        areq->volumeError = VOLMISSING;
        if (aerrP)
            (aerrP->err_Volume)++;
        /* NOTE(review): tcell is used only as an existence test and no
         * matching afs_PutCell is visible here -- confirm none is needed. */
        if (afid && (tcell = afs_GetCell(afid->Cell, 0))) {
            same = VLDB_Same(afid, areq);
            tvp = afs_FindVolume(afid, READ_LOCK);
            if (tvp) {
                /* Advance the failing server's status one step (going
                 * offline after end_not_busy); if the vldb entry changed,
                 * give every other server a fresh start. */
                for (i = 0; i < AFS_MAXHOSTS && tvp->serverHost[i]; i++) {
                    if (tvp->serverHost[i] == tsp) {
                        if (tvp->status[i] == end_not_busy)
                            tvp->status[i] = offline;
                        else
                            tvp->status[i]++;
                    } else if (!same) {
                        tvp->status[i] = not_busy;      /* reset the others */
                    }
                }
                afs_PutVolume(tvp, READ_LOCK);
            }
        }
    } else if (acode >= ERROR_TABLE_BASE_VL && acode <= ERROR_TABLE_BASE_VL + 255) {    /* vlserver errors */
        shouldRetry = 0;
        areq->volumeError = VOLMISSING;
    } else if (acode >= 0) {
        if (aerrP)
            (aerrP->err_Other)++;
        if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
            areq->permWriteError = 1;
        shouldRetry = 0;        /* Other random Vice error. */
    } else if (acode == RX_MSGSIZE) {   /* same meaning as EMSGSIZE... */
        /* NOTE(review): unreachable as written -- acode == RX_MSGSIZE
         * already jumped to 'out' earlier in this function. */
        afs_warnuser
            ("afs: Path MTU may have been exceeded, retrying (server %d.%d.%d.%d)\n",
             (address >> 24), (address >> 16) & 0xff,
             (address >> 8) & 0xff, (address) & 0xff);

        VSleep(1);              /* Just a hack for desperate times. */
        if (aerrP)
            (aerrP->err_Other)++;
        shouldRetry = 1;        /* packet was too big, please retry call */
    }

    if (acode < 0 && acode != RX_MSGSIZE && acode != VRESTARTING) {
        /* If we get here, code < 0 and we have network/Server troubles.
         * areq->networkError is not set here, since we always
         * retry in case there is another server.  However, if we find
         * no connection (aconn == 0) we set the networkError flag.
         */
        afs_ServerDown(sa, acode, rxconn);
        if (aerrP)
            (aerrP->err_Server)++;
        VSleep(1);              /* Just a hack for desperate times. */
        shouldRetry = 1;
    }
out:
    /* now unlock the connection and return */
    afs_PutConn(aconn, rxconn, locktype);
    return (shouldRetry);
}                               /*afs_Analyze */